# pomdps.jl
using StaticArrays, LightGraphs, MetaGraphs,
    LazySets, Random, POMDPs, Combinatorics,
    Parameters, POMDPModelTools, Distributions, LinearAlgebra
include("ndgrid.jl")
import POMDPs: actions, states, discount, isterminal, generate_s, generate_o, reward
import POMDPModelTools: obs_weight
const Vec2 = SVector{2, Int64}
const AGENT_ACTIONS = [Vec2(i, j) for i in -1:1 for j in -1:1]
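# Sensor model (semantics inferred from how generate_o / obs_weight use it
# below): :o1s1 is the detection probability when an adversary occupies the
# observed cell (true-positive rate 0.9); :o1s0 is the probability of a
# detection when the cell is empty (false-positive rate 0.1).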
const SENSOR_MODEL = Dict(:o1s1 => BoolDistribution(0.9),
:o1s0 => BoolDistribution(0.1))
# Just an alias
const Region = Vector{Vec2}
############################################
######### Environment -related ###########
############################################
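# Environment bundles the grid size, the obstacle set, a per-node visibility
# list (the cells each node can see within sensing radius R, as computed by
# limited_visible), the underlying grid graph, and a position-to-node index map.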
struct Environment
size::NTuple{2, Int}
obstacles::Vector{<:Hyperrectangle}
visibility::Vector{Vector{Tuple}}
G::MetaGraph
pos_to_ind::Dict{Vec2, Int64}
R::Int64
function Environment(size::NTuple{2, Int}, obstacles::Vector{<:Hyperrectangle}, R::Int64)
G = create_graph(size...)
remove_obstacles(G, obstacles)
visibility = limited_visible(G, obstacles, R, inf_visible(G, obstacles))
node_mapping = Dict(get_pos(G, i) => i for i in 1:nv(G))
return new(size, obstacles, visibility, G, node_mapping, R)
end
end
Base.size(env::Environment) = env.size
graph(env::Environment) = env.G
obstacles(env::Environment) = env.obstacles
visibility(env::Environment) = env.visibility
visibility(env::Environment, ind::Int64) = env.visibility[ind]
visibility(env::Environment, pos::Vec2) = env.visibility[pos_to_ind(env, pos)]
pos_to_ind(env::Environment, pos::Vec2) = env.pos_to_ind[pos]
get_pos(G::MetaGraph, i::Int64) = Vec2(get_pos_tup(G, i))
in_obstacle(env::Environment, x) = in_obstacle(env, Vector{Float64}(x))
in_obstacle(env::Environment, x::Vector{Float64}) = any(x ∈ o for o in obstacles(env))
# Pass the RNG through so sampling is reproducible.
Base.rand(G::AbstractGraph, rng::AbstractRNG = Random.GLOBAL_RNG) = rand(rng, 1:nv(G))
function rand_pos(G::AbstractGraph, rng::AbstractRNG = Random.GLOBAL_RNG)
i = rand(G, rng)
Vec2(get_pos_tup(G, i))
end
rand_pos(G::AbstractGraph) = rand_pos(G, Random.GLOBAL_RNG)
function rand_pos(G::AbstractGraph, n::Integer, rng::AbstractRNG = Random.GLOBAL_RNG; replace = false)
    # Collecting into a Set means sampling is always without replacement;
    # the `replace` keyword is currently ignored.
    points = Set{Vec2}()
    while length(points) < n
        push!(points, rand_pos(G, rng))
    end
    return collect(points)
end
####################################################
#################### S, A, O #######################
####################################################
struct Agent
pos::Vec2
partition::Region
end
partition(ag::Agent) = ag.partition # not sure how much this will be used.
pos(ag::Agent) = ag.pos
Agent(ag::Agent, pos::Vec2) = Agent(pos, ag.partition)
Base.:+(ag::Agent, a::Vec2) = Agent(ag, ag.pos+a)
# Pass the POMDP explicitly rather than relying on a global `p`.
isvalid_position(p, s::Agent, pos::Vec2) = pos ∈ partition(s) && !in_obstacle(env(p), pos)
isvalid_position(p, pos::Vec2) = !in_obstacle(env(p), pos)
struct SNState
patrollers::Vector{Agent}
adversaries::Vector{Vec2}
end
struct Alarm
guess::Vec2
end
const SNAction = Union{Vector{Vec2}, Alarm}
const SNObservation = Vector{Pair{Vec2, Bool}}
agents(s::SNState) = (s.patrollers, s.adversaries)
patrollers(s::SNState) = s.patrollers
adversaries(s::SNState) = s.adversaries
# occupied_probability(o::SNObservation, pos::Vec2) = o.occupied[pos]
####################################################
################ Adversary related #################
####################################################
# need to define these before the pomdp type, even
# though their implementations only happen later on
abstract type AdversaryModel end
struct RandomActions <: AdversaryModel end
struct MaximizeDistance <: AdversaryModel end
struct MaximizeSafety <: AdversaryModel end
# struct Omniscient{APB<:AbstractParticleBelief} <: AdversaryModel
# belief::APB
# end
# There was another one wasn't there?
####################################################
############# POMDP type + functions ###############
####################################################
"""
S :: vector of agents and pos of Adversary/s
A :: a rearrangement of the agents or an Alarm
O :: A boolean for each node being observed
R ::
seeing Adversary (closer is better),
minimize uncertainty,
maximize overall overage,
maximize agent utilization
"""
@with_kw mutable struct SkyNet <: POMDP{SNState, SNAction, SNObservation}
size::NTuple{2, Int64} = (20, 20)
R::Int64 = 3
n_agents::Int8 = 5
discount_factor::Float64 = 0.9
env::Environment = begin
obs = [Hyperrectangle(low=[3, 3], high=[6, 8]),
Hyperrectangle(low=[4, 9], high=[8, 10]),
Hyperrectangle(low=[15,15], high=[16,19]),
Hyperrectangle(low=[16,3], high=[18,10]),
Hyperrectangle(low=[3,16], high=[10,18])]
Environment(size, obs, R)
end
    partition_points::Vector{Vec2} = [Vec2(2, 3),
                                      Vec2(9, 2),
                                      Vec2(5, 13),
                                      Vec2(14, 11),
                                      Vec2(13, 7)]
# rand_pos(graph(env), n_agents) # maybe don't need
partitions::Vector{Region} = begin
voronoi_regions = voronoi_cells(graph(env), obstacles(env), partition_points)
Region.(voronoi_regions)
end
    adversarial_model::AdversaryModel = RandomActions()
# max_adversaries::Int8 = 1 # maybe don't need
# n_adversaries::Int8 = 1 # maybe don't need
# intrusion_prob::Float64 = 1.0 # maybe don't need
# sensor_distr::Truncated{Normal{Float64},Continuous} = Truncated(Normal(0, -R, R))
end
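# Construction sketch (illustrative, not part of the model): Parameters.jl's
# @with_kw generates a keyword constructor, so any subset of the fields above
# can be overridden while the rest keep their defaults. This assumes the graph
# and partition helpers referenced in the defaults (create_graph, voronoi_cells,
# etc.) are available when the defaults are evaluated.
function example_skynet()
    p_default = SkyNet()                              # all defaults: 20x20 grid, 5 agents
    p_tuned = SkyNet(discount_factor = 0.95, R = 4)   # wider sensing radius
    return p_default, p_tuned
end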
environment(p::SkyNet) = p.env
env(p::SkyNet) = p.env
graph(p::SkyNet) = p.env.G
obstacles(p::SkyNet) = p.env.obstacles
adversarial_model(p::SkyNet) = p.adversarial_model
discount(p::SkyNet) = p.discount_factor
"""
isterminal checks if the nodes are outside the environment,
which is used as a flag for being terminal. Should only happen
if they were intentionally set that way at the end of generate_s,
if they chose to sound the alarm.
"""
isterminal(p::SkyNet, s::SNState) = any(pat->pos(pat) > Vec2(size(env(p))), patrollers(s))
####################################################
############### More Adversary stuff ###############
####################################################
heuristic_update(p, s, rng) = heuristic_update(adversarial_model(p), p, s, rng)
function heuristic_update(::MaximizeDistance, p::SkyNet, s::SNState, rng)
    # Not implemented yet: an empty body would silently return `nothing` and
    # break generate_s, so fail loudly instead.
    error("heuristic_update for MaximizeDistance is not implemented")
end
function heuristic_update(::RandomActions, p::SkyNet, s::SNState, rng)
    newadv = similar(adversaries(s))
    for (i, adv) in enumerate(adversaries(s))
        adv2 = adv + rand(rng, AGENT_ACTIONS)
        # Keep the random move only if it stays on the grid and off the obstacles;
        # bounds come from the environment size, not the number of graph nodes.
        if all(adv2 .> 0) && all(adv2 .<= Vec2(size(env(p)))) && !in_obstacle(env(p), adv2)
            newadv[i] = adv2
        else
            newadv[i] = adv
        end
    end
    return newadv
end
function actions(p::SkyNet, s::SNState)
agentwise_action_space = actions.([p], patrollers(s))
moves = tuple.(vec.(ndgrid(agentwise_action_space...))...)
alarms = alarm_actions(p, s)
A = SNAction[collect.(moves); alarms]
println("A size: $(length(A))")
println("$(length(moves)) moves")
println("$(length(alarms)) alarms")
A
end
function actions(p::SkyNet, s::Agent)
    x0 = pos(s)
    filter(dir -> isvalid_position(p, s, x0 + dir), AGENT_ACTIONS)
end
function alarm_actions(p::SkyNet, s::SNState)
    env = environment(p)
    # An alarm can only be rung for a cell some patroller can currently see.
    agent_inds = pos_to_ind.(Ref(env), pos.(patrollers(s)))
    visible_cells = vcat(visibility.(Ref(env), agent_inds)...)
    Alarm.(Set(visible_cells))
end
function generate_s(p::SkyNet, s::SNState, a::SNAction, rng::AbstractRNG)
# @show a
if isterminal(p, s)
error("State is terminal in generate_s. Why is this happening? \ns = $s, \n a = $a")
# return s
end
    if a isa Alarm
        # Sounding the alarm ends the episode: push every patroller out of
        # bounds so that isterminal flags the resulting state as terminal.
        out_of_bounds_pats = patrollers(s) .+ [Vec2(size(env(p)))]
        return SNState(out_of_bounds_pats, adversaries(s))
    end
pats, advs = agents(s)
updated_pats = pats .+ a
# update adversary's position somehow:
new_adv = heuristic_update(p, s, rng)
return SNState(updated_pats, new_adv)
end
function obs_weight(p::SkyNet, s::SNState, a::SNAction, sp::SNState, o::SNObservation)
    # The observation is generated from the next state (see generate_o),
    # so weight it against sp's adversary positions.
    advs = adversaries(sp)
    prob = 1.0
    # if a isa Alarm
    # return 1.0 * (a.guess ∈ advs)
    # return 0.5
    # end
    for oᵢ in o
        cell, val = oᵢ
        isin = cell ∈ advs ? :o1s1 : :o1s0
        p_accurate = pdf(SENSOR_MODEL[isin], val)
        prob *= p_accurate
    end
    # @show prob
    return prob
end
function generate_o(p::SkyNet, s::SNState, a::SNAction, sp::SNState, rng::AbstractRNG)
# println(pos.(patrollers))
# println("adv: ", adversaries)
# println(a)
o = Vector{Pair{Vec2, Bool}}()
if a isa Alarm
# println(a)
return o
# sp = s
end
patrollers, adversaries = agents(sp)
for pat in patrollers
visibles = visibility(env(p), pos(pat))
for v in Vec2.(visibles)
positive_rate = v ∈ adversaries ? :o1s1 : :o1s0
detected = rand(rng, SENSOR_MODEL[positive_rate])
push!(o, v=>detected)
end
end
return o
end
function reward(p::SkyNet, s::SNState, a::SNAction, sp::SNState)
# subtract some points for each adversary in the scene.
# @show typeof(a)
r = -20.0 * length(adversaries(s))
if a isa Alarm
# Alarms are very costly.
r -= 10000.0
dists = norm.([a.guess] .- adversaries(s))
# @show dists
if any(dists .== 0)
r += 600.0
elseif any(dists .<= 1.415) # √2 is diagonal distance
r += 300.0
end
else
        travelled = norm.(a) # gives 0, 1, or √2 depending on direction of travel
r -= 10*sum(travelled)
# @show r
# old_distance_to_adversary = norm.(pos.(patrollers(s)) .- adversaries(s))
# new_distance_to_adversary = norm.(pos.(patrollers(sp)) .- adversaries(sp))
# r += 200*maximum(new_distance_to_adversary .- old_distance_to_adversary)
end
return r
end
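# Rough single-step rollout sketch (illustrative only): build a state by hand,
# take the first legal action, and push it through the generative interface.
# The hand-built state below (one patroller per partition, a lone adversary at
# (1, 1)) is an assumption for demonstration, not the project's initial-state
# distribution, and it presumes the helpers included above load cleanly.
function example_step(rng::AbstractRNG = Random.GLOBAL_RNG)
    p = SkyNet()
    pats = [Agent(pt, reg) for (pt, reg) in zip(p.partition_points, p.partitions)]
    s = SNState(pats, [Vec2(1, 1)])
    a = first(actions(p, s))        # an arbitrary legal joint move
    sp = generate_s(p, s, a, rng)
    o = generate_o(p, s, a, sp, rng)
    r = reward(p, s, a, sp)
    return sp, o, r
end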