-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreward_function.py
92 lines (71 loc) · 2.82 KB
/
reward_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def reward_function(params):
    """Score one simulation step for a DeepRacer-style car.

    The reward starts from a centre-line tier and is then scaled by
    penalties, so every signal contributes to the final value:

    * off track (``all_wheels_on_track`` is false, or the car is farther
      than half the track width from the centre line): minimum reward,
      returned immediately;
    * distance from the centre line: 1.0 / 0.5 / 0.1 for within
      10% / 25% / 50% of the track width;
    * speed below ``SPEED_THRESHOLD``: reward halved;
    * absolute steering angle above ``STEERING_THRESHOLD``: reward * 0.8;
    * heading more than ``DIRECTION_THRESHOLD`` degrees away from the
      direction of the closest waypoint segment: reward halved;
    * every 100 steps, if the car is ahead of the pace needed to finish
      the lap in ``TOTAL_NUM_STEPS`` steps: +10 bonus.

    Args:
        params: Mapping with the keys ``track_width``,
            ``distance_from_center``, ``steering_angle``, ``steps``,
            ``heading``, ``all_wheels_on_track``, ``progress``, ``speed``,
            ``waypoints`` (sequence of ``(x, y)`` points) and
            ``closest_waypoints`` (pair of indices into ``waypoints``:
            previous, next). ``heading`` is compared against an angle in
            degrees; ``progress`` is treated as a 0-100 percentage, as
            documented in the AWS DeepRacer parameter reference.

    Returns:
        The reward for this step as a ``float``.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    import math

    # Tunable thresholds.
    SPEED_THRESHOLD = 1.0        # below this the car counts as "too slow"
    STEERING_THRESHOLD = 20.0    # degrees; above this steering is "too sharp"
    DIRECTION_THRESHOLD = 10.0   # degrees of tolerated heading error
    TOTAL_NUM_STEPS = 300        # target steps per lap; depends on the track
    CHECKPOINT_INTERVAL = 100    # evaluate the pace bonus every N steps
    MIN_REWARD = 1e-3            # likely crashed / close to off track

    # Read input parameters.
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    steering_angle = params['steering_angle']
    steps = params['steps']
    heading = params['heading']
    all_wheels_on_track = params['all_wheels_on_track']
    progress = params['progress']
    speed = params['speed']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']

    # Leaving the track dominates every other consideration.
    if not all_wheels_on_track:
        return float(MIN_REWARD)

    # Base reward: higher the closer the car is to the centre line.
    if distance_from_center <= 0.1 * track_width:
        reward = 1.0
    elif distance_from_center <= 0.25 * track_width:
        reward = 0.5
    elif distance_from_center <= 0.5 * track_width:
        reward = 0.1
    else:
        return float(MIN_REWARD)

    # Penalise driving too slowly.
    if speed < SPEED_THRESHOLD:
        reward *= 0.5

    # Penalise sharp steering; the sign (left or right) does not matter.
    if abs(steering_angle) > STEERING_THRESHOLD:
        reward *= 0.8

    # Direction of the centre line from the two closest waypoints.
    # atan2(dy, dx) yields radians in [-pi, pi]; convert to degrees.
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1],
                   next_point[0] - prev_point[0])
    )

    # Smallest angle between heading and track direction. Without folding
    # into [0, 180], 180 deg vs -179 deg would read as 359 deg instead of 1.
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180.0:
        direction_diff = 360.0 - direction_diff
    if direction_diff > DIRECTION_THRESHOLD:
        reward *= 0.5

    # Pace bonus: compare percentage with percentage. Step 0 is skipped so
    # the bonus cannot fire before the car has taken any step.
    expected_progress = (steps / TOTAL_NUM_STEPS) * 100
    if (steps > 0 and steps % CHECKPOINT_INTERVAL == 0
            and progress > expected_progress):
        reward += 10.0

    return float(reward)