-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathtraj_compression.py
88 lines (67 loc) · 2.57 KB
/
traj_compression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import sys
import pandas as pd
from helper import *
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
from haversine import haversine
from tqdm import tqdm
# Register tqdm's pandas integration so that `progress_apply` (used in
# compress() below) is available on pandas/groupby objects.
tqdm.pandas()
# td_tr() is recursive; raise the interpreter's recursion limit above the
# default so deeper recursions do not abort with RecursionError.
sys.setrecursionlimit(10_000)
def td_tr(gdf, dthr):
    '''
    Top-down time-ratio (TD-TR) simplification as described by Meratnia and de By.

    The trajectory is approximated by the straight segment between its first
    and last point. Every point is compared with the position it would have on
    that segment at its own timestamp (see dist_from_calced). If the largest
    deviation of an interior point exceeds the threshold, the trajectory is
    split at that point and both halves are simplified recursively; otherwise
    only the two endpoints are kept.

    Input:
        gdf  : GeoDataFrame with a `geom` column containing Shapely Points and
               a `ts` column (assumed numeric, presumably seconds -- TODO confirm)
        dthr : Distance threshold in Kilometers (compared against the values
               returned by dist_from_calced)
    Output:
        simplified GeoDataFrame. The input frame is left untouched. The index
        of the result is not meaningful: labels come from the intermediate
        re-indexing done at each recursion level and may repeat.
    '''
    # Re-index a copy (not in place) so the caller's frame / groupby group is
    # never mutated, and so that idxmax() labels equal iloc positions.
    gdf = gdf.reset_index(drop=True)
    if len(gdf) <= 2:
        return gdf

    start = gdf.iloc[0]
    end = gdf.iloc[-1]
    de = (end.ts - start.ts) / 3600
    dx = end.geom.x - start.geom.x
    dy = end.geom.y - start.geom.y
    if de == 0:
        # Zero-duration segment: the time ratio is undefined. Compare every
        # point against the start position instead of dividing by zero.
        de, dx, dy = 1, 0.0, 0.0

    # distance between each point and its time-synchronised position on the
    # start -> end segment
    dists = gdf.apply(lambda rec: dist_from_calced(rec, start, de, dx, dy), axis=1)

    # Only interior points may become the pivot: the endpoints lie on the
    # segment by construction, and excluding them guarantees that both halves
    # are strictly shorter than the input, so the recursion terminates.
    interior = dists.iloc[1:-1]
    if interior.max() > dthr:
        pivot = interior.idxmax()
        # The pivot belongs to BOTH halves: it is the end anchor of the left
        # segment and the start anchor of the right one.
        left = td_tr(gdf.iloc[:pivot + 1], dthr)
        right = td_tr(gdf.iloc[pivot:], dthr)
        # Drop the pivot from the left result so it appears only once.
        return pd.concat([left.iloc[:-1], right])
    return gdf.iloc[[0, -1]]
def dist_from_calced(rec, start, de, dlat, dlon):
    '''
    Distance between a record's position and its time-interpolated position.

    Input:
        rec   : row with `ts` and `geom` (Shapely Point) attributes
        start : first row of the segment, same attributes as `rec`
        de    : duration of the whole segment, in the same unit as the
                `(ts difference) / 3600` computed here
        dlat  : x-offset of the segment (end.geom.x - start.geom.x in td_tr)
        dlon  : y-offset of the segment (end.geom.y - start.geom.y in td_tr)
    Output:
        planar distance between `rec.geom` and the interpolated point,
        multiplied by 100.
        NOTE(review): the factor 100 looks like a rough degrees -> km
        conversion (td_tr documents its threshold in kilometers) -- confirm.
    '''
    di = (rec.ts - start.ts) / 3600
    if de == 0:
        # A zero-duration segment has no defined time ratio; use the start
        # position as the expected location instead of dividing by zero.
        calced = Point(start.geom.x, start.geom.y)
    else:
        # Move along the segment by the fraction of its duration elapsed at rec.
        calced = Point(start.geom.x + dlat * di / de, start.geom.y + dlon * di / de)
    return rec.geom.distance(calced) * 100
def compress(input, threshold, lat='lat', lon='lon', output_filename=None):
    '''
    Compress every trajectory of a csv file with td_tr.

    Input:
        input           : path passed to movin_read
        threshold       : distance threshold handed to td_tr
        lat, lon        : names of the coordinate columns, passed to df_to_gdf
        output_filename : if given, the compressed data is written there with
                          movin_write and nothing is returned
    Output:
        the compressed GeoDataFrame when `output_filename` is None,
        otherwise None
    '''
    df = movin_read(input, check_init=False)
    print('read df')
    gdf = df_to_gdf(df, lat=lat, lon=lon)
    print('DF TO GDF')
    # One simplification per (oid, tid) group, with a tqdm progress bar.
    # presumably oid/tid are object id / trajectory id -- verify against helper
    spl_gdf = gdf.groupby(['oid', 'tid'], group_keys=False).progress_apply(
        lambda grp: td_tr(grp, threshold)
    )
    if output_filename is not None:
        movin_write(spl_gdf, output_filename)
        return None
    # Return the compressed result, not the raw input frame.
    return spl_gdf
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and run compress().
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('input', metavar='FILE', help='Input csv path', type=lambda x: is_valid_csv(parser, x))
    # Optional, as the help text states: without -o the input file is overwritten.
    parser.add_argument('-o', dest='outfile', default=None, metavar='FILE', help='Output csv path. Overwrites input by default', type=lambda x: is_valid_csv(parser, x))
    # Defaults match compress(); `const` makes a bare flag fall back to the
    # default instead of producing None.
    parser.add_argument('--lat', dest='lat', default='lat', const='lat', metavar='COLUMN', nargs="?", help='Name of the latitude column')
    parser.add_argument('--lon', dest='lon', default='lon', const='lon', metavar='COLUMN', nargs="?", help='Name of the longitude column')
    # float rather than int so that sub-kilometre thresholds can be given.
    parser.add_argument('--threshold', dest='threshold', default=2, const=2, nargs="?", help='Distance threshold in Km', type=float)

    args = parser.parse_args()
    print(args)

    INPUT = args.input
    THRESHOLD = args.threshold
    LAT = args.lat
    LON = args.lon

    if args.outfile is not None:
        OUTFILE = args.outfile
    else:
        OUTFILE = INPUT

    compress(INPUT, THRESHOLD, lat=LAT, lon=LON, output_filename=OUTFILE)