-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathtest_full_seq.py
126 lines (100 loc) · 4.74 KB
/
test_full_seq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import shutil
import pandas as pd
from deeplens.dataflow.agg import count, counts
from deeplens.full_manager.condition import Condition
from deeplens.full_manager.full_manager import FullStorageManager
from deeplens.full_manager.full_video_processing import CropSplitter
from deeplens.constants import *
from deeplens.simple_manager.manager import SimpleStorageManager
from deeplens.struct import VideoStream
from deeplens.tracking.contour import KeyPoints
from experiments.environ import logrecord
from timeit import default_timer as timer
# Naive baseline: loads directly from the mp4 file, no storage manager involved.
def runNaive(src, cleanUp=False):
    """Apply the KeyPoints stage to the raw video at ``src`` and log the count.

    Args:
        src: Path of the video file handed to ``VideoStream``.
        cleanUp: When true, remove an existing ``./videos_naive`` directory
            before running.
    """
    if cleanUp and os.path.exists('./videos_naive'):
        shutil.rmtree('./videos_naive')

    stream = VideoStream(src)
    keypoint_pipeline = stream[KeyPoints()]
    stats = count(keypoint_pipeline, ['one'], stats=True)

    logrecord('naive', {'file': src}, 'get', str(stats), 's')
def runSimple(src, cleanUp=False):
    """Store ``src`` with SimpleStorageManager, then read it back and count.

    Two records are logged under the 'simple' baseline: the wall-clock time of
    ``put`` and the result of the ``counts`` aggregate over every clip
    returned by ``get``.

    Args:
        src: Path of the video file to store; its basename is the storage key.
        cleanUp: When true, remove an existing ``./videos_simple`` directory
            before running.
    """
    if cleanUp and os.path.exists('./videos_simple'):
        shutil.rmtree('./videos_simple')

    storage = SimpleStorageManager('videos_simple')
    video_name = os.path.basename(src)
    put_options = {
        'encoding': XVID,
        'size': -1,
        'sample': 1.0,
        'offset': 0,
        'limit': -1,
        'batch_size': 30,
    }

    # Time only the put() call itself.
    started = timer()
    storage.put(src, video_name, args=put_options)
    elapsed = timer() - started
    logrecord('simple', {'file': src}, 'put', str({'elapsed': elapsed}), 's')

    # The always-true predicate selects every stored clip.
    stored_clips = storage.get(video_name, lambda f: True)
    pipelines = [clip[KeyPoints()] for clip in stored_clips]
    stats = counts(pipelines, ['one'], stats=True)
    logrecord('simple', {'file': src}, 'get', str(stats), 's')
def runFull(src, cleanUp=False):
    """Store ``src`` with FullStorageManager using ``parallel=True`` and log the put time.

    Args:
        src: Path of the video file to store; its basename is the storage key.
        cleanUp: When true, remove an existing ``./videos_full`` directory
            before running.
    """
    if cleanUp and os.path.exists('./videos_full'):
        shutil.rmtree('./videos_full')

    storage = FullStorageManager(None, CropSplitter(), 'videos_full')
    put_options = {
        'encoding': XVID,
        'size': -1,
        'sample': 1.0,
        'offset': 0,
        'limit': -1,
        'batch_size': 30,
        'num_processes': os.cpu_count(),
    }

    started = timer()
    storage.put(src, os.path.basename(src), parallel=True, args=put_options)
    elapsed = timer() - started
    logrecord('full', {'file': src}, 'put', str({'elapsed': elapsed}), 's')

    # get() is deliberately not called for now, so no 'get' record is logged
    # here; runFullSequential performs the retrieval and counting step.
def runFullSequential(src, cleanUp=False):
    """Store ``src`` with FullStorageManager using ``parallel=False``, then read back and count.

    Two records are logged under the 'full' baseline: the wall-clock time of
    ``put`` and the result of the ``counts`` aggregate over every clip
    returned by ``get`` with a default ``Condition()``.

    Args:
        src: Path of the video file to store; its basename is the storage key.
        cleanUp: When true, remove an existing ``./videos_full`` directory
            before running.
    """
    if cleanUp and os.path.exists('./videos_full'):
        shutil.rmtree('./videos_full')

    storage = FullStorageManager(None, CropSplitter(), 'videos_full')
    video_name = os.path.basename(src)
    put_options = {
        'encoding': XVID,
        'size': -1,
        'sample': 1.0,
        'offset': 0,
        'limit': -1,
        'batch_size': 30,
        'num_processes': os.cpu_count(),
    }

    started = timer()
    storage.put(src, video_name, parallel=False, args=put_options)
    elapsed = timer() - started
    logrecord('full', {'file': src}, 'put', str({'elapsed': elapsed}), 's')

    stored_clips = storage.get(video_name, Condition())
    pipelines = [clip[KeyPoints()] for clip in stored_clips]
    stats = counts(pipelines, ['one'], stats=True)
    logrecord('full', {'file': src}, 'get', str(stats), 's')
def runFullPutMany(src_list, cleanUp=False):
    """Store every video in ``src_list`` through one ``put_many`` call and log timings.

    One 'full' record holds the overall elapsed time; then one 'fullMany'
    record is written per entry of the value returned by ``put_many``.

    Args:
        src_list: Paths of the video files to store; each basename is used as
            the corresponding target name.
        cleanUp: When true, remove an existing ``./videos_full`` directory
            before running.
    """
    if cleanUp and os.path.exists('./videos_full'):
        shutil.rmtree('./videos_full')

    storage = FullStorageManager(None, CropSplitter(), 'videos_full')

    # The timer starts before the target names are derived, so that (small)
    # cost is part of the reported elapsed time.
    started = timer()
    target_names = [os.path.basename(path) for path in src_list]
    put_options = {
        'encoding': XVID,
        'size': -1,
        'sample': 1.0,
        'offset': 0,
        'limit': -1,
        'batch_size': 30,
        'num_processes': os.cpu_count(),
    }
    per_video_logs = storage.put_many(src_list, target_names, log=True, args=put_options)
    elapsed = timer() - started
    logrecord('full', {'file': src_list}, 'put', str({'elapsed': elapsed}), 's')

    # NOTE(review): these records pass the list index as the settings argument,
    # whereas every other call passes a {'file': ...} dict - confirm logrecord
    # accepts a bare integer here.
    for index, entry in enumerate(per_video_logs):
        logrecord('fullMany', index, 'put', str({'elapsed': entry}), 's')

    # get() is deliberately not called for now, so no per-video 'get' records
    # are logged by this function.
# Benchmark driver: run the sequential full-manager pipeline once for every
# distinct youtube_id listed in the training CSV and report the total time.
df = pd.read_csv('./deeplens/media/train/processed_yt_bb_detection_train.csv', sep=',',
                 dtype={'youtube_id': str})
youtube_ids = df['youtube_id']
# dict.fromkeys drops repeated ids while keeping first-seen order.
youtube_ids2 = list(dict.fromkeys(youtube_ids))

total_start = timer()
for item in youtube_ids2:
    try:
        # Path construction stays inside the try so that an id that is not a
        # string is reported and skipped, exactly as before.
        video_path = "./deeplens/media/train/" + item + ".mp4"
        runFullSequential(video_path, cleanUp=False)
    except Exception as exc:
        # Best effort: skip this video and carry on. Catching Exception rather
        # than using a bare except lets Ctrl-C / SystemExit stop the run, and
        # the cause is printed because not every failure is a missing file.
        print("missing file for full", item, "-", repr(exc))
print("Total time for full without parallelism within a video (cleanUp = False):", timer() - total_start)