-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathload_data.rb
54 lines (41 loc) · 1.27 KB
/
load_data.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
require 'bundler/setup'
require('riak')
require('riak_crdts')
require('./riak_hosts')
require('./models/zombie')
# Returns the record index stored in the last complete entry of the progress
# log, or -1 when the log is missing, empty, or contains no complete entry.
#
# Only the final few kilobytes are read, so the cost does not grow with the
# size of the log. An entry counts as complete when it has the shape
# "<timestamp>,<index>,<percent>%"; a line truncated by an interrupted write
# is ignored instead of being misread as index 0.
#
# @param logname [String] path of the progress log
# @return [Integer] last logged record index, or -1 if there is none
def last_logged_index(logname)
  return -1 unless File.file?(logname)

  tail = File.open(logname, 'rb') do |f|
    f.seek(-[f.size, 4096].min, IO::SEEK_END)
    f.read
  end
  entry = tail.lines.reverse_each.find { |l| l =~ /\A[^,]*,\d+,[^,]*%\s*\z/ }
  entry ? entry.split(',')[1].to_i : -1
end

# Loads comma-separated records from +filename+, saving one Zombie per line
# and appending a progress entry to +logname+ after each record.
#
# If the progress log already holds entries, every line whose index is at or
# below the last logged index is skipped, so an interrupted load can be
# restarted with the same arguments.
#
# @param filename [String] path of the comma-separated data file
# @param logname [String] path of the progress log (created if missing)
# @param expected_total [Numeric] record count used only to compute the
#   percentage written to the progress log
# @return [nil]
def load_data(filename, logname: "load_progress.txt", expected_total: 1_000_000)
  client = RiakHosts.new.get_riak_connection
  zip3 = RiakCrdts::InvertedIndex.new(client, 'zip3_inv')
  target_i = last_logged_index(logname)

  # Block form closes the log even when a record fails part-way through.
  File.open(logname, 'a') do |log|
    # Write each entry through immediately so the log reflects the records
    # actually saved if the process is killed.
    log.sync = true

    File.open(filename) do |file|
      file.each_with_index do |line, i|
        next if i <= target_i

        fields = line.strip.split(',')
        # A blank line (e.g. a trailing newline at end of file) has no record.
        next if fields.empty?

        zombie = Zombie.new(client)
        zombie.from_array(fields)
        zip = zombie.data[:zip]
        zip_3 = zip[0, 3]
        geohash = zombie.geohash(4)

        zip3.put_index(zip_3, zip)
        zombie.add_index('zip_bin', zip)
        zombie.add_index('zip_inv', zip)
        zombie.add_index('geohash_inv', geohash)
        zombie.save

        # Retrieve indexes periodically to keep unmerged size smallish
        if i % 20 == 0
          zombie.search_index('zip_inv', zip)
          zombie.search_index('geohash_inv', geohash)
          zip3.get_index(zip_3)
        end

        percent = (i / expected_total.to_f * 100.0).round(3)
        log.write("#{Time.now},#{i},#{percent}%\n")
      end
    end
  end
  nil
end
# Script entry point: the first command-line argument is the data file to load.
filename = ARGV[0]
# Fail fast with a usage message rather than passing nil on to load_data.
abort("Usage: ruby #{$PROGRAM_NAME} <data_file>") if filename.nil?
load_data(filename)