-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
28 lines (20 loc) · 786 Bytes
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from simhash import Simhash, SimhashIndex
# Demo script for the simhash package: fingerprints, distances and a
# near-duplicate index.
#
# Every print below is written as a call with a single argument so the same
# source is valid on both Python 2 (parenthesised print statement) and
# Python 3 (print function).

# Hex fingerprints of three slightly different spellings of one sentence.
print('%x' % Simhash('How are you? I am fine. Thanks.').value)
print('%x' % Simhash('How are u? I am fine. Thanks.').value)
print('%x' % Simhash('How r you?I am fine. Thanks.').value)

print('-' * 80)

# Distance between two different inputs, then between two identical inputs.
# NOTE(review): presumably a bit-difference count between fingerprints, so
# the second line should print 0 -- confirm against the simhash docs.
print(Simhash('aa').distance(Simhash('bb')))
print(Simhash('aa').distance(Simhash('aa')))

print('-' * 80)

# Small corpus keyed by integer id; entries 1 and 2 are near-identical text.
data = {
    1: u'How are you? I Am fine. blar blar blar blar blar Thanks.',
    2: u'How are you i am fine. blar blar blar blar blar than',
    3: u'This is simhash test.',
}

# The index is built from (key, Simhash) pairs; keys are stringified ids.
objs = [(str(k), Simhash(v)) for k, v in data.items()]
index = SimhashIndex(objs)

print(index.bucket_size())

# Query with a text that differs from entry 2 only in its last word.
s1 = Simhash(u'How are you i am fine. blar blar blar blar blar thank')
print(index.get_near_dups(s1))

# Add the query itself under key '4' and query again to show the result
# after the insertion.
index.add('4', s1)
print(index.get_near_dups(s1))