-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathduplicates.py
67 lines (51 loc) · 2.04 KB
/
duplicates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
# * coding: utf8 *
'''
duplicates.py
A module that finds and removes features that have empty geometry or that duplicate both the attributes and the geometry of another feature
'''
import arcpy
import re
import os
from xxhash import xxh64
class DuplicateTest(object):
    '''Find, and optionally delete, duplicate or empty-geometry features.

    A feature counts as a duplicate when its non-system attribute values and
    its WKT geometry (with coordinates cut to two decimal places) hash to the
    same digest as a feature read earlier from the same table.
    '''

    # A decimal number with more than two fractional digits: group 1 keeps the
    # integer part plus two decimals, group 2 is the surplus that gets dropped.
    _COORDINATE_PRECISION = re.compile(r'(\d+\.\d{2})(\d+)')

    # Name of the temporary feature layer used to select the rows to delete.
    _LAYER_NAME = 'duplicate_FL'

    def __init__(self, workspace, table_name):
        '''Remember the table to inspect.

        workspace: value assigned to arcpy.env.workspace while working.
        table_name: name of the table/feature class to inspect.
        '''
        # Maps object id -> 'duplicate feature' or 'empty geometry'.
        self.report = {}
        self.workspace = workspace
        self.table_name = table_name

    @staticmethod
    def _trim_coordinates(wkt):
        '''Return wkt with every decimal number cut (not rounded) to two decimals.'''
        return DuplicateTest._COORDINATE_PRECISION.sub(r'\1', wkt)

    @staticmethod
    def _where_clause(delimited_oid_field, object_ids):
        '''Return an SQL IN clause selecting object_ids on the given field.'''
        id_list = ', '.join(str(object_id) for object_id in object_ids)

        return f'{delimited_oid_field} IN ({id_list})'

    def sweep(self):
        '''Scan the table and record duplicate and empty-geometry features.

        Returns self.report, keyed by object id. The first feature seen with a
        given digest is kept; only later matches are reported as duplicates.
        '''
        seen_digests = set()
        arcpy.env.workspace = self.workspace

        try:
            # System-managed fields are unique per row, so they are left out
            # of the comparison.
            fields = [
                f.name for f in arcpy.ListFields(self.table_name)
                if f.type not in ['OID', 'GlobalID', 'Geometry']
            ]
            fields.append('SHAPE@WKT')
            fields.append('OID@')

            with arcpy.da.SearchCursor(self.table_name, fields) as search_cursor:
                for row in search_cursor:
                    object_id = row[-1]
                    wkt = row[-2]

                    if wkt is None:
                        self.report[object_id] = 'empty geometry'
                        continue

                    coord_trim = self._trim_coordinates(wkt)
                    digest = xxh64(f'{row[:-2]} {coord_trim}').hexdigest()

                    if digest in seen_digests:
                        self.report[object_id] = 'duplicate feature'
                    else:
                        seen_digests.add(digest)
        finally:
            # Reset the workspace even when listing fields or reading fails.
            arcpy.ClearEnvironment('workspace')

        return self.report

    def try_fix(self):
        '''Delete every feature listed in self.report (best effort).

        Failures are printed rather than raised. Nothing is done, and the
        geoprocessing environment is left untouched, when the report is empty.
        '''
        if not self.report:
            return

        arcpy.env.workspace = self.workspace

        try:
            # Ask the table for its real object id field instead of assuming
            # it is called OBJECTID; sweep collected the ids through OID@.
            oid_field = arcpy.Describe(self.table_name).OIDFieldName
            delimited = arcpy.AddFieldDelimiters(self.workspace, oid_field)
            sql = self._where_clause(delimited, self.report)

            duplicate_FL = arcpy.management.MakeFeatureLayer(self.table_name, self._LAYER_NAME, sql)
            arcpy.DeleteFeatures_management(duplicate_FL)

            # Report success only after the delete actually ran.
            print(f'Deleted {len(self.report)} duplicate records')
        except Exception as error:
            print(f'unable to delete features: {error}')
        finally:
            try:
                # Drop the temporary layer so a later call can recreate it.
                if arcpy.Exists(self._LAYER_NAME):
                    arcpy.management.Delete(self._LAYER_NAME)
            except Exception:
                # Cleanup is best effort; never mask the outcome above.
                pass

            arcpy.ClearEnvironment('workspace')
if __name__ == '__main__':
fc = r'C:\ZBECK\MISC_WORK\CensusCountReview\ReturnedErrors\errors.gdb\duplicateErrors'
error = sw