-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathremdup.py
executable file
·93 lines (83 loc) · 2.72 KB
/
remdup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python3
#
#+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!
# #
# remdup.py #
# #
#+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!+!
#
# Author: Pat Prodanovic, Ph.D., P.Eng.
#
# Date: July 23, 2015
#
# Updated: Feb 21, 2016
# Made it work under python 2 or 3
#
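# Purpose: Script takes in a *.csv of the nodes (x,y,z or x,y,z,size) and
# removes duplicates using OrderedDict from collections. Two nodes are
# duplicates when their x,y (rounded to three decimals) match; the
# first occurrence is kept and the output is written as x,y,z.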
#
# Uses: Python 2 or 3, Numpy
#
# Example:
#
# python remdup.py -i nodes.csv -o nodes_remdup.csv
# where:
# -i input nodes file
# -o output nodes file where duplicates are removed
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Global Imports
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import os,sys
import numpy as np
from collections import OrderedDict
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# MAIN
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# I/O, duplicate removal and the script entry point
def _load_nodes(input_file):
    """Read the nodes *.csv and return its columns as (x, y, z, size).

    x, y and z are rounded to three decimals. size is the fourth column
    when the file has exactly four columns, otherwise an array of zeros.

    Raises ValueError when the file has fewer than three columns (this
    also covers an empty file).
    """
    # ndmin=2 keeps a one-line file two-dimensional, so the column slices
    # below work for a single node as well
    nodes_data = np.loadtxt(input_file, delimiter=',', ndmin=2)

    # the nodes file is either x,y,z or x,y,z,size; count the columns that
    # numpy actually parsed rather than the raw first line of the file
    n_attr = nodes_data.shape[1]
    if n_attr < 3:
        raise ValueError('nodes file must have at least 3 columns (x,y,z): '
                         + str(input_file))

    # crop all the points to three decimals only
    x = np.around(nodes_data[:, 0], decimals=3)
    y = np.around(nodes_data[:, 1], decimals=3)
    z = np.around(nodes_data[:, 2], decimals=3)

    if n_attr == 4:
        size = nodes_data[:, 3]
    else:
        size = np.zeros(len(x))
    return x, y, z, size


def _remove_duplicates(x, y, z, size):
    """Return the (x, y, z, size) tuples with duplicate x,y nodes removed.

    The first node seen for each (x, y) pair is kept and the input order
    is preserved.
    """
    # source "http://stackoverflow.com/questions/12698987"
    unique = OrderedDict()
    for point in zip(x, y, z, size):
        # setdefault stores the point only if this (x, y) key is new
        unique.setdefault(point[:2], point)
    # in python 3 values() is a view object that needs to be converted
    # to a list
    return list(unique.values())


def _write_nodes(output_file, points):
    """Write the nodes to output_file, one 'x,y,z' line per node."""
    # NOTE(review): size is carried with every node but is not written to
    # the output; confirm whether x,y,z,size output is wanted.
    # The with-block guarantees the file is flushed and closed.
    with open(output_file, 'w') as fout:
        for point in points:
            fout.write(str(point[0]) + ',' + str(point[1]) + ',' +
                       '{:.3f}'.format(point[2]) + '\n')


def main(argv=None):
    """Remove duplicate nodes from a *.csv file.

    argv has the same layout as sys.argv (and defaults to it):
        [script, -i, input_file, -o, output_file]
    The two flags are positional placeholders and are not inspected.

    Exits with status 1 when the number of arguments is wrong.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 5:
        print('Wrong number of Arguments, stopping now...')
        print('Usage:')
        print('python remdup.py -i nodes.csv -o nodes_remdup.csv')
        # non-zero status so that calling scripts can detect the failure
        sys.exit(1)

    input_file = argv[2]
    output_file = argv[4]

    # read and filter everything first; the output file is only created
    # once the input has been parsed successfully
    x, y, z, size = _load_nodes(input_file)
    mypoints = _remove_duplicates(x, y, z, size)

    # prints the nodes that have duplicates removed
    _write_nodes(output_file, mypoints)


if __name__ == '__main__':
    main()