-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsnip-import.py
61 lines (52 loc) · 1.8 KB
/
snip-import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import unicodecsv
import sys
import stdnum.issn
import psycopg2
from contextlib import closing
import re
from decimal import Decimal
# Parse the CSV supplied on standard input, decoding it as UTF-8.  Rows are
# consumed as dicts by the import loop at the bottom of the file.
# NOTE(review): this name shadows the standard-library `csv` module (which the
# visible code does not import) -- keep that in mind before adding
# `import csv` here.
csv = unicodecsv.DictReader(sys.stdin, encoding='UTF-8')
def normalize_val(val):
    """Return *val* without surrounding whitespace, or None if it is blank.

    :param val: raw cell text from the CSV reader, or None.  DictReader fills
        the missing fields of a short row with None (its default ``restval``),
        so None is accepted and passed through instead of raising
        AttributeError on ``.strip()``.
    :returns: the stripped string, or None when the input is None, empty or
        whitespace-only.
    """
    if val is None:
        return None
    val = val.strip()
    if val == '':
        return None
    return val
def normalize_issn(val, typ):
    """Parse a CSV cell holding zero or more ISSNs into ``(issn, typ)`` pairs.

    The cell is split on commas, semicolons and whitespace.  Every token is
    compacted with ``stdnum.issn.compact`` and left-padded with zeros to eight
    characters (presumably to restore leading zeros dropped by whatever
    produced the CSV -- TODO confirm).  Duplicates are removed, keeping the
    first-seen order.

    ISSNs that fail validation are still returned; a warning is printed once
    per distinct value so the operator can check the input.

    :param val: raw cell text; a blank cell yields ``[]``.
    :param typ: tag stored next to each ISSN (the caller passes 'P' or 'E').
    :returns: list of ``(issn, typ)`` tuples.
    """
    val = normalize_val(val)
    if val is None:
        return []

    pairs = []
    for token in re.split(r'[,;\s]+', val):
        issn = stdnum.issn.compact(token)
        # A leading/trailing separator ("1234-5678;") or a lone "-" leaves
        # nothing after compaction.  Padding that to "00000000" would create
        # a bogus ISSN that even passes the check digit, so skip it.
        if not issn:
            continue
        issn = issn.zfill(8)
        if (issn, typ) in pairs:
            continue
        pairs.append((issn, typ))
        try:
            stdnum.issn.validate(issn)
        except stdnum.exceptions.ValidationError:
            # Best effort: keep the value, just tell the operator.
            # Single-argument print() behaves the same on Python 2 and 3.
            print('Warning, invalid ISSN: {}'.format(issn))
    return pairs
# Years for which the CSV has a "<year> SNIP" column and the impact_factors
# table has a matching snip_<year> column (2008..2012 inclusive).
years = range(2008, 2013)

# The statement text depends only on `years`, so build it once rather than
# on every row.  Values are always passed as bound parameters.
insert_impact_factor_sql = (
    'INSERT INTO impact_factors (source_title, country, '
    + ', '.join('snip_{}'.format(x) for x in years)
    + ') VALUES (%s, %s' + ', %s' * len(years) + ') RETURNING id'
)
insert_issn_sql = 'INSERT INTO impact_factors_issn (impact_factors_id, issn, typ) VALUES (%s, %s, %s)'

# sys.argv[1] is handed to psycopg2.connect() unchanged as the connection
# string.  closing() guarantees the cursor and connection are closed even if
# a row fails.
with closing(psycopg2.connect(sys.argv[1])) as conn:
    with closing(conn.cursor()) as cursor:
        for line in csv:
            title = normalize_val(line['Source Title'])
            issns = []
            issns.extend(normalize_issn(line['Print-ISSN'], 'P'))
            issns.extend(normalize_issn(line['E-ISSN'], 'E'))
            country = normalize_val(line['Country'])

            # Blank SNIP cells become None (SQL NULL); everything else is
            # parsed as an exact Decimal.
            snip = {}
            for year in years:
                raw = normalize_val(line['{} SNIP'.format(year)])
                snip[year] = Decimal(raw) if raw is not None else None

            # Skip rows that carry neither an ISSN nor a 2012 SNIP value.
            if not issns and snip[2012] is None:
                continue

            vals = [title, country]
            vals.extend(snip[year] for year in years)
            cursor.execute(insert_impact_factor_sql, vals)
            # RETURNING id hands back the key of the row just inserted, used
            # to attach its ISSNs.
            if_id = cursor.fetchone()[0]
            for issn, typ in issns:
                cursor.execute(insert_issn_sql, (if_id, issn, typ))
    # NOTE(review): the original nesting of this commit was lost; it is placed
    # after the row loop so the import is committed once, as a whole.
    conn.commit()