Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python 3 compatibility #86

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ on: [push, pull_request]
jobs:
lint:
runs-on: ubuntu-latest
container:
image: python:2.7.18-buster
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10.13'
- name: Install requirements
run: pip install flake8 pycodestyle
- name: Check syntax
Expand All @@ -16,7 +18,7 @@ jobs:
needs: lint
strategy:
matrix:
ckan-version: [2.8]
ckan-version: [2.8, 2.9, "2.9-py2", "2.10"]
fail-fast: false

name: CKAN ${{ matrix.ckan-version }}
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ the CKAN config file, comma separated:

ckanext.dcat_ch_rdf_harvester.test_env_urls = https://test.example.com,https://staging.example.com

The Swiss DCAT Harvester inherits all configuration options from the [DCAT RDF harvester](https://github.com/ckan/ckanext-dcat#rdf-dcat-harvester).
It has the following additional configuration options:

Exclude datasets from import: this will prevent the import of datasets with certain identifiers.
Expand Down
28 changes: 14 additions & 14 deletions ckanext/dcatapchharvest/dcat_helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import iribaker
import json
import os
from urlparse import urlparse
from urllib.parse import urlparse
from ckantoolkit import config
from rdflib import URIRef, Graph
from rdflib.namespace import Namespace, RDF, SKOS
Expand Down Expand Up @@ -85,7 +85,7 @@ def dataset_uri(dataset_dict, dataset_ref=None):
to the production site. In that case, the dataset uris will contain the
url of the test environment, so we have to replace it with the prod one.
"""
uri = (unicode(dataset_ref)
uri = (str(dataset_ref)
if isinstance(dataset_ref, URIRef)
else '')
if not uri:
Expand All @@ -110,7 +110,7 @@ def dataset_uri(dataset_dict, dataset_ref=None):

def get_permalink(identifier):
    """Build the permanent URL for a dataset from its identifier.

    Uses the configured ``ckan.site_url`` as the base, producing
    ``<site_url>/perma/<identifier>``.
    """
    site_url = config.get('ckan.site_url')
    # Python 3: all str literals are unicode, the u'' prefix is redundant.
    return '{0}/perma/{1}'.format(site_url, identifier)


def resource_uri(resource_dict, distribution=None):
Expand All @@ -127,7 +127,7 @@ def resource_uri(resource_dict, distribution=None):
resource haven't been saved. This is all right as it will be generated
when the dataset is output in RDF format.
"""
uri = (unicode(distribution)
uri = (str(distribution)
if isinstance(distribution, URIRef)
else '')
if not uri:
Expand All @@ -154,7 +154,7 @@ def resource_uri(resource_dict, distribution=None):
def get_frequency_values():
g = Graph()
frequency_mapping = {}
for prefix, namespace in frequency_namespaces.items():
for prefix, namespace in list(frequency_namespaces.items()):
g.bind(prefix, namespace)
file = os.path.join(__location__, 'frequency.ttl')
g.parse(file, format='turtle')
Expand All @@ -169,24 +169,24 @@ def get_frequency_values():

def get_license_uri_by_name(vocabulary_name):
    """Return the license URI whose display name matches *vocabulary_name*.

    Looks the name up in the license vocabulary loaded by
    ``get_license_values()``; returns ``None`` when no license matches.
    """
    license_vocabulary = get_license_values()
    # Iterate the items view directly: wrapping it in list() is an
    # unnecessary copy in Python 3.
    for key, value in license_vocabulary.items():
        if str(vocabulary_name) == str(value):
            return key
    return None


def get_license_name_by_uri(vocabulary_uri):
    """Return the display name of the license identified by *vocabulary_uri*.

    Looks the URI up in the license vocabulary loaded by
    ``get_license_values()``; returns ``None`` when the URI is unknown.
    """
    license_vocabulary = get_license_values()
    # Iterate the items view directly: wrapping it in list() is an
    # unnecessary copy in Python 3.
    for key, value in license_vocabulary.items():
        if str(vocabulary_uri) == str(key):
            return str(value)
    return None


def get_license_values():
g = Graph()
license_mapping = {}
for prefix, namespace in license_namespaces.items():
for prefix, namespace in list(license_namespaces.items()):
g.bind(prefix, namespace)
file = os.path.join(__location__, 'license.ttl')
g.parse(file, format='turtle')
Expand All @@ -204,7 +204,7 @@ def get_license_values():
def get_theme_mapping():
g = Graph()
theme_mapping = {}
for prefix, namespace in theme_namespaces.items():
for prefix, namespace in list(theme_namespaces.items()):
g.bind(prefix, namespace)
file = os.path.join(__location__, 'themes.ttl')
g.parse(file, format='turtle')
Expand Down Expand Up @@ -232,13 +232,13 @@ def get_pagination(catalog_graph):
]
for key, ref in items:
for obj in catalog_graph.objects(pagination_node, ref):
pagination[key] = unicode(obj)
pagination[key] = str(obj)
return pagination


def get_format_values():
g = Graph()
for prefix, namespace in format_namespaces.items():
for prefix, namespace in list(format_namespaces.items()):
g.bind(prefix, namespace)
file = os.path.join(__location__, 'formats.xml')
g.parse(file, format='xml')
Expand Down
2 changes: 1 addition & 1 deletion ckanext/dcatapchharvest/harvest_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def map_resources_to_ids(pkg_dict, package_id):
{r['id']: _get_resource_id_string(r) for r in existing_resources}
for resource in pkg_dict.get('resources'):
resource_id_dict = _get_resource_id_string(resource)
id_to_reuse = [k for k, v in existing_resources_mapping.items()
id_to_reuse = [k for k, v in list(existing_resources_mapping.items())
if v == resource_id_dict]
if id_to_reuse:
id_to_reuse = id_to_reuse[0]
Expand Down
4 changes: 2 additions & 2 deletions ckanext/dcatapchharvest/harvesters.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def validate_config(self, source_config):
if not isinstance(excluded_dataset_identifiers, list):
raise ValueError('excluded_dataset_identifiers must be '
'a list of strings')
if not all(isinstance(item, basestring)
if not all(isinstance(item, str)
for item in excluded_dataset_identifiers):
raise ValueError('excluded_dataset_identifiers must be '
'a list of strings')
Expand All @@ -52,7 +52,7 @@ def validate_config(self, source_config):
if not isinstance(excluded_rights, list):
raise ValueError('excluded_rights must be '
'a list of strings')
if not all(isinstance(item, basestring)
if not all(isinstance(item, str)
for item in excluded_rights):
raise ValueError('excluded_rights must be '
'a list of strings')
Expand Down
43 changes: 25 additions & 18 deletions ckanext/dcatapchharvest/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def _add_multilang_value(self, subject, predicate, dataset_key=None,
multilang_values = dataset_dict.get(dataset_key)
if multilang_values:
try:
for key, values in multilang_values.iteritems():
for key, values in multilang_values.items():
if values:
# the values can be either a multilang-dict or they are
# nested in another iterable (e.g. keywords)
Expand Down Expand Up @@ -140,11 +140,11 @@ def _object_value(self, subject, predicate, multilang=False):
lang_dict = {}
for o in self.g.objects(subject, predicate):
if multilang and o.language:
lang_dict[o.language] = unicode(o)
lang_dict[o.language] = str(o)
elif multilang:
lang_dict[default_lang] = unicode(o)
lang_dict[default_lang] = str(o)
else:
return unicode(o)
return str(o)
if multilang:
# when translation does not exist, create an empty one
for lang in dh.get_langs():
Expand All @@ -160,8 +160,8 @@ def _object_value_and_datatype(self, subject, predicate):
"""
for o in self.g.objects(subject, predicate):
if isinstance(o, Literal):
return unicode(o), o.datatype
return unicode(o), None
return str(o), o.datatype
return str(o), None
return None, None

def _get_publisher_url_from_identifier(self, identifier):
Expand Down Expand Up @@ -272,7 +272,7 @@ def _license_rights_name(self, subject, predicate):
# DCAT-AP CH v1: the license as a literal (should be
# the code for one of the DCAT-AP CH licenses)
if isinstance(node, Literal):
return unicode(node)
return str(node)
if isinstance(node, URIRef):
return dh.get_license_name_by_uri(node)
return None
Expand All @@ -294,7 +294,7 @@ def _keywords(self, subject):

for keyword_node in self.g.objects(subject, DCAT.keyword):
lang = keyword_node.language
keyword = munge_tag(unicode(keyword_node))
keyword = munge_tag(str(keyword_node))
keywords.setdefault(lang, []).append(keyword)

return keywords
Expand Down Expand Up @@ -352,14 +352,14 @@ def _temporals(self, subject):
return temporals

def _clean_datetime(self, datetime_value, data_type):
"""Convert a literal in one of the accepted data types into an isoformat
datetime string.
"""Convert a literal in one of the accepted data types into an
isoformat datetime string.

Accepted types are: xsd:date, xsd:dateTime, xsd:gYear, or
xsd:gYearMonth; or schema:Date or schema:DateTime, for temporals
specified as schema:startDate and schema:endDate.

We only consider the parts of the date that are expected from the given
We only consider parts of the date that are expected from the given
data_type, e.g. the year of an xsd:gYear, even if the month and day
have been included in the datetime_value. If a datetime_value with
data_type of xsd:dateTime or schema:DateTime does not contain time
Expand Down Expand Up @@ -438,7 +438,7 @@ def _clean_end_datetime(self, datetime_value, data_type):
def _get_eu_accrual_periodicity(self, subject):
    """Map a dataset's dct:accrualPeriodicity onto the EU frequency key.

    Reads the harvested frequency URI from the graph and, if it appears
    as a value in ``valid_frequencies``, returns the corresponding EU
    key; otherwise the original URIRef is returned unchanged.
    """
    ogdch_value = self._object_value(subject, DCT.accrualPeriodicity)
    ogdch_value = URIRef(ogdch_value)
    for key, value in valid_frequencies.items():
        if ogdch_value == value:
            # Return on the first match; continuing the scan would
            # compare the replacement key against the remaining values.
            return key
    return ogdch_value
Expand All @@ -461,12 +461,18 @@ def _get_groups(self, subject):
for dcat_theme_url in dcat_theme_urls:
eu_theme_url = None

# Python 2 / 3 compatibility
import sys
if sys.version_info[0] >= 3:
unicode = str

# Case 1: We get a deprecated opendata.swiss theme. Replace
# the base url with the dcat-ap.ch base url, so we can
# look it up in the theme mapping.
if dcat_theme_url.startswith(OGD_THEMES_URI):
new_theme_url = dcat_theme_url.replace(
OGD_THEMES_URI, CHTHEMES_URI)

eu_theme_url = unicode(
eu_theme_mapping[URIRef(new_theme_url)][0])

Expand Down Expand Up @@ -545,7 +551,7 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa
# Tags
keywords = self._object_value_list(dataset_ref, DCAT.keyword) or []
for keyword in keywords:
dataset_dict['tags'].append({'name': munge_tag(unicode(keyword))})
dataset_dict['tags'].append({'name': munge_tag(str(keyword))})

# Keywords
dataset_dict['keywords'] = self._keywords(dataset_ref)
Expand Down Expand Up @@ -726,7 +732,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa

g = self.g

for prefix, namespace in namespaces.iteritems():
for prefix, namespace in namespaces.items():
g.bind(prefix, namespace)

g.add((dataset_ref, RDF.type, DCAT.Dataset))
Expand Down Expand Up @@ -1095,10 +1101,11 @@ def graph_from_catalog(self, catalog_dict, catalog_ref):

def _accrual_periodicity_to_graph(self, dataset_ref, accrual_periodicity):
g = self.g
old_valid_frequencies = filter(
lambda i: i != URIRef(
"http://purl.org/cld/freq/completelyIrregular"),
list(valid_frequencies.values()))
old_valid_frequencies = [
i for i in list(valid_frequencies.values())
if i != URIRef(
"http://purl.org/cld/freq/completelyIrregular")
]
if URIRef(accrual_periodicity) in \
old_valid_frequencies + list(valid_frequencies.keys()):
g.add((
Expand Down
Loading
Loading