Skip to content

Commit

Permalink
feat(partsGenerator): this new function will delay the part generation
Browse files Browse the repository at this point in the history
This will help for label deduplication in the API
  • Loading branch information
Joxit committed May 16, 2022
1 parent e60f6b1 commit 12ab8d0
Show file tree
Hide file tree
Showing 17 changed files with 1,265 additions and 131 deletions.
8 changes: 4 additions & 4 deletions builders/JPN-JPN.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ function scalarOrFirstElement(param) {

function formatPostalCode(record) {
if (record.postalcode) {
return '〒' + record.postalcode;
return { label: '〒' + record.postalcode, role: 'required', layer: 'postalcode' };
}
}

Expand All @@ -31,7 +31,7 @@ function buildAdminLabelPart(schema, record) {
// detect this case and then return the street value
function formatDistrictPart(record) {
if (scalarOrFirstElement(record.name.default).includes(record.street)) {
return record.street;
return { label: record.street, role: 'required', layer: 'district' };
}
}

Expand All @@ -43,10 +43,10 @@ function formatBlockPart(record) {

const match = bangoRegex.exec(record.housenumber);
if (match) {
return `${match[1]}${match[2]}号`;
return { label: `${match[1]}${match[2]}号`, role: 'required', layer: 'housenumber' };
}

return record.housenumber;
return record.housenumber && { label: record.housenumber, role: 'required', layer: 'housenumber' };
}

function venueName(record) {
Expand Down
7 changes: 4 additions & 3 deletions builders/JPN.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
const _ = require('lodash');
const labelUtils = require('../labelUtils');

function buildPrimaryName(schema, record) {
if (Array.isArray(record.name.default)) {
return record.name.default.slice(0,1);
}

return [record.name.default];
return [{ label: record.name.default, role: 'required', layer: 'name' }];
}

// create a "normalized" version of a Japanese address part
// do this by removing some portions of an admin area that should
// otherwise be ignored for deduplication, like the "prefecture" suffix
function normalizeJapaneseAdmin(input) {
const lower = input.toLowerCase();
const lower = labelUtils.getLabel(input).toLowerCase();
return lower
.replace(/^(.*)-shi$/i, '$1')
.replace(/^(.*)\sprefecture$/i, '$1');
Expand Down Expand Up @@ -52,7 +53,7 @@ function japanBuilder(schema, record) {
labelParts = _.compact(labelParts);

// remove exact duplicates or admin areas will have their name twice
labelParts = _.uniq(labelParts);
labelParts = labelUtils.uniq(labelParts);

return labelParts;
}
Expand Down
10 changes: 6 additions & 4 deletions builders/KOR.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
const _ = require('lodash');
const labelUtils = require('../labelUtils');

function dedupeNameAndLastLabelElement(labelParts) {
// only dedupe if a result has more than a name (the first label part)
if (labelParts.length > 1) {
// first, dedupe the name and second to last label array elements
// this is used to ensure that the `name` and most granular admin hierarchy elements aren't repeated
// eg - `["South Korea", "Seoul", "Seoul"]` -> `["South Korea", "Seoul"]`
const deduped = _.uniq([labelParts.pop(), labelParts.pop()]).reverse();

// second, unshift the deduped parts back onto the labelParts
labelParts.push.apply(labelParts, deduped);
// we take the last part because the layer should be the name and is required
if (labelUtils.getLabel(labelParts[labelParts.length - 1]) === labelUtils.getLabel(labelParts[labelParts.length - 2])) {
const last = labelParts.pop();
labelParts[labelParts.length - 1] = last;
}

}

Expand Down
34 changes: 27 additions & 7 deletions labelGenerator.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
const _ = require('lodash');

const getSchema = require('./getSchema');
const labelUtils = require('./labelUtils');

function dedupeNameAndFirstLabelElement(labelParts) {
// only dedupe if a result has more than a name (the first label part)
if (labelParts.length > 1) {
// first, dedupe the name and 1st label array elements
// this is used to ensure that the `name` and first admin hierarchy elements aren't repeated
// eg - `["Lancaster", "Lancaster", "PA", "United States"]` -> `["Lancaster", "PA", "United States"]`
const deduped = _.uniq([labelParts.shift(), labelParts.shift()]);
// second, unshift the deduped parts back onto the labelParts
labelParts.unshift.apply(labelParts, deduped);
// we take the first part because the layer should be the name and is required
if (labelUtils.getLabel(labelParts[0]) === labelUtils.getLabel(labelParts[1])) {
const first = labelParts.shift();
labelParts[0] = first;
}

}

Expand Down Expand Up @@ -62,12 +65,17 @@ function buildPrefixLabelParts(schema, record) {
return [];
}

const street = [];
if (record.layer === 'venue' && record.street) {
street.push({ label: record.street, role: 'optional', layer: 'street' });
}

// support name aliases
if (Array.isArray(record.name.default)) {
return record.name.default.slice(0,1);
return _.concat({ label: record.name.default.slice(0, 1), role: 'required', layer: 'name' }, street);
}

return [record.name.default];
return _.concat({ label: record.name.default, role: 'required', layer: 'name' }, street);

}

Expand Down Expand Up @@ -100,12 +108,24 @@ function defaultBuilder(schema, record) {
return dedupeNameAndFirstLabelElement(labelParts);
}

module.exports = function( record, language ){
function generator( record, language ) {
const schema = getSchema(record, language);
const separator = _.get(schema, ['meta','separator'], ', ');
const builder = _.get(schema, ['meta', 'builder'], defaultBuilder);

let labelParts = builder(schema, record);

return _.trim(labelParts.join(separator));
return { labelParts, separator };
}

module.exports = function( record, language ) {
const { labelParts, separator } = generator(record, language);
const label = labelParts
.filter(labelUtils.isRequired)
.map(labelUtils.getLabel)
.join(separator);

return _.trim(label);
};

module.exports.partsGenerator = generator;
82 changes: 48 additions & 34 deletions labelSchema.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ function normalizeString(str){
const FRA_OVERSEAS = ['GF', 'GP', 'MQ', 'RE', 'YT'];

// find the first field of record that has a non-empty value that's not already in labelParts
function getFirstProperty(fields) {
function getFirstProperty(fields, role = 'required') {
return function(record) {
for (var i = 0; i < fields.length; i++) {
var fieldValue = record[fields[i]];

if (!_.isEmpty(fieldValue)) {
return fieldValue[0];
return { label: fieldValue[0], role, layer: fields[i] };
}

}
Expand All @@ -34,29 +34,29 @@ function getFirstProperty(fields) {
// the full state/province name, eg: Pennsylvania, USA and Ontario, CA
// 3. otherwise, the state/province abbreviation should be used, eg: Lancaster, PA, USA and Bruce, ON, CA
// 4. if the abbreviation isn't available, use the full state/province name
function getRegionalValue(record) {
function getRegionalValue(record, role = 'required') {
if (!_.isEmpty(record.dependency) || !_.isEmpty(record.dependency_a)) {
return;
}

if ('region' === record.layer && !_.isEmpty(record.region)) {
// return full state name when state is the most granular piece of info
return record.region[0];
return { label: record.region[0], role, layer: 'region' };

} else if (!_.isEmpty(record.region_a)) {
// otherwise just return the region code when available
return record.region_a[0];
return { label: record.region_a[0], role, layer: 'region' };

} else if (!_.isEmpty(record.region)) {
// return the full name when there's no region code available
return record.region[0];
return { label: record.region[0], role, layer: 'region' };
}
}

// The same as getRegionalValue above, but only returns a region if the region name
// is distinct from the locality/localadmin/city name
// This works best for large cities in countries where the region name/abbr is not _always_ included in the label
function getUniqueRegionalValue(record) {
function getUniqueRegionalValue(record, role = 'required') {
if (!_.isEmpty(record.dependency) || !_.isEmpty(record.dependency_a)) {
return;
}
Expand All @@ -65,10 +65,10 @@ function getUniqueRegionalValue(record) {
if ('region' === record.layer) {
if (!_.isEmpty(record.region)) {
// return full state name when state is the most granular piece of info
return record.region[0];
return { label: record.region[0], role, layer: 'region' };
}
} else {
const localityValue = getFirstProperty(['locality', 'localadmin'])(record);
const localityValue = _.get(getFirstProperty(['locality', 'localadmin'])(record), 'label');

if (record.region && normalizeString(localityValue) === normalizeString(record.region[0])) {
// skip returning anything when the region and locality name are identical
Expand All @@ -78,9 +78,9 @@ function getUniqueRegionalValue(record) {

// prefer the region abbreviation, fall back to the region name if no abbreviation
if (!_.isEmpty(record.region_a)) {
return record.region_a[0];
return { label: record.region_a[0], role, layer: 'region' };
} else if (!_.isEmpty(record.region)) {
return record.region[0];
return { label: record.region[0], role, layer: 'region' };
}
}
}
Expand All @@ -91,20 +91,20 @@ function getUniqueRegionalValue(record) {
// 3. use dependency abbreviation if applicable, eg - San Juan, PR
// 4. use dependency name if no abbreviation, eg - San Juan, Puerto Rico
// 5. use country abbreviation, eg - Lancaster, PA, USA
function getUSADependencyOrCountryValue(record) {
function getUSADependencyOrCountryValue(record, role = 'required') {
if ('dependency' === record.layer && !_.isEmpty(record.dependency)) {
return record.dependency[0];
return { label: record.dependency[0], role, layer: 'dependency' };
} else if ('country' === record.layer && !_.isEmpty(record.country)) {
return record.country[0];
return { label: record.country[0], role, layer: 'country' };
}

if (!_.isEmpty(record.dependency_a)) {
return record.dependency_a[0];
return { label: record.dependency_a[0], role, layer: 'dependency' };
} else if (!_.isEmpty(record.dependency)) {
return record.dependency[0];
return { label: record.dependency[0], role, layer: 'dependency' };
}

return record.country_a[0];
return { label: record.country_a[0], role, layer: 'country' };
}

// this function generates the last field of the labels for FRA records
Expand All @@ -122,10 +122,25 @@ function getFRACountryValue() {
};
}

// Builds the value function for the region part of FRA labels.
// The region part is created with the 'optional' role.
// 1. returns nothing when the record is in the French overseas or in Paris (region_a `VP`),
//    eg - Saint-Denis, Reunion (instead of Saint-Denis, Reunion, Reunion)
// 2. otherwise returns the region name, eg - Bagneux, Hauts-De-Seine, France
// 3. use this with caution: Paris is both a locality and a region, which can produce
//    labels like `Tour Eiffel, Paris, Paris, France`
function getFRARegionValue() {
  const regionPart = getFirstProperty(['region'], 'optional');

  return function(record) {
    // without a region code there is nothing to exclude, fall back to the region name
    if (_.isEmpty(record.region_a)) {
      return regionPart(record);
    }

    const regionCode = record.region_a[0];
    if (regionCode === 'VP' || _.includes(FRA_OVERSEAS, regionCode)) {
      return undefined;
    }

    return regionPart(record);
  };
}

function isInNYC(record) {
const _region_a = getFirstProperty(['region_a'])(record);
const _country_a = getFirstProperty(['country_a'])(record);
const _locality_a = getFirstProperty(['locality_a'])(record);
const _region_a = _.get(getFirstProperty(['region_a'])(record), 'label');
const _country_a = _.get(getFirstProperty(['country_a'])(record), 'label');
const _locality_a = _.get(getFirstProperty(['locality_a'])(record), 'label');

return _country_a === 'USA' && _region_a === 'NY' && _locality_a === 'NYC';
}
Expand All @@ -146,26 +161,23 @@ function getUSABoroughValue(record) {
// - The borough is used for the locality in addresses
// - Except in Queens, where ideally the neighbourhood is
// - Also, 'New York' is the proper locality name for Manhattan
function getNYCLocalValue(record) {
const _default = getFirstProperty(['locality', 'localadmin', 'county'])(record);
const _borough = getFirstProperty(['borough'])(record);
const _neighbourhood = getFirstProperty(['neighbourhood'])(record);
function getNYCLocalValue(record, role = 'required') {
const _default = getFirstProperty(['locality', 'localadmin', 'county'], role)(record);
const _borough = getFirstProperty(['borough'], role)(record);
const _neighbourhood = getFirstProperty(['neighbourhood'], role)(record);
// We still want to return "neighborhood, borough, region_a" when a user searches for a neighborhood
// otherwise it looks incomplete, so skip to returning the borough in that case
// Otherwise, in Queens only, use the neighborhood for the city in address labels
if ('neighbourhood' !== record.layer &&
_borough &&
_borough.startsWith('Queens') &&
_neighbourhood
_.get(_borough, 'label', '').startsWith('Queens') &&
_.get(_neighbourhood, 'label')
) {
return _neighbourhood;
} else if (_borough &&
_borough.startsWith('Manhattan')
) {
} else if (_.get(_borough, 'label', '').startsWith('Manhattan')) {
// return 'Manhattan, New York' for Manhattan neighbourhoods
if (record.layer === 'neighbourhood') {
return `${_borough}, ${_default}`;
// return only locality for Manhattan venues/addresses
return { label: `${_borough.label}, ${_default.label}`, role };
// return only locality for Manhattan venues/addresses
} else{
return _default;
}
Expand All @@ -174,8 +186,8 @@ function getNYCLocalValue(record) {
}
}

function getUSALocalValue(record) {
const _default = getFirstProperty(['locality', 'localadmin', 'county'])(record);
function getUSALocalValue(record, role = 'required') {
const _default = getFirstProperty(['locality', 'localadmin', 'county'], role)(record);

// NYC is special for addresses
if (isInNYC(record)) {
Expand Down Expand Up @@ -235,7 +247,9 @@ module.exports = {
},
'FRA': {
'valueFunctions': {
'borough': getFirstProperty(['borough'], 'optional'),
'local': getFirstProperty(['locality', 'localadmin']),
'regional': getFRARegionValue(),
'country': getFRACountryValue()
}
},
Expand Down
15 changes: 15 additions & 0 deletions labelUtils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
const _ = require('lodash');

/**
 * Tells whether a label part must appear in the final label.
 *
 * A plain string part is always treated as required; an object part is
 * required only when its `role` is exactly 'required'. A missing part
 * (`null`/`undefined`) is reported as not required instead of throwing, so
 * the function can be used directly as an `Array.prototype.filter` predicate
 * on lists that have not been compacted.
 *
 * @param {string|{label: *, role: string, layer: string}|null|undefined} labelPart
 * @returns {boolean} true when the part must be kept
 */
function isRequired(labelPart) {
  if (typeof labelPart === 'string') {
    return true;
  }

  // guard against null/undefined parts before reading `role`
  return Boolean(labelPart) && labelPart.role === 'required';
}

/**
 * Extracts the display text of a label part.
 *
 * A plain string part is its own text; an object part exposes its text
 * through the `label` property. A missing part (`null`/`undefined`) yields
 * `undefined` instead of throwing a TypeError.
 *
 * @param {string|{label: *, role: string, layer: string}|null|undefined} labelPart
 * @returns {*} the string itself, the part's `label`, or undefined for a missing part
 */
function getLabel(labelPart) {
  if (typeof labelPart === 'string') {
    return labelPart;
  }

  // a missing part has no label to read
  if (labelPart === null || labelPart === undefined) {
    return undefined;
  }

  return labelPart.label;
}

// Removes label parts whose label text duplicates that of another part.
// Two parts are considered equal when `getLabel` returns strictly equal
// values for both, regardless of their role or layer.
function uniq(labelParts) {
  const haveSameLabel = (left, right) => getLabel(left) === getLabel(right);

  return _.uniqWith(labelParts, haveSameLabel);
}

module.exports = { getLabel, isRequired, uniq };
Loading

0 comments on commit 12ab8d0

Please sign in to comment.