Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions config/venue_normalization.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

/**
Expansion and normalization of venue names, to allow consistent searching and more effective synonym use.
**/

// Normalization rules, keyed first by tag name and then by tag value.
// Each rule may carry:
//   alt_suffixes - lowercase name endings that are stripped before the suffix is applied
//   suffix       - text appended to the stripped name to form the alias
var mapping = {

  // railway=station
  railway: {
    station: { alt_suffixes: [ 'station' ], suffix: 'Railway Station' }
  },

  // station=light_rail
  station: {
    light_rail: { alt_suffixes: [ 'station', 'light rail' ], suffix: 'light rail station' }
  }
};

// export the tag -> value -> rule table as this module's only value
module.exports = mapping;
6 changes: 5 additions & 1 deletion stream/importPipeline.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
var categoryDefaults = require('../config/category_map');
var venueNormalizations = require('../config/venue_normalization');

var streams = {};

streams.config = {
categoryDefaults: categoryDefaults
categoryDefaults: categoryDefaults,
venueNormalizations: venueNormalizations
};

streams.pbfParser = require('./multiple_pbfs').create;
Expand All @@ -13,6 +15,7 @@ streams.tagMapper = require('./tag_mapper');
streams.adminLookup = require('pelias-wof-admin-lookup').create;
streams.addressExtractor = require('./address_extractor');
streams.categoryMapper = require('./category_mapper');
streams.venueNormalization = require('./venue_normalization');
streams.dbMapper = require('pelias-model').createDocumentMapperStream;
streams.elasticsearch = require('pelias-dbclient');

Expand All @@ -24,6 +27,7 @@ streams.import = function(){
.pipe( streams.addressExtractor() )
.pipe( streams.blacklistStream() )
.pipe( streams.categoryMapper( categoryDefaults ) )
.pipe( streams.venueNormalization( venueNormalizations ) )
.pipe( streams.adminLookup() )
.pipe( streams.dbMapper() )
.pipe( streams.elasticsearch({name: 'openstreetmap'}) );
Expand Down
76 changes: 76 additions & 0 deletions stream/venue_normalization.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@

/**
The venue normalization stream is similar to the category mapper.
It is designed to add standardized aliases for different venue types, to allow easier searching.
**/

var through = require('through2');
var peliasLogger = require( 'pelias-logger' ).get( 'openstreetmap' );

module.exports = function( mapping ){

return through.obj( function( doc, enc, next ){

try {

// do not normalize addresses
if( doc.getType().match('address') ){
return next( null, doc );
}

// skip records with no tags
var tags = doc.getMeta('tags');
if( !tags ){
return next( null, doc );
}

var name = doc.getName( 'default' );
if ( !name ) {
name = doc.getNameAliases( 'default' );
}
if ( !name || name.legnth === 0 ){
return next( null, doc );
}

// iterate over mapping
for( var key in mapping ){

// check each mapping key against document tags
if( !tags.hasOwnProperty( key ) ){ continue; }

// handle regular features
for( var feature in mapping[key] ){
if( tags[key] === feature ){
var rule = mapping[key][feature];
addAliases( name, rule, doc );
}
}
}
}

catch( e ){
peliasLogger.error( 'venue normalization error' );
peliasLogger.error( e.stack );
peliasLogger.error( JSON.stringify( doc, null, 2 ) );
}

return next( null, doc );

});

};

/**
  Add a normalized alias to a document, built from its name and a rule.

  Each entry of rule.alt_suffixes which the name ends with (compared
  case-insensitively and preceded by a space) is stripped, in the order
  listed, then rule.suffix is appended and the result is stored as an alias
  of the 'default' name.

  @param {string} name - the document name to normalize.
  @param {Object} rule - may contain `alt_suffixes` (array of strings) and `suffix` (string).
  @param {Object} doc - the document; only doc.setNameAlias() is called on it.
**/
function addAliases( name, rule, doc ) {
  var base = name;

  // iterate array elements only; for..in would also visit any extra
  // enumerable properties attached to the array
  if( Array.isArray( rule.alt_suffixes ) ){
    rule.alt_suffixes.forEach( function( altSuffix ){
      // lowercase the configured suffix as well, so that a mixed-case
      // entry in the config still matches the lowercased name
      var ending = ' ' + String( altSuffix ).toLowerCase();
      if( base.toLowerCase().endsWith( ending ) ){
        base = base.slice( 0, -ending.length );
      }
    });
  }

  // no alias is added when the rule has no suffix or nothing is left of the name
  if( rule.hasOwnProperty( 'suffix' ) && base.length > 0 ){
    doc.setNameAlias( 'default', base + ' ' + rule.suffix );
  }
}
1 change: 1 addition & 0 deletions test/end-to-end.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ streams.pbfParser()
.pipe( streams.tagMapper() )
.pipe( streams.addressExtractor() )
.pipe( streams.categoryMapper( streams.config.categoryDefaults ) )
.pipe( streams.venueNormalization( streams.config.venueNormalizations ) )
.pipe( model.createDocumentMapperStream() )
.pipe( sink.obj(function (doc) {
results.push(doc);
Expand Down
Loading