package de.komoot.photon.nominatim;

import com.google.common.collect.ImmutableList;
import com.neovisionaries.i18n.CountryCode;
import com.vividsolutions.jts.geom.Envelope;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.GeometryFactory;
import com.vividsolutions.jts.geom.Point;
import com.vividsolutions.jts.linearref.LengthIndexedLine;
import de.komoot.photon.Importer;
import de.komoot.photon.PhotonDoc;
import de.komoot.photon.nominatim.model.AddressRow;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.dbcp.BasicDataSource;
import org.postgis.jts.JtsWrapper;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowCallbackHandler;
import org.springframework.jdbc.core.RowMapper;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.atomic.AtomicLong;

/**
 * A Nominatim result consisting of the basic PhotonDoc for the object
 * and a map of attached house numbers together with their respective positions.
 */
class NominatimResult {
    private PhotonDoc doc;
    private Map<String, Point> housenumbers;

    public NominatimResult(PhotonDoc baseobj) {
        doc = baseobj;
        housenumbers = null;
    }

    /**
     * @return the base document this result was created with (without any house number applied here)
     */
    PhotonDoc getBaseDoc() {
        return doc;
    }

    /**
     * @return true if at least one house number is attached or the base document
     * itself reports being useful for the index
     */
    boolean isUsefulForIndex() {
        return (housenumbers != null && !housenumbers.isEmpty()) || doc.isUsefulForIndex();
    }

    /**
     * Expands the result into the documents that should be imported.
     *
     * @return a single-element list with the base document when no house numbers are attached,
     * otherwise one copy of the base document per house number with the house number
     * and its position set
     */
    List<PhotonDoc> getDocsWithHousenumber() {
        if (housenumbers == null || housenumbers.isEmpty()) {
            return ImmutableList.of(doc);
        }

        List<PhotonDoc> results = new ArrayList<PhotonDoc>(housenumbers.size());
        for (Map.Entry<String, Point> e : housenumbers.entrySet()) {
            PhotonDoc copy = new PhotonDoc(doc);
            copy.setHouseNumber(e.getKey());
            copy.setCentroid(e.getValue());
            results.add(copy);
        }
        return results;
    }

    /**
     * Adds house numbers from a house number string.
     *
     * This may either be a single house number or multiple
     * house numbers delimited by a semicolon. All locations
     * will be set to the centroid of the doc geometry.
     *
     * @param str House number string. May be null or empty, in which case nothing is added.
     */
    public void addHousenumbersFromString(String str) {
        if (str == null || str.isEmpty()) {
            return;
        }

        if (housenumbers == null) {
            housenumbers = new HashMap<String, Point>();
        }

        String[] parts = str.split(";");
        for (String part : parts) {
            String h = part.trim();
            if (!h.isEmpty()) {
                housenumbers.put(h, doc.getCentroid());
            }
        }
    }

    /**
     * Adds the house numbers lying strictly between {@code first} and {@code last},
     * placing each one on {@code geom} at a position proportional to its number.
     *
     * Nothing is added when {@code last <= first}, when the range spans more than
     * 1000 numbers, or when {@code geom} is null.
     *
     * @param first         house number at the start of the line (itself not added)
     * @param last          house number at the end of the line (itself not added)
     * @param interpoltype  "odd" or "even" to add only odd or only even numbers; any other
     *                      value, including null, adds every number in between
     * @param geom          geometry along which the numbers are positioned
     */
    public void addHouseNumbersFromInterpolation(long first, long last, String interpoltype, Geometry geom) {
        if (last <= first || (last - first) > 1000) {
            return;
        }

        // Without a geometry there is nothing to position the numbers on.
        if (geom == null) {
            return;
        }

        if (housenumbers == null) {
            housenumbers = new HashMap<String, Point>();
        }

        LengthIndexedLine line = new LengthIndexedLine(geom);
        double si = line.getStartIndex();
        double ei = line.getEndIndex();

        double lstep = (ei - si) / (double) (last - first);

        // leave out first and last, they have a distinct OSM node that is already indexed
        long step = 2;
        long num = 1;
        // Constant-first equals() keeps a null interpolation type from throwing;
        // "!= 0" (rather than "== 1") also recognises negative odd numbers.
        if ("odd".equals(interpoltype)) {
            if (first % 2 != 0) {
                ++num;
            }
        } else if ("even".equals(interpoltype)) {
            if (first % 2 == 0) {
                ++num;
            }
        } else {
            step = 1;
        }

        GeometryFactory fac = geom.getFactory();
        for (; first + num < last; num += step) {
            housenumbers.put(String.valueOf(num + first), fac.createPoint(line.extractPoint(si + lstep * num)));
        }
    }
}

/**
 * Export nominatim data
 *
 * @author felix, christoph
 */
@Slf4j
public class NominatimConnector {
    /** Capacity of the queue between the database reader and the import thread. */
    private static final int IMPORT_QUEUE_CAPACITY = 20;

    /** A progress message is logged every time this many documents have been queued. */
    private static final int PROGRESS_INTERVAL = 50000;

    private final JdbcTemplate template;
    private Map<String, Map<String, String>> countryNames;

    /**
     * Maps a row from location_property_osmline (address interpolation lines) to a photon doc.
     */
    private final RowMapper<NominatimResult> osmlineRowMapper = new RowMapper<NominatimResult>() {
        @Override
        public NominatimResult mapRow(ResultSet rs, int rownum) throws SQLException {
            Geometry geometry = DBUtils.extractGeometry(rs, "linegeo");

            PhotonDoc doc = new PhotonDoc(
                    rs.getLong("place_id"),
                    "W",
                    rs.getLong("osm_id"),
                    "place",
                    "house_number",
                    Collections.<String, String>emptyMap(), // no name
                    (String) null,
                    Collections.<String, String>emptyMap(), // no extratags
                    (Envelope) null,
                    rs.getLong("parent_place_id"),
                    0d, // importance
                    CountryCode.getByCode(rs.getString("calculated_country_code")),
                    (Point) null, // centroid
                    0,
                    30
            );
            doc.setPostcode(rs.getString("postcode"));
            doc.setCountry(getCountryNames(rs.getString("calculated_country_code")));

            NominatimResult result = new NominatimResult(doc);
            result.addHouseNumbersFromInterpolation(
                    rs.getLong("startnumber"),
                    rs.getLong("endnumber"),
                    rs.getString("interpolationtype"),
                    geometry);

            return result;
        }
    };

    /**
     * maps a placex row in nominatim to a photon doc, some attributes are still missing and can be derived by connected address items.
     */
    private final RowMapper<NominatimResult> placeRowMapper = new RowMapper<NominatimResult>() {
        @Override
        public NominatimResult mapRow(ResultSet rs, int rowNum) throws SQLException {
            double importance = rs.getDouble("importance");
            if (rs.wasNull()) {
                // https://github.com/komoot/photon/issues/12
                int rankSearch = rs.getInt("rank_search");
                importance = 0.75 - rankSearch / 40d;
            }

            Geometry geometry = DBUtils.extractGeometry(rs, "bbox");
            Envelope envelope = geometry != null ? geometry.getEnvelopeInternal() : null;

            PhotonDoc doc = new PhotonDoc(
                    rs.getLong("place_id"),
                    rs.getString("osm_type"),
                    rs.getLong("osm_id"),
                    rs.getString("class"),
                    rs.getString("type"),
                    DBUtils.getMap(rs, "name"),
                    (String) null,
                    DBUtils.getMap(rs, "extratags"),
                    envelope,
                    rs.getLong("parent_place_id"),
                    importance,
                    CountryCode.getByCode(rs.getString("calculated_country_code")),
                    (Point) DBUtils.extractGeometry(rs, "centroid"),
                    rs.getLong("linked_place_id"),
                    rs.getInt("rank_search")
            );

            doc.setPostcode(rs.getString("postcode"));
            doc.setCountry(getCountryNames(rs.getString("calculated_country_code")));

            NominatimResult result = new NominatimResult(doc);
            result.addHousenumbersFromString(rs.getString("housenumber"));

            return result;
        }
    };

    private final String selectColsPlaceX = "place_id, osm_type, osm_id, class, type, name, housenumber, postcode, extratags,"
            + " ST_Envelope(geometry) AS bbox, parent_place_id, linked_place_id, rank_search, importance,"
            + " calculated_country_code, centroid";

    private Importer importer;

    /**
     * Looks up the names of a country, loading the whole country_name table on first use.
     *
     * @param countrycode country code as stored in the country_name table
     * @return the name map for the country, or null if the table has no entry for the code
     */
    private Map<String, String> getCountryNames(String countrycode) {
        if (countryNames == null) {
            // Fill a local map first so that a failing query does not leave a
            // partially populated cache behind.
            final Map<String, Map<String, String>> names = new HashMap<String, Map<String, String>>();
            template.query("SELECT country_code, name FROM country_name;", new RowCallbackHandler() {
                        @Override
                        public void processRow(ResultSet rs) throws SQLException {
                            names.put(rs.getString("country_code"), DBUtils.getMap(rs, "name"));
                        }
                    }
            );
            countryNames = names;
        }

        return countryNames.get(countrycode);
    }

    /**
     * @param host     database host
     * @param port     database port
     * @param database database name
     * @param username db username
     * @param password db username's password
     */
    public NominatimConnector(String host, int port, String database, String username, String password) {
        BasicDataSource dataSource = new BasicDataSource();

        dataSource.setUrl(String.format("jdbc:postgres_jts://%s:%d/%s", host, port, database));
        dataSource.setUsername(username);
        dataSource.setPassword(password);
        dataSource.setDriverClassName(JtsWrapper.class.getCanonicalName());
        dataSource.setDefaultAutoCommit(false);

        template = new JdbcTemplate(dataSource);
        template.setFetchSize(100000);
    }

    /**
     * @param importer receiver of all documents produced by {@link #readEntireDatabase()}
     */
    public void setImporter(Importer importer) {
        this.importer = importer;
    }

    /**
     * Loads a single placex row by its place id.
     *
     * @param placeId nominatim place id
     * @return the base document mapped from the row; the address hierarchy is not applied
     */
    public PhotonDoc getByPlaceId(long placeId) {
        return template.queryForObject(
                "SELECT " + selectColsPlaceX + " FROM placex WHERE place_id = ?",
                new Object[]{placeId},
                placeRowMapper).getBaseDoc();
    }

    /**
     * Loads the address rows attached to a document.
     *
     * For documents with a search rank above 28 the address lines of the parent place are used.
     *
     * @param doc document whose address hierarchy is requested
     * @return address rows in the order defined by the query
     */
    List<AddressRow> getAddresses(PhotonDoc doc) {
        long placeId = doc.getPlaceId();
        if (doc.getRankSearch() > 28) {
            placeId = doc.getParentPlaceId();
        }

        return template.query(
                "SELECT p.place_id, p.osm_type, p.osm_id, p.name, p.class, p.type, p.rank_address, p.admin_level, p.postcode, p.extratags->'place' as place"
                        + " FROM placex p, place_addressline pa"
                        + " WHERE p.place_id = pa.address_place_id and pa.place_id = ? and pa.cached_rank_address > 4 and pa.address_place_id != ? and pa.isaddress"
                        + " order by rank_address desc,fromarea desc,distance asc,rank_search desc",
                new Object[]{placeId, doc.getPlaceId()},
                new RowMapper<AddressRow>() {
                    @Override
                    public AddressRow mapRow(ResultSet rs, int rowNum) throws SQLException {
                        Integer adminLevel = rs.getInt("admin_level");
                        if (rs.wasNull()) {
                            adminLevel = null;
                        }
                        return new AddressRow(
                                rs.getLong("place_id"),
                                DBUtils.getMap(rs, "name"),
                                rs.getString("class"),
                                rs.getString("type"),
                                rs.getInt("rank_address"),
                                adminLevel,
                                rs.getString("postcode"),
                                rs.getString("place"),
                                rs.getString("osm_type"),
                                rs.getLong("osm_id")
                        );
                    }
                });
    }

    /** Sentinel put on the queue to tell the import thread that no more documents follow. */
    private static final PhotonDoc FINAL_DOCUMENT = new PhotonDoc(0, null, 0, null, null, null, null, null, null, 0, 0, null, null, 0, 0);

    /**
     * Consumer side of the import: takes documents from the queue and hands them to the
     * importer until {@link #FINAL_DOCUMENT} is seen, then calls {@code importer.finish()}.
     */
    private class ImportThread implements Runnable {
        private final BlockingQueue<PhotonDoc> documents;

        public ImportThread(BlockingQueue<PhotonDoc> documents) {
            this.documents = documents;
        }

        @Override
        public void run() {
            boolean interrupted = false;
            try {
                while (true) {
                    try {
                        PhotonDoc doc = documents.take();
                        if (doc == FINAL_DOCUMENT) {
                            break;
                        }
                        importer.add(doc);
                    } catch (InterruptedException e) {
                        // Keep consuming until the final document arrives, but remember the
                        // interrupt. It is restored only after finish() so that the flag does
                        // not make the next take() fail immediately.
                        interrupted = true;
                    }
                }
                importer.finish();
            } finally {
                if (interrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /**
     * Producer side of the import: completes mapped results, puts the resulting documents
     * on the queue and logs progress. Instances are used only from within
     * {@link #readEntireDatabase()}.
     */
    private class DocumentFeeder {
        private final BlockingQueue<PhotonDoc> documents;
        private final AtomicLong counter = new AtomicLong();
        private final long startMillis = System.currentTimeMillis();
        /** Set when a blocking call was interrupted; the status is restored in {@link #finish(Thread)}. */
        private boolean interrupted = false;

        DocumentFeeder(BlockingQueue<PhotonDoc> documents) {
            this.documents = documents;
        }

        /**
         * Queues all documents of a mapped result unless the result is not useful for the index.
         */
        void feed(NominatimResult docs) {
            if (!docs.isUsefulForIndex()) {
                return; // do not import document
            }

            // finalize document by taking into account the higher level placex rows assigned to this row
            completePlace(docs.getBaseDoc());

            for (PhotonDoc doc : docs.getDocsWithHousenumber()) {
                put(doc);

                long imported = counter.incrementAndGet();
                if (imported % PROGRESS_INTERVAL == 0) {
                    final double documentsPerSecond = 1000d * imported / (System.currentTimeMillis() - startMillis);
                    log.info(String.format("imported %s documents [%.1f/second]", MessageFormat.format("{0}", imported), documentsPerSecond));
                }
            }
        }

        /**
         * Queues the final document exactly once and waits for the import thread to end.
         * If any blocking call was interrupted along the way, the interrupt status of the
         * current thread is restored before returning.
         */
        void finish(Thread importThread) {
            put(FINAL_DOCUMENT);

            // Retry only the join: repeating the put as well would queue a second
            // final document after an interrupted join.
            while (true) {
                try {
                    importThread.join();
                    break;
                } catch (InterruptedException e) {
                    log.warn("Thread interrupted while waiting for the import thread to finish.");
                    interrupted = true;
                }
            }

            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }

        /** @return number of documents queued so far */
        long getCount() {
            return counter.longValue();
        }

        /** Puts a document on the queue, retrying until it succeeds. */
        private void put(PhotonDoc doc) {
            while (true) {
                try {
                    documents.put(doc);
                    return;
                } catch (InterruptedException e) {
                    log.warn("Thread interrupted while placing document in queue.");
                    interrupted = true;
                }
            }
        }
    }

    /**
     * parses every relevant row in placex, creates a corresponding document and calls the {@link #importer} for every document
     */
    public void readEntireDatabase() {
        log.info("start importing documents from nominatim ...");

        final BlockingQueue<PhotonDoc> documents = new LinkedBlockingDeque<PhotonDoc>(IMPORT_QUEUE_CAPACITY);
        final DocumentFeeder feeder = new DocumentFeeder(documents);

        Thread importThread = new Thread(new ImportThread(documents));
        importThread.start();

        template.query("SELECT " + selectColsPlaceX + " FROM placex WHERE linked_place_id IS NULL AND centroid IS NOT NULL ORDER BY geometry_sector; ", new RowCallbackHandler() {
            @Override
            public void processRow(ResultSet rs) throws SQLException {
                // turns a placex row into a photon document that gathers all de-normalised information
                feeder.feed(placeRowMapper.mapRow(rs, 0));
            }
        });

        template.query("SELECT place_id, osm_id, parent_place_id, startnumber, endnumber, interpolationtype, postcode, calculated_country_code, linegeo"
                + " FROM location_property_osmline ORDER BY geometry_sector; ", new RowCallbackHandler() {
            @Override
            public void processRow(ResultSet rs) throws SQLException {
                feeder.feed(osmlineRowMapper.mapRow(rs, 0));
            }
        });

        feeder.finish(importThread);

        log.info(String.format("finished import of %s photon documents.", MessageFormat.format("{0}", feeder.getCount())));
    }

    /**
     * retrieves a single document, used for testing / developing
     *
     * @param osmId   OSM id of the object
     * @param osmType 'N': node, 'W': way or 'R' relation
     * @return all matching placex rows as documents with their address hierarchy applied
     */
    public List<PhotonDoc> readDocument(long osmId, char osmType) {
        return template.query(
                "SELECT " + selectColsPlaceX + " FROM placex WHERE osm_id = ? AND osm_type = ?; ",
                new Object[]{osmId, osmType},
                new RowMapper<PhotonDoc>() {
                    @Override
                    public PhotonDoc mapRow(ResultSet resultSet, int i) throws SQLException {
                        PhotonDoc doc = placeRowMapper.mapRow(resultSet, 0).getBaseDoc();
                        completePlace(doc);
                        return doc;
                    }
                });
    }

    /**
     * querying nominatim's address hierarchy to complete photon doc with missing data (like country, city, street, ...)
     *
     * @param doc document to complete; it is modified in place
     */
    private void completePlace(PhotonDoc doc) {
        final List<AddressRow> addresses = getAddresses(doc);
        for (AddressRow address : addresses) {
            if (address.hasPostcode() && doc.getPostcode() == null) {
                doc.setPostcode(address.getPostcode());
            }

            if (address.isCity()) {
                if (doc.getCity() == null) {
                    doc.setCity(address.getName());
                } else {
                    // there is more than one city address for this document
                    if (address.hasPlace()) {
                        // this city is more important than the previous one
                        doc.getContext().add(doc.getCity()); // move previous city to context
                        doc.setCity(address.getName()); // use new city
                    } else {
                        doc.getContext().add(address.getName());
                    }
                }
                continue;
            }

            if (address.isCuratedCity()) {
                if (doc.getCity() == null) {
                    doc.setCity(address.getName());
                } else {
                    doc.getContext().add(doc.getCity()); // move previous city to context
                    doc.setCity(address.getName()); // use new city
                }
                // do not continue as a curated city might be a state as well
            }

            if (address.isStreet() && doc.getStreet() == null) {
                doc.setStreet(address.getName());
                continue;
            }

            if (address.isState() && doc.getState() == null) {
                doc.setState(address.getName());
                continue;
            }

            // no specifically handled item, check if useful for context
            if (address.isUsefulForContext()) {
                doc.getContext().add(address.getName());
            }
        }
    }
}