This is a comparison of the performance of the ‘geocode()’ function, using the Google Maps and Here map services.

I am using a dataset of 100 addresses in Lisbon, Portugal.

Setup

# Attach the packages used in this comparison and register both API keys.
library(readxl)
library(tidyverse)
library(sf)
library(mapview)
library(usethis)
library(ggmap)
# The Google API key is read from the GOOGLE_KEY environment variable.
register_google(Sys.getenv("GOOGLE_KEY"))
# Both ggmap and hereR are called through a geocode() function in this
# document, so every call below is namespace-qualified (ggmap::geocode /
# hereR::geocode).
library(hereR)
# The Here API key is read from the HERER_KEY environment variable.
hereR::set_key(Sys.getenv("HERER_KEY"))

Prepare data

# Load the origins/destinations spreadsheet.
MORADAS <- read_excel("origens e destinos.xlsx")
# Keep the first 101 rows and the first two columns, then drop incomplete
# rows, which leaves 100 addresses.
TESTE <- na.omit(MORADAS[1:101, 1:2])
# Append the city so that short entries (postcodes, place names) are
# geocoded within Lisbon.
TESTE$city <- "Lisbon"
TESTE$address <- with(TESTE, paste(Origem, city, sep = ", "))
head(TESTE)
ID Origem city address
1 1600-082 Lisbon 1600-082, Lisbon
2 Instituto Superior Técnico Lisbon Instituto Superior Técnico, Lisbon
3 Marquês de Pombal Lisbon Marquês de Pombal, Lisbon
4 São Domingos de Benfica Lisbon São Domingos de Benfica, Lisbon
5 Lisboa Lisbon Lisboa, Lisbon
6 1150 Lisbon 1150, Lisbon

Geocode the addresses

Using Google Maps service

# Geocode every address with Google Maps, timing the whole batch.
GMAPStime <- system.time({
  googlemaps <- ggmap::geocode(TESTE$address)
})
# Put the returned lon/lat columns next to the input rows.
googlemaps <- cbind(TESTE, googlemaps)
# Build one WKT point per row (longitude first, then latitude) and convert
# the table to an sf object in EPSG:4326.
googlemaps$wkt <- with(googlemaps, paste("POINT(", lon, " ", lat, ")"))
googlemaps <- st_as_sf(googlemaps, wkt = "wkt", crs = 4326)
head(googlemaps)
ID Origem city address lon lat wkt
1 1600-082 Lisbon 1600-082, Lisbon -9.150095 38.74807 c(-9.1500946, 38.7480719)
2 Instituto Superior Técnico Lisbon Instituto Superior Técnico, Lisbon -9.138705 38.73682 c(-9.138705, 38.7368192)
3 Marquês de Pombal Lisbon Marquês de Pombal, Lisbon -9.149122 38.72561 c(-9.1491222, 38.7256098)
4 São Domingos de Benfica Lisbon São Domingos de Benfica, Lisbon -9.168561 38.75001 c(-9.1685614, 38.7500101)
5 Lisboa Lisbon Lisboa, Lisbon -9.139337 38.72225 c(-9.1393366, 38.7222524)
6 1150 Lisbon 1150, Lisbon -9.139337 38.72225 c(-9.1393366, 38.7222524)

Using Here service

It took me some time to realize that it drops the original ID, creating a sequential one without the missing results. We need to fix that, which is why the code below has a few extra steps.

# Geocode the same addresses with Here, timing the whole batch.
heregeocode <- TESTE
HEREtime <- system.time({
  locs <- hereR::geocode(heregeocode$address)
})
## Address(es) 'Instituto Superior Técnico, Lisbon', 'LX Factory, Lisbon', 'El Corte Inglês, Lisbon', 'Palacio SotoMayor, Lisbon' not found.
# Addresses that were not found are missing from `locs`, so locs$id is used
# as the row index into the input table to bring back the original columns.
heregeocode <- st_as_sf(data.frame(locs, heregeocode[locs$id, ]))
# Remove duplicate columns.
# NOTE(review): the positions depend on the columns hereR returns - confirm
# after upgrading the package.
heregeocode <- heregeocode[, -c(1, 13, 14)]
# Reorder columns.
heregeocode <- heregeocode[, c(10:11, 1:8)]
head(heregeocode)
ID Origem address postalCode city county country type district street geometry
1 1600-082 1600-082, Lisboa, Portugal 1600-082 Lisboa Lisboa PRT point NA NA c(-9.15044, 38.74805)
3 Marquês de Pombal Marquês de Pombal, Lisboa, Portugal 1250 Lisboa Lisboa PRT point Marquês de Pombal NA c(-9.14939, 38.72453)
4 São Domingos de Benfica Rua de São Domingos de Benfica, 1500-559 Lisboa, Portugal 1500-559 Lisboa Lisboa PRT point Lisboa Rua de São Domingos de Benfica c(-9.17488, 38.74212)
5 Lisboa Lisboa, Portugal 1050-115 Lisboa Lisboa PRT point NA NA c(-9.14949, 38.72639)
6 1150 1150, Lisboa, Portugal 1150 Lisboa Lisboa PRT point NA NA c(-9.14168, 38.7228)
7 Arco do Cego Rua do Arco do Cego, 1000-020 Lisboa, Portugal 1000-020 Lisboa Lisboa PRT point Lisboa Rua do Arco do Cego c(-9.14275, 38.74048)

It did not find my university's address, no matter how many other names I tried :(

# Try several spellings of the university's name with the Here geocoder,
# with and without accents, the "UL-" prefix, and the city or country,
# and pass each result straight to mapview().
mapview(hereR::geocode("Instituto Superior Tecnico, Lisbon"))
mapview(hereR::geocode("Instituto Superior Técnico, Portugal"))
mapview(hereR::geocode("UL-Instituto Superior Tecnico, Lisbon"))
mapview(hereR::geocode("UL-Instituto Superior Técnico, Lisboa"))
mapview(hereR::geocode("UL-Instituto Superior Técnico"))
mapview(hereR::geocode("Instituto Superior Técnico"))
mapview(hereR::geocode("IST, Lisbon"))
mapview(hereR::geocode("Instituto Superior Técnico Alameda")) #Something in Alabama(?!)
mapview(hereR::geocode("Instituto Superior Técnico Alameda, Lisbon"))

All NULL.

Comparing time to geocode

# Print the system.time() measurements recorded around each geocoding call.
GMAPStime
##    user  system elapsed 
##    2.17    0.24   12.98
HEREtime
##    user  system elapsed 
##    0.77    0.06    2.06

Wow! Here is really fast!!
And it retrieves much more information than lat/lon…

Map the results

# Use the original address ID as the row name of both result sets.
rownames(googlemaps) <- googlemaps$ID
rownames(heregeocode) <- heregeocode$ID

# Draw both result sets on one interactive map: Here in blue, Google in red.
mapview::mapview(heregeocode, col.regions = "blue") +
  mapview(googlemaps, col.regions = "red")

There are some odd results in the USA :)

If you select which layers to show, you may see that they are not quite in the same locations.
If you hover over the dots, you can check their IDs.

Check the distances between them

# Keep the Google result for every address that Here also found.
# st_distance(by_element = TRUE) pairs rows by position, so match() is used
# to return the Google rows in exactly the order of `heregeocode`, instead of
# relying on both tables happening to be sorted the same way.
googlemapsredux <- googlemaps[match(heregeocode$ID, googlemaps$ID), ]
# Distance between the two services' points for the same address.
distances <- sf::st_distance(googlemapsredux, heregeocode, by_element = TRUE)
summary(distances) # in meters
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       4     186     557  125843    1323 6423068

The median is over half a kilometre!

Let’s remove the odd dots from Here

# IDs of the Here results that were singled out as odd on the map.
ODDS <- c(82, 100)
heregeocodeodd <- heregeocode[!heregeocode$ID %in% ODDS, ]
# Pick the Google rows for the remaining IDs, in the same order as the Here
# rows: st_distance(by_element = TRUE) pairs rows by position, not by ID.
googlemapsodd <- googlemapsredux[
  match(heregeocodeodd$ID, googlemapsredux$ID),
]
distances2 <- st_distance(googlemapsodd, heregeocodeodd, by_element = TRUE)
summary(distances2) # in meters
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     4.28   185.91   556.69  2798.51  1323.41 38679.81

Better, the max distance is now 38 km. Still far…