Google has a great API called Google Places, which is a good way to gather location data. For example, something like:
- All food places in London
- Starbucks in England
- etc.
The Python script I used is in the appendix below. The general idea behind it is:
- Establish a bounding box to search within (better too big than too small, since we can cut the results down later)
- Tessellate the bounding box with circles arranged in a hexagonal structure to establish the “search areas”. These should be big enough to keep the total number of circles down, but not so big that any single circle returns more than 60 stores, as that is the cut-off for the free “nearbysearch” endpoint (see the paging sketch after this list)
- Produce a map of the search circles to double-check we cover the desired area, and highlight whenever we hit the limit of 60
- Save the list of stores found (with the “place_id”) to a CSV file (openable in Excel) using the csv module
- Perform a further look-up with the Place Details service to get additional components (such as website, postal code, etc.)
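To make the 60-store cut-off concrete, here is a minimal sketch of how one search circle is queried and paged through. It is not the appendix script itself: the function name nearby_search_all and the plain requests/params style are mine, and YOUR_API_KEY is a placeholder. The free “nearbysearch” returns at most 20 results per page and at most 3 pages, which is where the limit of 60 comes from; the short pause is needed because a next_page_token only becomes valid a moment after it is issued.

import time
import requests

def nearby_search_all(lat, lng, radius_m, keyword, api_key):
    """Collect up to 60 results (three pages of 20) from the nearbysearch endpoint."""
    url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
    params = {'location': '%.5f,%.5f' % (lat, lng),
              'radius': radius_m,
              'keyword': keyword,
              'key': api_key}
    results = []
    while True:
        data = requests.get(url, params=params).json()
        if data['status'] not in ('OK', 'ZERO_RESULTS'):
            raise RuntimeError(data.get('error_message', data['status']))
        results.extend(data.get('results', []))
        token = data.get('next_page_token')
        if not token:
            return results
        time.sleep(2)  # the page token is not usable immediately
        params = {'pagetoken': token, 'key': api_key}

# e.g. stores = nearby_search_all(51.518175, -0.129064, 500, 'Cafe Nero', 'YOUR_API_KEY')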
As an example, suppose I want all the Cafe Nero stores in the City:
COMPANY_SEARCH = 'Cafe Nero'
RADIUS_KM = 0.5
coord = coordinates_box()
coord.createcoordinates(51.471834, -0.204326, 51.542672, -0.049488)
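For reference, the tessellation these inputs drive works roughly as follows. This is a stripped-down sketch of what createcoordinates does, not a drop-in replacement: the helper name hex_circle_centres is mine, but the spacing is the same hexagonal packing, with rows of circle centres 2·r·cos(30°) apart horizontally (alternate rows shifted by half a step) and r·(1 + sin(30°)) apart vertically.

import math

EARTH_RADIUS_KM = 6371  # mean Earth radius used for the km -> radians conversion

def hex_circle_centres(sw_lat, sw_lng, ne_lat, ne_lng, radius_km):
    """Return circle centres (lat, lng in degrees) covering a bounding box
    with a hexagonal packing of circles of the given radius."""
    centres = []
    lat = math.radians(sw_lat)
    row = 0
    while math.degrees(lat) <= ne_lat:
        # Horizontal spacing shrinks with latitude (circles of latitude are smaller)
        parallel_radius = EARTH_RADIUS_KM * math.cos(lat)
        lng_step = 2 * (radius_km / parallel_radius) * math.cos(math.radians(30))
        # Every other row is shifted by half a step to interlock the circles
        lng = math.radians(sw_lng) + (0 if row % 2 == 0 else lng_step / 2)
        while math.degrees(lng) <= ne_lng:
            centres.append((math.degrees(lat), math.degrees(lng)))
            lng += lng_step
        # Vertical spacing for hexagonal packing: r * (1 + sin(30 deg)) = 1.5 * r
        lat += (radius_km / EARTH_RADIUS_KM) * (1 + math.sin(math.radians(30)))
        row += 1
    return centres

# e.g. the bounding box above, covered with 0.5 km circles
print(len(hex_circle_centres(51.471834, -0.204326, 51.542672, -0.049488, 0.5)))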
These parameters produce the following “search map”:
Most of the 96 stores found return postcodes:
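The postcode comes out of the Place Details response’s address_components, by matching on the “postal_code” type. Here is a minimal sketch of just that step (the function name get_postcode and the placeholder arguments are mine; the appendix version also pulls the formatted address and website):

import requests

def get_postcode(place_id, api_key):
    """Fetch Place Details for one place_id and pull out the postal code, if any."""
    url = 'https://maps.googleapis.com/maps/api/place/details/json'
    data = requests.get(url, params={'placeid': place_id, 'key': api_key}).json()
    if data['status'] != 'OK':
        return None
    for component in data['result'].get('address_components', []):
        if 'postal_code' in component['types']:
            return component['long_name']
    return None

# e.g. get_postcode('ChIJ...', 'YOUR_API_KEY')  # both arguments are placeholders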
The most important components for me are the latitudes and longitudes, which we can use to easily produce a map of the found stores:
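As an illustration of how simple this is, here is a minimal sketch that reads the CSV produced by runsearch() in the appendix (columns: name, vicinity, lat, lng, types, place_id; the path below is a hypothetical example) and scatter-plots the coordinates with matplotlib rather than on a Google map:

import csv
import matplotlib.pyplot as plt

CSV_PATH = 'H:/Cafe Nero_python_mined.csv'  # hypothetical output path

lats, lngs = [], []
with open(CSV_PATH, newline='') as f:
    for row in csv.reader(f):
        lats.append(float(row[2]))
        lngs.append(float(row[3]))

plt.scatter(lngs, lats, s=15)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Found stores')
plt.show()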
Appendix: Python Script
import urllib.parse
import csv
import time
import math
import requests
import itertools

API_KEY = [ ... ]

shops_list = []
debug_list = []

SAVE_PATH = 'H:/'
COMPANY_SEARCH = 'Cafe Nero'
RADIUS_KM = 0.5
LIMIT = 60


class coordinates_box(object):
    """
    Initialise a coordinates_box class which will hold the produced
    coordinates and output a html map of the search area
    """

    def __init__(self):
        self.coordset = []

    def createcoordinates(self, southwest_lat, southwest_lng, northeast_lat, northeast_lng):
        """
        Based on the input radius this tessellates a 2D space
        with circles in a hexagonal structure
        """
        earth_radius_km = 6371
        lat_start = math.radians(southwest_lat)
        lon_start = math.radians(southwest_lng)
        lat = lat_start
        lon = lon_start
        lat_level = 1
        while True:
            if (math.degrees(lat) <= northeast_lat) and (math.degrees(lon) <= northeast_lng):
                self.coordset.append([math.degrees(lat), math.degrees(lon)])
            parallel_radius = earth_radius_km * math.cos(lat)
            if math.degrees(lat) > northeast_lat:
                break
            elif math.degrees(lon) > northeast_lng:
                lat_level += 1
                lat += (RADIUS_KM / earth_radius_km) + (RADIUS_KM / earth_radius_km) * math.sin(math.radians(30))
                if lat_level % 2 != 0:
                    lon = lon_start
                else:
                    lon = lon_start + (RADIUS_KM / parallel_radius) * math.cos(math.radians(30))
            else:
                lon += 2 * (RADIUS_KM / parallel_radius) * math.cos(math.radians(30))
        print('Coordinates-set contains %d coordinates' % len(self.coordset))
        # Save coordinates:
        f = open(SAVE_PATH + 'circles_' + COMPANY_SEARCH + '_python_mined.csv', 'w', newline='')
        w = csv.writer(f)
        for coord in self.coordset:
            w.writerow(coord)
        f.close()
        # LOG MAP
        self.htmlmaplog(SAVE_PATH + 'htmlmaplog_' + COMPANY_SEARCH + '.html')

    def htmlmaplog(self, map_save_path):
        """ Outputs a HTML map """
        htmltext = """
<!DOCTYPE html>
<style type="text/css">
  html, body { height: 100%; width: 100%; padding: 0px; margin: 0px; }
</style>
<head>
<meta name="viewport" content="initial-scale=1.0, user-scalable=no" />
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
<title>Boundary Partitioning</title>
<xml id="myxml">
<markers>
"""
        # Content
        for coord in self.coordset:
            rowcord = '<marker name = "' + COMPANY_SEARCH + '" lat = "' + \
                      '%.5f' % coord[0] + '" lng = "' + '%.5f' % coord[1] + '"/>\n'
            htmltext += rowcord
        # Bottom
        htmltext += """
</markers>
</xml>
<script type="text/javascript" src="https://maps.googleapis.com/maps/api/js?&sensor=false&libraries=geometry"></script>
<script type="text/javascript">
    var XML = document.getElementById("myxml");
    if (XML.documentElement == null)
        XML.documentElement = XML.firstChild;
    var MARKERS = XML.getElementsByTagName("marker");
"""
        htmltext += "var RADIUS_KM = " + str(RADIUS_KM) + ";"
        htmltext += """
    var map;
    var geocoder = new google.maps.Geocoder();
    var counter = 0
    function load() {
        // Initialize around City, London
        var my_lat = 51.518175;
        var my_lng = -0.129064;
        var mapOptions = {
            center: new google.maps.LatLng(my_lat, my_lng),
            zoom: 12
        };
        map = new google.maps.Map(document.getElementById('map'), mapOptions);
        var bounds = new google.maps.LatLngBounds();
        for (var i = 0; i < MARKERS.length; i++) {
            var name = MARKERS[i].getAttribute("name");
            var point_i = new google.maps.LatLng(
                parseFloat(MARKERS[i].getAttribute("lat")),
                parseFloat(MARKERS[i].getAttribute("lng")));
            var icon = {icon: 'http://labs.google.com/ridefinder/images/mm_20_gray.png'};
            var col = '#0033CC';
            var draw_circle = new google.maps.Circle({
                center: point_i,
                radius: RADIUS_KM*1000,
                strokeColor: col,
                strokeOpacity: 0.15,
                strokeWeight: 2,
                fillColor: col,
                fillOpacity: 0.15,
                map: map
            });
            var marker = new google.maps.Marker({
                position: point_i,
                map: map,
                icon: 'https://maps.gstatic.com/intl/en_us/mapfiles/markers2/measle_blue.png'
            })
            bounds.extend(point_i);
        };
        map.fitBounds(bounds);
    }
</script>
</head>
<body onload="load()">
<center>
<div style="padding-top: 20px; padding-bottom: 20px;">
<div id="map" style="width:90%; height:1024px;"></div>
</center>
</body>
</html>
"""
        with open(map_save_path, 'w') as f:
            f.write(htmltext)


class counter(object):
    """ Counter class to keep track of the requests usage """

    def __init__(self):
        self.keynum = 0
        self.partition_num = 0
        self.detailnum = 0

    def increment_key(self):
        self.keynum += 1

    def increment_partition(self):
        self.partition_num += 1

    def increment_detail(self):
        self.detailnum += 1


def googleplaces(lat, lng, radius_metres, search_term, key, pagetoken=None, nmbr_returned=0):
    """
    Function uses the 'nearbysearch', however it is possible to use the
    radar-search and others located here:
    https://developers.google.com/places/web-service/search
    The API call returns a page_token for the next page up to a total of 60 results
    """
    location = urllib.parse.quote("%.5f,%.5f" % (lat, lng))
    radius = float(radius_metres)
    name = urllib.parse.quote(str(search_term))
    search_url = ('https://maps.googleapis.com/maps/api/place/' +
                  'nearbysearch' +
                  '/json?location=%s&radius=%d&keyword=%s&key=%s') % (location, radius, name, key)
    if pagetoken is not None:
        search_url += '&pagetoken=%s' % pagetoken
        # SLEEP so that the page token becomes valid before the request is generated
        time.sleep(2)
    time.sleep(0.1)
    req_count.increment_key()
    print("Search number %d: %s" % (req_count.keynum, search_url))
    google_search_request = requests.get(search_url)
    search_json_data = google_search_request.json()
    print(search_json_data['status'])
    if search_json_data['status'] == 'OK':
        nmbr_returned += len(search_json_data['results'])
        for place in search_json_data['results']:
            shop = [place['name'].encode('ascii', 'ignore').decode('ascii'),
                    place['vicinity'].encode('ascii', 'ignore').decode('ascii'),
                    place['geometry']['location']['lat'],
                    place['geometry']['location']['lng'],
                    place['types'],
                    place['place_id']]
            if shop not in shops_list:
                shops_list.append(shop)
        # Possible to get up to 60 results
        # from one search by passing next_page_token
        try:
            next_token = search_json_data['next_page_token']
            googleplaces(lat=lat, lng=lng, radius_metres=radius_metres,
                         search_term=search_term, key=key,
                         pagetoken=next_token, nmbr_returned=nmbr_returned)
            return
        except KeyError:
            pass
    elif search_json_data['status'] == 'ZERO_RESULTS':
        pass
    else:
        try:
            print('Error: %s' % search_json_data['error_message'])
        except KeyError:
            print('Unknown error message - check URL')
    debug_list.append([lat, lng, nmbr_returned])
    print('Partition %s no. %d/%d - found %d stores' %
          (location, req_count.partition_num, len(coord.coordset), nmbr_returned))
    if nmbr_returned >= LIMIT:
        print('Warning possible cut-off')
    print('List contains %d stores with key number: %d' %
          (len(shops_list), (req_count.keynum // 900)))


def googledetails(place_id, key):
    """ Function uses the mined place_ids to get further data from the details API """
    detail_url = ('https://maps.googleapis.com/maps/api/place/' +
                  'details' +
                  '/json?placeid=%s&key=%s') % (place_id, key)
    print(detail_url)
    google_detail_request = requests.get(detail_url)
    detail_json_data = google_detail_request.json()
    time.sleep(0.1)
    if detail_json_data['status'] == 'OK':
        try:
            address_components = detail_json_data['result']['address_components']
            print(address_components)
            # At the moment care only about extracting postcode, however possible to get:
            # Street number, Town, etc.
            postcode = 'NaN'
            for x in address_components:
                if 'postal_code' in x['types']:
                    postcode = x['long_name'].encode('ascii', 'ignore').decode('ascii')
                    break
        except KeyError:
            postcode = 'NaN'
        try:
            formatted_address = detail_json_data['result']['formatted_address'].encode('ascii', 'ignore').decode('ascii')
        except KeyError:
            formatted_address = 'NaN'
        try:
            website = detail_json_data['result']['website'].encode('ascii', 'ignore').decode('ascii')
        except KeyError:
            website = 'NaN'
        detail = [postcode, formatted_address, website]
    else:
        detail = detail_json_data['status'].encode('ascii', 'ignore').decode('ascii')
    print(detail)
    return detail


def fillindetails(f=SAVE_PATH + COMPANY_SEARCH + '_python_mined.csv'):
    """ Opens the produced CSV and extracts the place ID for querying """
    detailed_stores_out = []
    simple_stores_out = []
    with open(f, 'r') as csvin:
        reader = csv.reader(csvin)
        for store in reader:
            req_count.increment_detail()
            key_number = (req_count.keynum // 950)
            detailed_store = googledetails(store[5], API_KEY[key_number])
            print('Row number %d/%d, store info: %s' %
                  (req_count.detailnum, len(shops_list), detailed_store))
            detailed_stores_out.append(detailed_store)
            simple_stores_out.append(store)
    # OUTPUT to CSV
    f = open(SAVE_PATH + 'detailed_' + COMPANY_SEARCH + '_python_mined.csv', 'w', newline='')
    w = csv.writer(f)
    # Combine both lists into one
    combined_list = [list(itertools.chain(*a)) for a in zip(simple_stores_out, detailed_stores_out)]
    for one_store in combined_list:
        try:
            w.writerow(one_store)
        except Exception as err:
            print("Something went wrong: %s" % err)
            w.writerow("Error")
    f.close()


def runsearch():
    """ Initialises the searches for each partition produced """
    print("%d Keys Remaining" % (len(API_KEY) - 1))
    for partition in coord.coordset:
        # Keys have a life-span of 1000 requests
        key_number = (req_count.keynum // 1000)
        req_count.increment_partition()
        googleplaces(lat=partition[0], lng=partition[1],
                     radius_metres=RADIUS_KM * 1000,
                     search_term=COMPANY_SEARCH,
                     key=API_KEY[key_number])
    # OUTPUT to CSV
    f = open(SAVE_PATH + COMPANY_SEARCH + '_python_mined.csv', 'w', newline='')
    w = csv.writer(f)
    for one_store in shops_list:
        w.writerow(one_store)
    f.close()
    # OUTPUT LOG to CSV
    f = open(SAVE_PATH + 'log_' + COMPANY_SEARCH + '_python_mined.csv', 'w', newline='')
    w = csv.writer(f)
    for debug_result in debug_list:
        w.writerow(debug_result)
    f.close()
    # DETAIL SEARCH
    fillindetails()


if __name__ == "__main__":
    # 1. CREATE PARTITIONS
    # Setup coordinates
    coord = coordinates_box()
    coord.createcoordinates(51.471834, -0.204326, 51.542672, -0.049488)
    # 2. SEARCH PARTITIONS
    # Setup counter
    req_count = counter()
    runsearch()