I am working on the Nominatim geolocator in Python. Unfortunately, some addresses are missing, therefore I tried to make some condition-based workaround, which would allow executing something based at least on postcode, which works well in any case.
So far I have not been able to get it working. This is the code I am using:
import pandas as pd
import folium
import web-browser
from geopy.geocoders import Nominatim
geolocator = Nominatim(timeout=10, user_agent="Krukarius")
def find_location(row):
    """Geocode the row's address and return where it was found.

    Two lookups are made: one for the full ``Address`` value and one for
    its last eight characters (used as the postcode). When the full
    address is found its ``(latitude, longitude)`` pair is returned;
    otherwise the raw result of the postcode lookup is returned.
    """
    address = row['Address']
    postcode = address[-8:]
    # Both requests are always sent, full address first.
    full_match = geolocator.geocode(address)
    postcode_match = geolocator.geocode(postcode)
    if full_match is None:
        # NOTE(review): this hands back the geocoder's result object (or
        # None), not a (lat, lng) pair like the branch below.
        return postcode_match
    return full_match.latitude, full_match.longitude
# Load the input table; find_location reads its 'Address' column.
points = pd.read_csv("Addresses4.csv")
# find_location runs once per row (axis="columns"); result_type="expand"
# spreads the returned pair over the two new columns.
points[['Lat','Lng']] = points.apply(find_location, axis="columns", result_type="expand")
print(points)
# Save the table, now including the Lat/Lng columns, to a new CSV file.
points.to_csv('NewAddresses4.csv')
ValueError: Location should consist of two numerical values, but '' of type <class 'str'> is not convertible to float.
def find_location(row, geocode=None):
    """Return ``(latitude, longitude)`` for the address stored in *row*.

    The full ``Address`` value is geocoded first. Only when that lookup
    finds nothing is the postcode -- taken to be the last eight characters
    of the address -- geocoded as a fallback, so a successful lookup costs
    a single request.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with an
            ``'Address'`` entry.
        geocode: Optional callable that takes a query string and returns
            an object with ``latitude``/``longitude`` attributes, or
            ``None`` when nothing is found. Defaults to the module-level
            ``geolocator.geocode``.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        address is missing or neither lookup finds anything.
    """
    if geocode is None:
        geocode = geolocator.geocode
    place = row['Address']
    if not isinstance(place, str) or not place.strip():
        # Empty CSV cells are read by pandas as NaN (a float), which can be
        # neither sliced nor geocoded.
        return None, None
    location = geocode(place)
    if location is None:
        # Full address unknown: retry with the postcode part only.
        location = geocode(place[-8:])
    if location is None:
        return None, None
    return location.latitude, location.longitude
In this version, the latitude and longitude of `location` are returned when the full address was found. Otherwise the latitude and longitude of `location_overall` (the postcode lookup) are returned when that result is available, and `None, None` is returned only when neither `location` nor `location_overall` is available.
I think I sorted it out on my own.
The code should exactly look like this:
def find_location(row):
    """Return ``(latitude, longitude)`` for the row's address.

    The full ``Address`` value is looked up first; if it is not found, the
    last eight characters (taken to be the postcode) are looked up instead.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when neither
        lookup finds anything.
    """
    place = row['Address']
    location = geolocator.geocode(place)
    if location is None:
        # Fall back to the postcode only when needed, so a successful
        # full-address lookup does not cost a second request.
        location = geolocator.geocode(place[-8:])
    if location is None:
        # The postcode lookup can fail too; without this guard the attribute
        # access below would raise AttributeError on None.
        return None, None
    return location.latitude, location.longitude
That is, when the full address cannot be geocoded, the function returns the coordinates found for the postcode instead.
Related
I'm new to Flask and I'm not sure why I am getting this error: 'NoneType' object has no attribute 'name'.
(PLease ignore: "It looks like your post is mostly code; please add some more details.
It looks like your post is mostly code; please add some more details.
It looks like your post is mostly code; please add some more details.
")
Here is what it looks like in the console
File "/Users/thomashunt/projects/ct-platform-api/apis/student_api.py", line 448, in put
return StudentService.student_WorkShadow(submission)
File "/Users/thomashunt/projects/ct-platform-api/services/students.py", line 234, in student_WorkShadow
AddressService.set_address_info(submission.student_detail.location_address)
File "/Users/thomashunt/projects/ct-platform-api/services/addresses.py", line 18, in set_address_info
address_description = address.address_description(country.name)
AttributeError: 'NoneType' object has no attribute 'name'
services/students
@staticmethod
def student_WorkShadow(submission: StudentWorkShadowEdit) -> Person:
    """Apply a work-shadow edit to the matching student and persist it.

    Looks up the advisor and the student by email, replaces the student's
    location address with the result of ``AddressService.set_address_info``
    for the submitted address, applies the submission to the student, then
    saves, commits, records an internal-note event and re-indexes.

    Args:
        submission: The edit to apply; its ``email`` and ``advisor_email``
            identify the student and the advisor.

    Returns:
        The updated student record.

    Raises:
        RecordNotFoundException: If no record exists for the student's or
            the advisor's email.
    """
    repo = PersonData()
    advisor = repo.find_by_email(submission.advisor_email)
    # Only the student's email is lower-cased before the lookup.
    email = submission.email.lower()
    student = repo.find_by_email(email)
    if not student:
        raise RecordNotFoundException('No Record with this email in the database')
    if not advisor:
        raise RecordNotFoundException('No Record with this advisor email in the database')

    # Forced re-write of Address entered by Student
    student.student_detail.location_address = \
        AddressService.set_address_info(submission.student_detail.location_address)
    submission.set_model(student)
    files = StudentService.promote_student_files(advisor, submission.file_ids, student.id)

    repo.save(student, advisor.id)
    repo.session.commit()

    student_statement = 'student workshadow details updated'
    reference_fields = [EventItemReferenceField('updated_workshadowDetails', 'Updated workshadow Details'),
                        EventItemReferenceField('form_action', 'confidential_updated')]
    reference_content = [student_statement]
    MessagingActivityService.create_student_event_for_action(student.id, None, student,
                                                             True,
                                                             ActionTypes.Student.value.InternalNote,
                                                             student_statement,
                                                             reference_fields,
                                                             reference_content, files, None,
                                                             None, None, True, True)
    StudentService.re_index(student)
    return student
API Endpoints
@ns.route('/StudentWorkShadow')
class StudentWorkShadowEndpoint(Resource):
    """Resource served at ``/StudentWorkShadow``."""

    @SecurityService.requires_system
    @ns.expect(student_workshadow_model, validate=True)
    @ns.marshal_with(student_person_model)
    def put(self):
        """Build a work-shadow edit from the JSON request body and apply it.

        Returns:
            The value returned by ``StudentService.student_WorkShadow``.
        """
        logging.info('student workshadow details updated')
        submission = StudentWorkShadowEdit.from_dict(request.json)
        return StudentService.student_WorkShadow(submission)
services/address
import logging
from models import Address
from resources import AddressEdit
from utility import GoogleUtility
from .data import CountryData
class AddressService:
    """Helpers for enriching and comparing address records."""

    @staticmethod
    def set_address_info(address: Address):
        """Fill in the descriptive and geographic fields of *address*.

        Sets ``location_description`` when both suburb and state are
        present, then resolves the address description through
        ``GoogleUtility.resolve_coords`` and copies latitude, longitude,
        the raw result and the formatted address from the first result.
        Resolution failures are logged, not raised.

        Args:
            address: The address to update in place.

        Returns:
            The same *address* object.
        """
        countries = CountryData()
        country = countries.load_country(address.country_code)
        if address.suburb is not None and address.state is not None:
            address.location_description = address.suburb + ', ' + address.state
        # NOTE(review): load_country returns None when no country matches
        # address.country_code; `.name` then raises AttributeError.
        address_description = address.address_description(country.name)
        maps_result = GoogleUtility.resolve_coords(address_description)
        try:
            first_result = maps_result[0]
            logging.debug('First geocoding result: %s', first_result)
            address.latitude = first_result['geometry']['location']['lat']
            address.longitude = first_result['geometry']['location']['lng']
            address.raw_location = first_result
            address.formatted_address = first_result['formatted_address']
        except TypeError:
            # Presumably maps_result is an error object rather than a list
            # of results in this case -- TODO confirm against GoogleUtility.
            logging.error(maps_result.error)
        except IndexError:
            logging.error('No result for address resolution')
        return address

    @staticmethod
    def has_address_changed(old_address: Address, new_address: AddressEdit):
        """Return True when *new_address* differs from *old_address*.

        Lines 1 and 2, suburb, postcode and country code are compared.
        If either address is missing, the address counts as changed unless
        both are missing.
        """
        if not old_address or not new_address:
            # Avoids dereferencing a missing address below.
            return bool(old_address) != bool(new_address)
        return not (old_address.line_1 == new_address.line_1
                    and old_address.line_2 == new_address.line_2
                    and old_address.suburb == new_address.suburb
                    and old_address.postcode == new_address.postcode
                    and old_address.country_code == new_address.country_code)
country/data outputs:
import json
from resources import Country
class CountryData:
    """Look-ups over the countries listed in ``services/data/countries.json``."""

    # The file is read once, while the class body executes; `source` and
    # `countries` are therefore class attributes shared by all instances.
    with open('services/data/countries.json') as json_data:
        source = json.load(json_data)
    countries = [Country.from_dict(entry) for entry in source]

    def load_country(self, country_code: str):
        """Return the country whose ``country_code`` equals *country_code*.

        Returns None when no country matches. If several match, the last
        one in the list is returned.
        """
        result = None
        for country in self.countries:
            if country.country_code == country_code:
                result = country
        return result

    def load_state(self, country_code: str, short_title: str):
        """Return the state of the given country with this ``short_title``.

        Returns None when the country code or the state title is unknown.
        If several states match, the last one is returned.
        """
        country = self.load_country(country_code)
        if country is None:
            # Unknown country: nothing to search, and no None dereference.
            return None
        result = None
        for state in country.states:
            if state.short_title == short_title:
                result = state
        return result

    def list_states(self, country_code: str):
        """Return the states of the given country, or an empty list if the code is unknown."""
        country = self.load_country(country_code)
        if country is None:
            return []
        return country.states
My suspicion is that the value you pass for country_code does not match against any country.country_code attribute.
My advice is to put a debug print line in the method like this:
class CountryData:
    ...
    def load_country(self, country_code: str):
        result = None
        for country in self.countries:
            if country.country_code == country_code:
                result = country
            # Debug aid: runs for every country, showing the current match
            # (None until one is found) next to the code just compared.
            print(result, country.country_code) # this line added
        return result
    ...
Doing this, you should be able to see if result is ever set to a value other than None, and you can observe exactly which country code triggers it. Moreover, this will print all available country codes (one per line). If your country_code is not one of these, that is the problem.
I want to extract coordinates from postcodes as a new df column.
The functionality from the geopy module is:
from geopy.geocoders import Nominatim
geolocator = Nominatim()
location = geolocator.geocode('%s tn6 3rn')
print((location.latitude, location.longitude))
(51.0459837, 0.2192646)
My function to apply to this to a single value works:
def pcodeToCoor(x):
    """Geocode *x* with a fresh Nominatim client and return (latitude, longitude)."""
    match = Nominatim().geocode(x)
    # NOTE(review): when nothing is found `match` is None and the attribute
    # access below raises AttributeError.
    coordinates = (match.latitude, match.longitude)
    return coordinates
pcodeToCoor('%s tn6 3rn')
(51.0459837, 0.2192646)
But when passing the function to a test df:
name postcode
0 jd tn6 3rn
1 hf en6 1dg
2 ss sw17 0ju
df['coordinate'] = df['postcode'].map(pcodeToCoor)
I get `AttributeError: 'NoneType' object has no attribute 'latitude'`. Note that I can recreate this error by removing %s from the basic API call shown above.
The question is, how do I use %s in my function? I imagine the answer's very simple but nothing I've tried works!
You could do something like this if you are always going to call the function this way, or you could code a check to see whether your string already starts with "%s".
def pcodeToCoor(x, geolocator=None):
    """Return ``(latitude, longitude)`` for postcode *x*.

    ``'%s '`` is prepended to *x* before it is geocoded.

    Args:
        x: Postcode string without the ``'%s '`` prefix.
        geolocator: Optional geocoder object with a ``geocode(query)``
            method. Pass one in to reuse a single client across many
            calls (e.g. with ``Series.map``); by default a new
            ``Nominatim()`` is created on each call.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the postcode
        is not found.
    """
    if geolocator is None:
        geolocator = Nominatim()
    location = geolocator.geocode('%s ' + x)
    if location is None:
        # Nothing matched; report that instead of failing on None.latitude.
        return None
    return (location.latitude, location.longitude)
Edit:
def pcodeToCoor(x, geolocator=None):
    """Return ``(latitude, longitude)`` for postcode *x*.

    *x* may be given with or without a leading ``'%s'``; the prefix
    ``'%s '`` is added when it is missing.

    Args:
        x: Postcode string.
        geolocator: Optional geocoder object with a ``geocode(query)``
            method. Pass one in to reuse a single client across many
            calls; by default a new ``Nominatim()`` is created per call.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the postcode
        is not found.
    """
    if not x.startswith('%s'):
        x = '%s ' + x
    if geolocator is None:
        geolocator = Nominatim()
    location = geolocator.geocode(x)
    if location is None:
        # Nothing matched; report that instead of failing on None.latitude.
        return None
    return (location.latitude, location.longitude)
Test:
pcodeToCoor('%s tn6 3rn')
Output:
(51.0459837, 0.2192646)
Test 2:
df['postcode'].map(pcodeToCoor)
Output:
0 (51.0459837, 0.2192646)
1 (51.7206134, -0.2042041)
2 (51.3866947, -0.1800573)
Name: postcode, dtype: object
I am trying to extract raw data from a text file and after processing the raw data, I want to export it to another text file. Below is the python code I have written for this process. I am using the "petl" package in python 3 for this purpose. 'locations.txt' is the raw data file.
import glob, os
from petl import *
class ETL():
    """Pick tagged values out of one record.

    A record is a list of string terms. The first character of a term is
    its tag ('L', 'P', 'C', 'R' or 'I') and the rest is the value. When a
    tag occurs more than once, the last occurrence wins.
    """

    def __init__(self, input):
        self.list = input

    def _last_tagged(self, tag):
        """Return the value of the last term starting with *tag*, or None."""
        value = None
        for term in self.list:
            if term.startswith(tag):
                value = term[len(tag):]
        return value

    def parse_P(self):
        """Return the comma-separated values of the 'P' term as a list.

        Returns an empty list when the record has no 'P' term.
        """
        personids = self._last_tagged('P')
        if personids is None:
            return []
        return personids.split(',')

    def return_location(self):
        """Return the value of the 'L' term, or None if there is none."""
        return self._last_tagged('L')

    def return_location_id(self, location):
        """Unfinished stub: always returns None; *location* is ignored."""
        return None

    def return_country_id(self):
        """Return the value of the 'C' term, or None if there is none."""
        return self._last_tagged('C')

    def return_region_id(self):
        """Return the value of the 'R' term, or None if there is none."""
        return self._last_tagged('R')

    def return_city_id(self):
        """Return the value of the 'I' term, or None if there is none."""
        return self._last_tagged('I')
print (os.getcwd())
# Raw string: "\E" is not a valid escape sequence, so the backslash in the
# Windows path must not be left to the string-literal parser.
os.chdir(r"D:\ETL-IntroductionProject")
print (os.getcwd())

# Header row; one data row per (location, person) pair is appended below.
final_location = [['L','P', 'C', 'R', 'I']]
new_location = fromtext('locations.txt', encoding= 'Latin-1')

# Keep only the lines that start with 'L' and split each one into its
# '_'-separated terms.
stored_list = [
    row[0].split('_') for row in new_location if row[0].startswith('L')
]

for lst in stored_list:
    tabling = ETL(lst)
    location = tabling.return_location()
    country = tabling.return_country_id()
    city = tabling.return_city_id()
    region = tabling.return_region_id()
    for person in tabling.parse_P():
        final_location.append([location, person, country, region, city])

# NOTE: totext() has no default row format; called without a template=
# argument it fails with "AssertionError: template is required".
totext(final_location, 'l1.txt')
However when I use "totext" function of petl, it throws me an "Assertion Error".
AssertionError: template is required
I am unable to understand what the fault is. Can some one please explain the problem I am facing and what I should be doing ?
The template parameter of the totext function is not optional: there is no default format for how the rows are written, so you must provide a template. Check the documentation for totext here for an example: https://petl.readthedocs.io/en/latest/io.html#text-files
The template describes the format of each row that it writes out using the field headers to describe things, you can optionally pass in a prologue to write the header too. A basic template in your case would be:
# The placeholders are the field names from the header row. totext() writes
# the formatted template as-is for every row, so the trailing "\n" is what
# puts each row on its own line.
table_new_template = "{L} {P} {C} {R} {I}\n"
totext(final_location, 'l1.txt', template=table_new_template)
I'm working with digraph defined in dot language using networkx. I need to achieve something like this:
X = networkx.read_dot('_t.dot') #loads dotfile
def navigate_through_model(model, type): #model is the graph, type -> string of desired 'comment'
    # ... (this is the part I need help with: build `path`) ...
    return path
if __name__ == '__main__':
    # I need to return a path that goes through all nodes with
    # comment == 'regression' (it can be another attribute, comment is only
    # for the question)
    print(navigate_through_model(X, 'regression'))
And I'm kind a lost in this :(
Any help will be appreciated.
OK, I finally found (after a good sleep) that I can use the weights of the edges to achieve the same result. So the solution is easy: give the edges on the 'regression' path the lowest weight and then just generate the shortest path.
# The module itself is not callable; the shortest-path search is
# networkx.shortest_path, with `weight` naming the edge attribute to use.
networkx.shortest_path(X, 'start', 'end', weight='weight')
In order to do this you have to edit networkx/algorithms/shortest_paths/weighted.py
There is some kind of bug on the row 342.
vw_dist = dist[v] + edgedata.get(weight, 1)
You have to change it to:
vw_dist = dist[v] + float(edgedata.get(weight, 1)) #if you want to use floating number or just int to use integers
OK, I finally got to this:
import networkx
X = networkx.read_dot('_t.dot')
def navigate_through_model(model, start_node, end_node, typex):
    """Walk from start_node to end_node along edges whose 'comment' equals typex.

    Returns the list of (from_node, to_node) edges taken, or the string
    "not found".

    NOTE(review): the nesting below was reconstructed from a listing that
    had lost its indentation -- confirm against the author's intent.
    """
    path = []
    actual_node = start_node
    visited_nodes = []
    status = True
    while actual_node != end_node:
        u = networkx.all_neighbors(model, actual_node)
        for line in u:
            target_node = line
            print target_node
            try:
                # [0] picks the edge stored under key 0 -- presumably model
                # is a multigraph; TODO confirm.
                comm = model[actual_node][target_node][0]['comment']
                if (comm == typex and target_node not in visited_nodes):
                    path.append((actual_node, target_node))
                    # The walk advances immediately, so the remaining
                    # neighbours in `u` are looked up from the new node.
                    actual_node = target_node
                    visited_nodes.append(target_node)
                    print path
                    print '\n'
                    print visited_nodes
                    status = True
                else:
                    status = False
            except Exception as err:
                # Any failure while reading the edge's comment is ignored
                # and the neighbour is skipped.
                pass
        # status reflects the last neighbour whose comment could be read.
        if not status:
            return "not found"
    return path
print navigate_through_model(X, 'start', 'end', 'regression')
Maybe it is not the best solution but it works!
I've looked through as many answers on this subject as I could find and all suggested that it's a global - local conflict. I can't see how this would apply in my case but please do explain. Here's the error :
"local variable 'CompletelyUniqueName' referenced before assignment"
and here is the code, a function I call from another script :
def geopixsum(filename):
    """Sum the band-1 pixels of a '.tif' file, leaving out no-data pixels.

    NOTE(review): CompletelyUniqueName is assigned only inside the
    ``.tif`` branch, so for any other filename the final ``return`` reads
    a local that was never bound.
    """
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case Geotiff)
    if filename.endswith('.tif'):
        # open the image
        try:
            inDs = gdal.Open(filename)
        except:
            # NOTE(review): prints `file`, not the `filename` argument, and
            # execution continues even though the open failed.
            print 'Could not open ',file,'\n'
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0,0,cols,rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        # keep only the pixels that differ from the no-data value
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print 'sum = ',CompletelyUniqueName
        # drop the reference to the dataset
        inDs = None
    return CompletelyUniqueName
This code worked when it was not a function but just a script on its own. Again, I know this makes it seem like a global - local issue, but given the name I've assigned the variable I think I've gone to great enough lengths to avoid a conflict.
You should either define a default value for CompletelyUniqueName (for the case where filename.endswith('.tif') is False):
def geopixsum(filename):
    # Default value, so the name is bound even when the file is not a .tif.
    CompletelyUniqueName = 0
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
    return CompletelyUniqueName
Or return inside if statement
def geopixsum(filename):
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
        # Returning here means the name is only read after it was assigned;
        # for any other file the function falls off the end and returns None.
        return CompletelyUniqueName
The simplest fix:
def geopixsum(filename):
    """Return the sum of the band-1 pixels of a GeoTIFF, leaving out no-data pixels.

    Args:
        filename: Path of the image; only names ending in '.tif' are read.

    Returns:
        The pixel sum, or 0 when *filename* is not a '.tif' file or the
        file cannot be opened.
    """
    CompletelyUniqueName = 0  # or None, or anything you want to return
                              # if the file is not a tif
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case Geotiff)
    if filename.endswith('.tif'):
        # open the image; gdal.Open signals failure by returning None, or by
        # raising RuntimeError when GDAL exceptions are enabled
        try:
            inDs = gdal.Open(filename)
        except RuntimeError:
            inDs = None
        if inDs is None:
            print('Could not open %s\n' % filename)
            return CompletelyUniqueName
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0, 0, cols, rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        # keep only the pixels that differ from the no-data value
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print('sum =  %s' % (CompletelyUniqueName,))
        # drop the reference to the dataset
        inDs = None
    return CompletelyUniqueName