How do I debug this error with Python?

My code that I will post below gives me this error and I can't figure out why or how to fix it. If anyone could help I would greatly appreciate it. Thanks!
Traceback (most recent call last):
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 99, in <module>
main()
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 76, in main
for final_url in pool.imap(handle_listing, listings):
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenpool.py", line 232, in next
val = self.waiters.get().wait()
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 166, in wait
return self._exit_event.wait()
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\event.py", line 120, in wait
current.throw(*self._exc)
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 192, in main
result = function(*args, **kwargs)
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 48, in handle_listing
yellow_page = BeautifulSoup(download(yellow_page_url))
File "build\bdist.win32\egg\BeautifulSoup.py", line 1519, in __init__
BeautifulStoneSoup.__init__(self, *args, **kwargs)
File "build\bdist.win32\egg\BeautifulSoup.py", line 1144, in __init__
self._feed(isHTML=isHTML)
File "build\bdist.win32\egg\BeautifulSoup.py", line 1168, in _feed
smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
File "build\bdist.win32\egg\BeautifulSoup.py", line 1770, in __init__
self._detectEncoding(markup, isHTML)
File "build\bdist.win32\egg\BeautifulSoup.py", line 1915, in _detectEncoding
'^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data)
TypeError: expected string or buffer
I don't know what it wants or what it means...
This is my code:
from gzip import GzipFile
from cStringIO import StringIO
import re
import webbrowser
import time
from difflib import SequenceMatcher
import os
import sys
from BeautifulSoup import BeautifulSoup
import eventlet
from eventlet.green import urllib2
import urllib2
import urllib
def download(url):
    print "Downloading:", url
    s = urllib2.urlopen(url).read()
    if s[:2] == '\x1f\x8b':
        ifh = GzipFile(mode='rb', fileobj=StringIO(s))
        s = ifh.read()
    print "Downloaded: ", url
    return s

def replace_chars(text, replacements):
    return ''.join(replacements.get(x,x) for x in text)

def handle_listing(listing_url):
    listing_document = BeautifulSoup(download(listing_url))
    # ignore pages that link to yellowpages
    if not listing_document.find("a", href=re.compile(re.escape("http://www.yellowpages.com/") + ".*")):
        listing_title = listing_document.title.text
        # define an alphabet
        alfa = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        reps = {' ':'-', ',':'', '\'':'', '[':'', ']':'', '-Suite-' + alfa[1-26] : ''}
        if TITLE_MATCH.match(listing_title) is not None:
            title, = TITLE_MATCH.match(listing_title).groups()
            if ADDRESS_MATCH.match(listing_title) is not None:
                address, = ADDRESS_MATCH.match(listing_title).groups()
                yellow_page_url = "http://www.yellowpages.com/%s/%s?order=distance" % (
                    replace_chars(address, reps),
                    replace_chars(title, reps),
                )
                yellow_page = BeautifulSoup(download(yellow_page_url))
                page_url = yellow_page.find("h3", {"class" : "business-name fn org"})
                if page_url:
                    page_url = page_url.a["href"]
                business_name = title[:title.index(",")]
                page = BeautifulSoup(download(page_url))
                yellow_page_address = page.find("span", {"class" : "street-address"})
                if yellow_page_address:
                    if SequenceMatcher(None, address, yellow_page_address.text).ratio() >= 0.5:
                        pid, = re.search(r'p(\d{5,20})\.jsp', listing_url).groups(0)
                        page_escaped = replace_chars(page_url, {':':'%3A', '/':'%2F', '?':'%3F', '=':'%3D'})
                        final_url = "http://www.locationary.com/access/proxy.jsp?ACTION_TOKEN=proxy_jsp$JspView$SaveAction&inPlaceID=%s&xxx_c_1_f_987=%s" % (
                            pid, page_escaped)
                        return final_url

def main():
    pool = eventlet.GreenPool()
    listings_document = BeautifulSoup(download(START_URL))
    listings = listings_document.findAll("a", href = LOCATION_LISTING)
    listings = [listing['href'] for listing in listings]
    for final_url in pool.imap(handle_listing, listings):
        print final_url
        """
        if str(final_url) is not None:
            url = str(final_url)
            req = urllib2.Request(url)
            response = urllib2.urlopen(req)
            page = response.read()
            time.sleep(2)
        """

for a in range(0,1):
    START_URL = 'http://www.locationary.com/place/en/US/Arkansas/Fayetteville-page2/?ACTION_TOKEN=NumericAction'
    TITLE_MATCH = re.compile(r'(.*) \(\d{1,10}.{1,100}\)$')
    ADDRESS_MATCH = re.compile(r'.{1,100}\((.*), .{4,14}, United States\)$')
    LOCATION_LISTING = re.compile(r'http://www\.locationary\.com/place/en/US/.{1,50}/.{1,50}/.{1,100}\.jsp')

if __name__ == '__main__':
    main()

A very common mistake made by novices using any language that supports exceptions is that they catch exceptions that they do not actually handle. This leads to hard-to-debug errors since it disrupts the normal flow of the program.
Specifically, catching urllib2.HTTPError in download() prevents actual problems from being propagated to the rest of the program. Either remove the exception handler altogether, or re-raise at the end of the handler so the real error stays visible to the caller.
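The posted code doesn't show that handler, but if your real download() does catch urllib2.HTTPError, a minimal sketch of the re-raise approach would look like this (illustrative only, Python 2 to match the question):

from gzip import GzipFile
from cStringIO import StringIO
import urllib2

def download(url):
    print "Downloading:", url
    try:
        s = urllib2.urlopen(url).read()
    except urllib2.HTTPError:
        # Log whatever is useful, then re-raise so the caller (and the
        # traceback) sees the real failure instead of a silent None.
        print "Failed to download:", url
        raise
    if s[:2] == '\x1f\x8b':  # gzip magic bytes
        s = GzipFile(mode='rb', fileobj=StringIO(s)).read()
    print "Downloaded: ", url
    return s

With the handler removed or re-raising, BeautifulSoup no longer receives None, so you should see the underlying HTTP failure instead of the confusing TypeError: expected string or buffer.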

Related

My googlesearch is not working in multiprocess pool, it gives a MaybeEncodingError

Currently I'm trying to use multiprocessing to search book titles and find their first 30 summary links with googlesearch in Python:
from googlesearch import search
from bs4 import BeautifulSoup
import os
from multiprocessing import Pool
import json
import requests
from urllib.parse import urlparse
SAVE_PDF_PATH = "../books_pdf"
SAVE_SUM_PATH = "../summary_txt"
def perform_search(title):
    """Perform search for the title summaries and save it's texts into files
    return quadruples of (url, domain, starting depth = 0 , maxdepth) used for crawl"""
    dir_path =f"{SAVE_SUM_PATH}/{title}"
    #print(f"dir_path: {dir_path}")
    try:
        os.mkdir(dir_path) #create a directory if needed
    except:
        pass
    query = title + " summary"
    print(f"query: {query}")
    results = list()
    for url in search(query, num = 30, stop = 30, verify_ssl = False):
        results.append(url)
    return results

def main():
    titles = list()
    for title in os.listdir(SAVE_PDF_PATH):
        titles.append(title[:-4])
    search_stuff = titles[0:2]
    print(f"search_stuff: {search_stuff}")
    p = Pool()
    results = p.map(perform_search, titles[0:2])
    p.close()
    p.join()
    # p2 = Pool()
    # results = p2.map(crawl, quads)
    # p2.close()
    # p2.join()
    print(f"results: {results}")

if __name__ == "__main__":
    main()
The thing is, it worked at one point and then stopped working; if I comment out the search, the multiprocessing works again.
Otherwise it gives me this error:
File "/Users/yao/Desktop/dkp/School/projects/Stuff_with_Books/summary_scraper/sum_scrape.py", line 73, in <module>
main()
File "/Users/yao/Desktop/dkp/School/projects/Stuff_with_Books/summary_scraper/sum_scrape.py", line 62, in main
results = p.map(perform_search, titles[0:2])
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 771, in get
raise self._value
multiprocessing.pool.MaybeEncodingError: Error sending result: '<multiprocessing.pool.ExceptionWithTraceback object at 0x7fa4750c4490>'. Reason: 'TypeError("cannot pickle '_io.BufferedReader' object")'
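The Reason part of that message means the worker raised an exception that the pool could not pickle back to the parent process (the exception object is holding an open _io.BufferedReader, which urllib-style HTTP errors often do). A minimal sketch of one common workaround, assuming the search() call is what raises: catch inside the worker and return only plain, picklable values. The titles below are placeholders.

from multiprocessing import Pool
from googlesearch import search  # same call as in the question

def perform_search(title):
    query = title + " summary"
    try:
        # Collect only plain strings so the result is always picklable.
        return list(search(query, num=30, stop=30))
    except Exception as exc:
        # Exceptions such as urllib's HTTPError can carry open file objects
        # that cannot be pickled, which is what triggers MaybeEncodingError;
        # return a plain description instead of letting the exception escape.
        return ("error", title, repr(exc))

if __name__ == "__main__":
    with Pool() as p:
        results = p.map(perform_search, ["title one", "title two"])
    print(results)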

How do I infinitely loop this function without an error?

I am trying to loop this function to retrieve vaccine alerts via WhatsApp, so that it constantly monitors the server once I start the script. I tried using while True:, but this error kept occurring. How could I fix this?
Traceback (most recent call last):
File "/Users/ragz/cowin.py", line 70, in <module>
vaccine_check()
File "/Users/ragz/cowin.py", line 35, in vaccine_check
json_output = json.dumps(available_centers, indent=4)
File "/usr/local/Cellar/python#3.9/3.9.4/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/__init__.py", line 234, in dumps
return cls(
File "/usr/local/Cellar/python#3.9/3.9.4/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 201, in encode
chunks = list(chunks)
File "/usr/local/Cellar/python#3.9/3.9.4/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 438, in _iterencode
o = _default(o)
File "/usr/local/Cellar/python#3.9/3.9.4/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type HTTPError is not JSON serializable
If anyone needs it, here's the code:
from cowin_api import CoWinAPI
import json
import datetime
import numpy as np
import os
from twilio.rest import Client
import selenium
from selenium import webdriver
import time
import io
import requests
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.webdriver.common.keys import Keys
from threading import Thread
state_id = '21'
district_id = '395'
min_age_limit = 18
time = datetime.datetime.now()
cowin = CoWinAPI()
# here im getting the centers and the vaccines
def vaccine_check():
    try:
        available_centers = cowin.get_availability_by_district(district_id)
        #outputing it to a json file and bringing it back
        json_output = json.dumps(available_centers, indent=4)
        f = open(f'tests/vaccinecheck[{time.strftime("%b %d %Y %H|%M")}].json', 'w')
        f.write(json_output)
        f.close()
        with open(f.name) as file:
            data = json.load(file)
        n = np.arange(100)
        for x in np.nditer(n):
            if data["centers"][x]["sessions"][0]["min_age_limit"] == 45:
                print('')
            else:
                print(f'[{time.strftime("%b %d %Y %H:%M")}]', data["centers"][x]["name"], '-- vaccines:', data["centers"][x]["sessions"][0]['available_capacity'], '-- age-limit:', data["centers"][x]["sessions"][0]["min_age_limit"])
                if data["centers"][x]["sessions"][0]["available_capacity"] >= 1:
                    twilio_send()
    except IndexError: # catch the error
        pass # pass will basically ignore it

def twilio_send():
    client = Client()
    from_whatsapp_number='whatsapp:twilio api demo num'
    to_whatsapp_number='whatsapp:my phone num'
    client.messages.create(body='vaccine available - book now!',
                           from_=from_whatsapp_number,
                           to=to_whatsapp_number)

while True:
    vaccine_check()
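The traceback shows json.dumps being handed an HTTPError object, so get_availability_by_district is sometimes returning an error object instead of the usual dict. A minimal sketch of a guard around that call, continuing from the script above (safe_vaccine_check is a hypothetical wrapper and the 60-second sleep is just an example polling interval):

import time as real_time  # the script above rebinds the name "time", so alias the module

def safe_vaccine_check():
    result = cowin.get_availability_by_district(district_id)
    if not isinstance(result, dict):
        # e.g. an HTTPError from a failed request: skip this round rather than
        # handing an unserializable object to json.dumps
        print("unexpected response, skipping:", repr(result))
        return
    json_output = json.dumps(result, indent=4)
    # ... write the file and check the centers as in vaccine_check() ...

while True:
    safe_vaccine_check()
    real_time.sleep(60)  # illustrative polling interval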

WebDriverException Error

I'm running a script to collect information from various pages on a website.
#python2
from __future__ import division
from bs4 import BeautifulSoup
from pyvirtualdisplay import Display
from BeautifulSoup import SoupStrainer
import pandas as pd
import urllib,re,csv,os,urllib2,requests,itertools,pdfkit,time
import smtplib
import math
from selenium import webdriver
import requests.packages.urllib3
import requests
requests.packages.urllib3.disable_warnings()
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from datetime import datetime
os.environ["LANG"] = "en_US.UTF-8"
start_time = time.time()
os.chdir('DIRECTORY')
#import .csv with variables for fulls list
fulls = zip(orgs, terms, sites, smo_ids, year_i, year_start, year_end)
orgs2 = []
terms2 = []
sites2 = []
results2 = []
smo_ids2 = []
article_number = []
years2 = []
numbers = range(2000001)
numbers = numbers[0::200]
start_time = time.time()
display = Display(visible=0, size=(1600, 1200))
display.start()
otime = datetime.now()
startpoint = 1
for full in fulls:
    site = full[2]
    org = full[0]
    smo_id = full[3]
    term = full[1]
    year = full[4]
    driver = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
    try:
        driver.get(site) #get original site info
    except (WebDriverException, TimeoutException) as e:
        print(e.Message)
        print "REFRESHING PAGE"
        driver.refresh(site)
    source = driver.page_source
    soup = BeautifulSoup(source, "html.parser")
    soup2 = soup.encode("utf-8")
    try:
        resultno = re.findall('<h1 id="pqResultsCount">\n(.*?) result',soup2)
        resultno = ''.join(resultno)
        resultno = resultno.translate(None, "(){}<>,")
        resultno = int(resultno)
    except ValueError, e:
        resultno = int(0)
    no_pages = int(math.ceil(resultno/20))
    an = re.findall('{"(.*?)markedlistcheckbox:markallitems',soup2)
    an = ''.join(an)
    an = re.findall('markAll":false,"formats":{(.*?)},"markURL"',an)
    an = ''.join(an)
    an = re.sub(r'":.+?,"', '', an)
    an = an.translate(None, '"')
    an = an.split(':', 1)[0]
    an = an.split('MSTAR_')
    an.pop(0)
    for i in an:
        article_number.append(i)
        years2.append(year)
        sites2.append(site)
        orgs2.append(org)
        smo_ids2.append(smo_id)
        terms2.append(term)
    #begin encryption search
    encrypt = re.findall('id="searchForm"(.*?)/></div>',soup2)
    encrypt = ''.join(encrypt)
    encrypt
    t_ac = re.findall('name="t:ac" type="hidden" value="(.*?)/',encrypt)
    t_ac = ''.join(t_ac)
    t_ac
    t_formdata = re.findall('name="t:formdata" type="hidden" value="(.*?)"',encrypt)
    t_formdata = ''.join(t_formdata)
    t_formdata
    #start page 2 stuff
    for page in range(2,no_pages+1):
        site_ = "https://WEBSITE.com/results:gotopage/" + str(page) + "?t:ac=" + t_ac + "/?t:formdata=" + t_formdata + ""
        driver.get(site_) #get subsequent page info
        source = driver.page_source # Here is your populated data for the page source
        soup_ = BeautifulSoup(source, "html.parser")
        soup2_ = soup_.encode("utf-8")
        an_ = re.findall('{"(.*?)markedlistcheckbox:markallitems',soup2_)
        an_ = ''.join(an_)
        an_ = re.findall('markAll":false,"formats":{(.*?)},"markURL"',an_)
        an_ = ''.join(an_)
        an_ = re.sub(r'":.+?,"', '', an_)
        an_ = an_.translate(None, '"')
        an_ = an_.split(':', 1)[0]
        an_ = an_.split('MSTAR_')
        an_.pop(0)
        for i_ in an_:
            article_number.append(i_)
            years2.append(year)
            sites2.append(site)
            orgs2.append(org)
            smo_ids2.append(smo_id)
            terms2.append(term)
    driver.quit()
    elapsed_time = time.time() - start_time
    try:
        ctime_1 = ctime
    except:
        ctime_1 = otime
    m, s = divmod(elapsed_time, 60)
    h, m = divmod(m, 60)
    ctime = datetime.now()
    diftime = ctime - ctime_1
    diftime = str(diftime)
    diftime = diftime[2:7]
    ctime2 = str(ctime)
    ctime2 = ctime2[11:19]
    print "%d:%02d:%02d | %s | %s" % (h, m, s, ctime2, diftime)
    print "%d: Page %d is complete" % (startpoint, startpoint)
    if startpoint in numbers:
        print "Sleeping for 10 seconds"
        time.sleep(10)
    startpoint += 1
article_info = zip(article_number, years2, sites2, orgs2, smo_ids2, terms2)
The code runs, but at various points (sometimes 20 mins into the run, sometimes 14 hours into it), I get the following error:
Traceback (most recent call last):
File "<stdin>", line 131, in <module>
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/chrome/webdriver.py", line 69, in __init__
desired_capabilities=desired_capabilities)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 151, in __init__
self.start_session(desired_capabilities, browser_profile)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 240, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 308, in execute
self.error_handler.check_response(response)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: session not created exception
from timeout: Timed out receiving message from renderer: 600.000
(Session info: chrome=64.0.3282.186)
(Driver info: chromedriver=2.35.528139 (47ead77cb35ad2a9a83248b292151462a66cd881),platform=Linux 4.13.0-36-generic x86_64)
I'm using current chrome and chromedriver, and I have tried this using selenium versions 3.9, 3.8, and 3.7. No matter what, I eventually get the above error.
Any ideas how to fix this error?
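One pattern that sometimes helps with intermittent "session not created" / renderer-timeout failures is to retry driver creation with a fresh ChromeDriver instead of assuming the first attempt succeeds. A minimal sketch (Python 2, matching the script above; new_driver_with_retry and the retry/wait values are illustrative, and this assumes the timeout is transient rather than a Chrome/chromedriver version mismatch):

import time
from selenium import webdriver
from selenium.common.exceptions import WebDriverException

def new_driver_with_retry(retries=3, wait=30):
    last_exc = None
    for attempt in range(retries):
        try:
            return webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
        except WebDriverException as e:
            last_exc = e
            print "driver start failed (attempt %d): %s" % (attempt + 1, e)
            time.sleep(wait)
    raise last_exc  # give up after the final attempt

# inside the loop over fulls:
#     driver = new_driver_with_retry()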

Python - unknown url type error

from tkinter import *
import tkinter as tk
import pyodbc
import urllib.request
from bs4 import BeautifulSoup,Comment
link =""
def scraper(urls):
    with urllib.request.urlopen(urls) as url:
        content = url.read()
        soup = BeautifulSoup(content, "html.parser")
        rows =soup.find_all('div',attrs={"class" : "reviewText"})
        for row in soup.find_all('div',attrs={"class" : "reviewText"}):
            print(row.text)

root1 = tk.Tk()
label1 = tk.Label(root1, text='product A')
input1 = StringVar()
entry1 = tk.Entry(root1,textvariable=input1)
label1.pack(side = tk.TOP)
entry1.pack()

buttonstr = tk.StringVar()

db = r"C:\Users\Goutham\Documents\keshav\testdb.accdb"
print("connecting db..")

def odbc():
    '''
    connects with odbc
    '''
    global link
    constr = 'Driver={Microsoft Access Driver (*.mdb, *.accdb)};Dbq=' + db
    conn = pyodbc.connect(constr, autocommit=True)
    cur = conn.cursor()
    check=input1.get()
    print("fetching from access.....")
    strsql = "select Url from student where PdtName='%s' " % (check,)
    cur.execute(strsql)
    results = cur.fetchall()
    link=check
    print (results,check)
    conn.close()

buttonA = tk.Button(text = "hello", command = odbc)
buttonA.pack()

scraper(link)
I need this code to get input, store it in the variable 'check', and compare it with the values in the database using a SQL query. The matching value from the database is used to retrieve the URL, which is passed as a parameter to the function scraper(), which prints the extracted text.
The following error is displayed:
Traceback (most recent call last):
File "C:\Python33\module1.py", line 62, in <module>
scraper(link)
File "C:\Python33\module1.py", line 13, in scraper
with urllib.request.urlopen(urls) as url:
File "C:\Python33\lib\urllib\request.py", line 156, in urlopen
return opener.open(url, data, timeout)
File "C:\Python33\lib\urllib\request.py", line 454, in open
req = Request(fullurl, data)
File "C:\Python33\lib\urllib\request.py", line 275, in __init__
self._parse()
File "C:\Python33\lib\urllib\request.py", line 280, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''
Please help.
Thank you.
You are calling scraper(link) at the end of your script, and at that point link is still the empty string. That's why you get ValueError: unknown url type: ''.
Remove that statement, and instead validate the URL (and call scraper()) inside your odbc callback, once the database lookup has actually produced a URL.
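A minimal sketch of that idea, as a drop-in replacement for odbc() in the script above. It assumes the Url column of the student table really holds the address to scrape (note the original code assigns link = check, i.e. the product name, not the fetched URL), and it uses a parameterised query instead of string formatting:

def odbc():
    constr = 'Driver={Microsoft Access Driver (*.mdb, *.accdb)};Dbq=' + db
    conn = pyodbc.connect(constr, autocommit=True)
    cur = conn.cursor()
    check = input1.get()
    # parameterised query instead of building the SQL string by hand
    cur.execute("select Url from student where PdtName = ?", (check,))
    row = cur.fetchone()
    conn.close()
    if row and row[0] and row[0].startswith(("http://", "https://")):
        scraper(row[0])  # only scrape once we have a real URL
    else:
        print("No valid URL found for", check)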

Why do I keep getting this title match error with my Python program?

When I run the following code, I keep getting this error:
Traceback (most recent call last):
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 94, in <module>
main()
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 71, in main
for final_url in pool.imap(handle_listing, listings):
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenpool.py", line 232, in next
val = self.waiters.get().wait()
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 166, in wait
return self._exit_event.wait()
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\event.py", line 120, in wait
current.throw(*self._exc)
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 192, in main
result = function(*args, **kwargs)
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 35, in handle_listing
title, = TITLE_MATCH.match(listing_title).groups()
AttributeError: 'NoneType' object has no attribute 'groups'
What is wrong?
It has something to do with the Title match but I don't know how to fix it!
If you could help me I would really appreciate it!
Thanks!
from gzip import GzipFile
from cStringIO import StringIO
import re
import webbrowser
import time
from difflib import SequenceMatcher
import os
import sys
from BeautifulSoup import BeautifulSoup
import eventlet
from eventlet.green import urllib2
import urllib2
import urllib
def download(url):
    print "Downloading:", url
    s = urllib2.urlopen(url).read()
    if s[:2] == '\x1f\x8b':
        ifh = GzipFile(mode='rb', fileobj=StringIO(s))
        s = ifh.read()
    print "Downloaded: ", url
    return s

def replace_chars(text, replacements):
    return ''.join(replacements.get(x,x) for x in text)

def handle_listing(listing_url):
    listing_document = BeautifulSoup(download(listing_url))
    # ignore pages that link to yellowpages
    if not listing_document.find("a", href=re.compile(re.escape("http://www.yellowpages.com/") + ".*")):
        listing_title = listing_document.title.text
        reps = {' ':'-', ',':'', '\'':'', '[':'', ']':''}
        title, = TITLE_MATCH.match(listing_title).groups()
        address, = ADDRESS_MATCH.match(listing_title).groups()
        yellow_page_url = "http://www.yellowpages.com/%s/%s?order=distance" % (
            replace_chars(address, reps),
            replace_chars(title, reps),
        )
        yellow_page = BeautifulSoup(download(yellow_page_url))
        page_url = yellow_page.find("h3", {"class" : "business-name fn org"})
        if page_url:
            page_url = page_url.a["href"]
        business_name = title[:title.index(",")]
        page = BeautifulSoup(download(page_url))
        yellow_page_address = page.find("span", {"class" : "street-address"})
        if yellow_page_address:
            if SequenceMatcher(None, address, yellow_page_address.text).ratio() >= 0.5:
                pid, = re.search(r'p(\d{5,20})\.jsp', listing_url).groups(0)
                page_escaped = replace_chars(page_url, {':':'%3A', '/':'%2F', '?':'%3F', '=':'%3D'})
                final_url = "http://www.locationary.com/access/proxy.jsp?ACTION_TOKEN=proxy_jsp$JspView$SaveAction&inPlaceID=%s&xxx_c_1_f_987=%s" % (
                    pid, page_escaped)
                return final_url

def main():
    pool = eventlet.GreenPool()
    listings_document = BeautifulSoup(download(START_URL))
    listings = listings_document.findAll("a", href = LOCATION_LISTING)
    listings = [listing['href'] for listing in listings]
    for final_url in pool.imap(handle_listing, listings):
        print final_url
        if str(final_url) is not None:
            url = str(final_url)
            req = urllib2.Request(url)
            response = urllib2.urlopen(req)
            page = response.read()
            time.sleep(2)

for a in range(2,3):
    START_URL = 'http://www.locationary.com/place/en/US/New_Jersey/Randolph-page' + str(a) + '/?ACTION_TOKEN=NumericAction'
    TITLE_MATCH = re.compile(r'(.*) \(\d{1,10}.{1,100}\)$')
    ADDRESS_MATCH = re.compile(r'.{1,100}\((.*), .{4,14}, United States\)$')
    LOCATION_LISTING = re.compile(r'http://www\.locationary\.com/place/en/US/.{1,50}/.{1,50}/.{1,100}\.jsp')

if __name__ == '__main__':
    main()
Quoting from your error:
title, = TITLE_MATCH.match(listing_title).groups()
AttributeError: 'NoneType' object has no attribute 'groups'
TITLE_MATCH.match(listing_title) returns None, so you can't call .groups().
When a regular-expression match() finds nothing, it returns None. Since you cannot call .groups() on None, you have to check for a match first. To do that:
Change this:
title, = TITLE_MATCH.match(listing_title).groups()
address, = ADDRESS_MATCH.match(listing_title).groups()
To this:
titleMatch = TITLE_MATCH.match(listing_title)
if titleMatch:
    title, = titleMatch.groups()
else:
    # handle it

addressMatch = ADDRESS_MATCH.match(listing_title)
if addressMatch:
    address, = addressMatch.groups()
else:
    # handle it
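In handle_listing, "handle it" will usually just mean skipping that listing. A sketch of one way to do that, dropped in where the two match lines currently sit; returning early means that listing simply produces no final_url, and the rest of the function stays as in the original:

listing_title = listing_document.title.text

title_match = TITLE_MATCH.match(listing_title)
address_match = ADDRESS_MATCH.match(listing_title)
if title_match is None or address_match is None:
    # page title doesn't look like a listing; skip it instead of crashing
    return None

title, = title_match.groups()
address, = address_match.groups()
# ... rest of the original function ...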
