Building SOAP request with suds and Python - XML not being generated properly - python

Here is the error:
No handlers could be found for logger "suds.client"
Traceback (most recent call last):
File "bling.py", line 134, in <module>
get_data("146.215.236.10", list)
File "bling.py", line 48, in get_data
retval = client.service.SelectCmDevice("", stuff)
File "/usr/lib/python2.6/site-packages/suds/client.py", line 542, in __call__
return client.invoke(args, kwargs)
File "/usr/lib/python2.6/site-packages/suds/client.py", line 602, in invoke
result = self.send(soapenv)
File "/usr/lib/python2.6/site-packages/suds/client.py", line 649, in send
result = self.failed(binding, e)
File "/usr/lib/python2.6/site-packages/suds/client.py", line 702, in failed
r, p = binding.get_fault(reply)
File "/usr/lib/python2.6/site-packages/suds/bindings/binding.py", line 265, in get_fault
raise WebFault(p, faultroot)
suds.WebFault: Server raised fault: 'org.xml.sax.SAXParseException: Element type "SelectItem" must be followed by either attribute specifications, ">" or "/>".'
Here is the code:
def get_data(ip, list):
    """
    Connect to soap webservice
    Append device name and status
    to data list
    """
    stuff = {}
    stuff['SelectBy'] = 'Name'
    count = 0
    for i in list:
        if "SelectItems" not in stuff:
            stuff['SelectItems'] = {}
        if 'SelectItem[' + str(count) + ']' not in stuff['SelectItems']:
            stuff['SelectItems']['SelectItem[' + str(count) + ']'] = {}
        stuff['SelectItems']['SelectItem[' + str(count) + ']']['Item'] = i
        count = count + 1
    t = HttpAuthenticated(**credentials)
    uri = 'https://' + ip + ':8443/realtimeservice/services/RisPort?wsdl'
    imp = Import('http://schemas.xmlsoap.org/soap/encoding/')
    doctor = ImportDoctor(imp)
    client = Client(url=uri, transport=t, doctor=doctor)
    retval = client.service.SelectCmDevice("", stuff)
    pprint(retval)
    sys.exit(0)
    for node in retval['SelectCmDeviceResult'].CmNodes:
        for dev in node.CmDevices:
            name = dev.Name
            status = dev.Status
            data.append([name, status])
I am trying to call the Cisco RisPort API from Python; however, even when emulating how it is done in PHP, I get this error. I would really rather not port the project over to PHP at this point.
Here is how PHP passes the data:
foreach ($devices_chunks as $chunk){
    echo '.';
    //Prepare RisPort request
    $array["SelectBy"] = "Name";
    $array["Status"] = "Registered";
    //$array["Class"] = "Phone";
    //$array["MaxReturnedDevices"] = "1000";
    $i = 1;
    foreach($chunk as $device){
        $array["SelectItems"]["SelectItem[$i]"]["Item"] = $device->name;
        $i++;
    }
    // Run RisPort query + wait a bit as max requests is 15 per min.
    $response = $soap_ris->SelectCmDevice("",$array);
I think the problem is with how suds converts the nested dict to XML. Any thoughts on how to fix this without manually building the SOAP XML request?
Thanks!
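One likely explanation: suds serializes dict keys verbatim as element names, so a key like 'SelectItem[0]' produces the invalid element <SelectItem[0]>, which matches the SAXParseException about "SelectItem". A way around it is to build the request with factory objects and let suds repeat a plain list as <SelectItem> elements. The following is only a minimal sketch, not tested against a real CUCM server; the type names CmSelectionCriteria and ArrayOfSelectItem are assumptions (print the client object to see the complex types your WSDL actually defines), and device_names stands in for the list passed into get_data:
criteria = client.factory.create('CmSelectionCriteria')
criteria.SelectBy = 'Name'
criteria.Status = 'Registered'
criteria.SelectItems = client.factory.create('ArrayOfSelectItem')
# suds serializes a Python list as repeated <SelectItem> elements
criteria.SelectItems.SelectItem = [{'Item': name} for name in device_names]
retval = client.service.SelectCmDevice("", criteria)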

Related

Spotify api | my python script won't run when my Spotify app is turned off

I would like to make a Python script that retrieves what I'm listening to on Spotify in real time and sends that information to an Arduino.
The problem is that my script refuses to run when Spotify is turned off. Other information also passes between my PC and the Arduino, and if the script does not run, most of the functions I created on the Arduino stop working.
So I would like my script to keep working even when I'm not listening to music.
Error:
Traceback (most recent call last):
File "C:\Users\horvik\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\models.py", line 971, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Users\horvik\AppData\Local\Programs\Python\Python310\lib\json\__init__.py", line 346, in loads
return _default_decoder.decode(s)
File "C:\Users\horvik\AppData\Local\Programs\Python\Python310\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\horvik\AppData\Local\Programs\Python\Python310\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\horvik\Desktop\project.py", line 62, in <module>
serialDataToEncode = finishcpu + finishmem + get_current_track(ACCESS_TOKEN)
File "C:\Users\horvik\Desktop\Deskcompanionv1.py", line 25, in get_current_track
json_resp = response.json()
File "C:\Users\horvik\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\models.py", line 975, in json
raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
requests.exceptions.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
I tried using other fields from the Spotify API, such as 'is_active' or 'is_playing', in an if/else, hoping to avoid calling response.json(), but I understood that I have to go through it.
There's probably something I'm missing, which is why I'm making this post; I'd appreciate any help.
Code:
import psutil
import serial
import requests
import time
from pprint import pprint

SPOTIFY_GET_CURRENT_TRACK_URL = 'https://api.spotify.com/v1/me/player/currently-playing'
ACCESS_TOKEN = 'i use token'

serial = serial.Serial()
serial.baudrate = 9600
serial.port = "COM5"
serial.open()

# spotify section
def get_current_track(access_token):
    response = requests.get(
        SPOTIFY_GET_CURRENT_TRACK_URL,
        headers={
            "Authorization": f"Bearer {access_token}"
        }
    )
    json_resp = response.json()
    if json_resp['is_playing'] == True:
        track_name = json_resp['item']['name']
        artists = [artist for artist in json_resp['item']['artists']]
        artist_names = ', '.join([artist['name'] for artist in artists])
        spotifytracksplaying = track_name + " By " + artist_names
    else:
        spotifytracksplaying = "you are not listening to music"
    return spotifytracksplaying

while(1):
    # cpu and ram usage section
    cpu = psutil.cpu_percent(interval=1.2)
    mem = psutil.virtual_memory().percent
    if cpu < 10:
        finishcpu = " " + str(cpu)
    elif cpu < 100:
        finishcpu = " " + str(cpu)
    else:
        finishcpu = str(cpu)
    if mem < 10:
        finishmem = " " + str(mem)
    elif mem < 100:
        finishmem = " " + str(mem)
    else:
        finishmem = str(mem)
    # sending information
    serialDataToEncode = finishcpu + finishmem + get_current_track(ACCESS_TOKEN)
    serialDatatosend = serialDataToEncode.encode("UTF-8")
    serial.write(serialDatatosend)
    # Debugging
    #print(serialDatatosend)
    ##print(get_current_track(ACCESS_TOKEN))

serial.close()
You can catch JSONDecodeError if the resource at the URL doesn't return valid JSON.
from json.decoder import JSONDecodeError

try:
    json_resp = response.json()
except JSONDecodeError:
    # Not JSON. No data. Return an error.
Thank you very much for your quick response, it works!
Here is my code now:
def get_current_track(access_token):
    response = requests.get(
        SPOTIFY_GET_CURRENT_TRACK_URL,
        headers={
            "Authorization": f"Bearer {access_token}"
        }
    )
    try:
        json_resp = response.json()
        track_id = json_resp['item']['id']
        track_name = json_resp['item']['name']
        artists = [artist for artist in json_resp['item']['artists']]
        link = json_resp['item']['external_urls']['spotify']
        artist_names = ', '.join([artist['name'] for artist in artists])
        spotifytracksplaying = track_name + " By " + artist_names
    except:
        spotifytracksplaying = "No music played"
    return spotifytracksplaying
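For completeness: the usual reason response.json() fails here is that the currently-playing endpoint returns 204 No Content with an empty body when nothing is playing, so checking the status code first is an alternative to catching the decode error. A minimal sketch of that variant (not the original poster's code):
def get_current_track(access_token):
    response = requests.get(
        SPOTIFY_GET_CURRENT_TRACK_URL,
        headers={"Authorization": f"Bearer {access_token}"}
    )
    # Spotify returns 204 with an empty body when nothing is playing
    if response.status_code != 200:
        return "No music played"
    json_resp = response.json()
    track_name = json_resp['item']['name']
    artist_names = ', '.join(artist['name'] for artist in json_resp['item']['artists'])
    return track_name + " By " + artist_names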

HTTP Error 403: Forbidden while scraping Facebook group posts using the Graph API

So I was working on my academic project, which involves scraping public Facebook group posts with Python. I have done this more than a couple of times and it worked smoothly, but this time it gives me this error:
HTTP Error 403: Forbidden
I tried scraping Facebook pages and that worked fine, which suggests the issue is not with the access token.
I am using the same script that ran successfully before. I tried changing the user access token, the app access token, and the type of request (GET, POST), but with no success. I am attaching the script I used (found somewhere on GitHub) along with the error logs.
import urllib.request as ur
import json
import datetime
import csv
import time

group_id = input("Please Paste Public Group ID:")
#access_token = app_id + "|" + app_secret
access_token = input("Please Paste Your Access Token:")

def request_until_succeed(url):
    req = ur.Request(url)
    success = False
    while success is False:
        try:
            response = ur.urlopen(req)
            if response.getcode() == 200:
                success = True
        except Exception as e:
            print(e)
            time.sleep(5)
            print("Error for URL %s: %s" % (url, datetime.datetime.now()))
            print("Retrying.")
    return response.read()

# Needed to write tricky unicode correctly to csv
def unicode_normalize(text):
    return text.translate({0x2018: 0x27, 0x2019: 0x27, 0x201C: 0x22, 0x201D: 0x22,
                           0xa0: 0x20}).encode('utf-8')

def getFacebookPageFeedData(group_id, access_token, num_statuses):
    # Construct the URL string; see
    # http://stackoverflow.com/a/37239851 for Reactions parameters
    base = "https://graph.facebook.com/v2.6"
    node = "/%s/feed" % group_id
    fields = "/?fields=message,link,permalink_url,created_time,type,name,id," + \
        "comments.limit(0).summary(true),shares,reactions." + \
        "limit(0).summary(true),from"
    parameters = "&limit=%s&access_token=%s" % (num_statuses, access_token)
    url = base + node + fields + parameters
    # retrieve data
    data = json.loads(request_until_succeed(url))
    return data

def getReactionsForStatus(status_id, access_token):
    # See http://stackoverflow.com/a/37239851 for Reactions parameters
    # Reactions are only accessible at a single-post endpoint
    base = "https://graph.facebook.com/v2.6"
    node = "/%s" % status_id
    reactions = "/?fields=" \
        "reactions.type(LIKE).limit(0).summary(total_count).as(like)" \
        ",reactions.type(LOVE).limit(0).summary(total_count).as(love)" \
        ",reactions.type(WOW).limit(0).summary(total_count).as(wow)" \
        ",reactions.type(HAHA).limit(0).summary(total_count).as(haha)" \
        ",reactions.type(SAD).limit(0).summary(total_count).as(sad)" \
        ",reactions.type(ANGRY).limit(0).summary(total_count).as(angry)"
    parameters = "&access_token=%s" % access_token
    url = base + node + reactions + parameters
    # retrieve data
    data = json.loads(request_until_succeed(url))
    return data

def processFacebookPageFeedStatus(status, access_token):
    # The status is now a Python dictionary, so for top-level items,
    # we can simply call the key.
    # Additionally, some items may not always exist,
    # so must check for existence first
    status_id = status['id']
    status_message = '' if 'message' not in status.keys() else \
        unicode_normalize(status['message'])
    link_name = '' if 'name' not in status.keys() else \
        unicode_normalize(status['name'])
    status_type = status['type']
    status_link = '' if 'link' not in status.keys() else \
        unicode_normalize(status['link'])
    status_permalink_url = '' if 'permalink_url' not in status.keys() else \
        unicode_normalize(status['permalink_url'])
    status_author = unicode_normalize(status['from']['name'])
    # Time needs special care since a) it's in UTC and
    # b) it's not easy to use in statistical programs.
    status_published = datetime.datetime.strptime(
        status['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
    status_published = status_published + datetime.timedelta(hours=-5)  # EST
    # best time format for spreadsheet programs:
    status_published = status_published.strftime('%Y-%m-%d %H:%M:%S')
    # Nested items require chaining dictionary keys.
    num_reactions = 0 if 'reactions' not in status else \
        status['reactions']['summary']['total_count']
    num_comments = 0 if 'comments' not in status else \
        status['comments']['summary']['total_count']
    num_shares = 0 if 'shares' not in status else \
        status['shares']['count']
    # Counts of each reaction separately; good for sentiment
    # Only check for reactions if past date of implementation:
    # http://newsroom.fb.com/news/2016/02/reactions-now-available-globally/
    reactions = getReactionsForStatus(status_id, access_token) \
        if status_published > '2016-02-24 00:00:00' else {}
    num_likes = 0 if 'like' not in reactions else \
        reactions['like']['summary']['total_count']
    # Special case: Set number of Likes to Number of reactions for pre-reaction
    # statuses
    num_likes = num_reactions if status_published < '2016-02-24 00:00:00' else \
        num_likes

    def get_num_total_reactions(reaction_type, reactions):
        if reaction_type not in reactions:
            return 0
        else:
            return reactions[reaction_type]['summary']['total_count']

    num_loves = get_num_total_reactions('love', reactions)
    num_wows = get_num_total_reactions('wow', reactions)
    num_hahas = get_num_total_reactions('haha', reactions)
    num_sads = get_num_total_reactions('sad', reactions)
    num_angrys = get_num_total_reactions('angry', reactions)
    # return a tuple of all processed data
    return (status_id, status_message, status_author, link_name, status_type,
            status_link, status_permalink_url, status_published, num_reactions,
            num_comments, num_shares, num_likes, num_loves, num_wows, num_hahas,
            num_sads, num_angrys)

def scrapeFacebookPageFeedStatus(group_id, access_token):
    with open('%s_facebook_statuses.csv' % group_id, 'w') as file:
        w = csv.writer(file)
        w.writerow(["status_id", "status_message", "status_author",
                    "link_name", "status_type", "status_link", "permalink_url",
                    "status_published", "num_reactions", "num_comments",
                    "num_shares", "num_likes", "num_loves", "num_wows",
                    "num_hahas", "num_sads", "num_angrys"])
        has_next_page = True
        num_processed = 0  # keep a count on how many we've processed
        scrape_starttime = datetime.datetime.now()
        print("Scraping %s Facebook Page: %s\n" % (group_id, scrape_starttime))
        statuses = getFacebookPageFeedData(group_id, access_token, 100)
        while has_next_page:
            for status in statuses['data']:
                # Ensure it is a status with the expected metadata
                if 'reactions' in status:
                    w.writerow(processFacebookPageFeedStatus(status,
                                                             access_token))
                # output progress occasionally to make sure code is not
                # stalling
                num_processed += 1
                if num_processed % 100 == 0:
                    print("%s Statuses Processed: %s" % (num_processed,
                                                         datetime.datetime.now()))
            # if there is no next page, we're done.
            if 'paging' in statuses.keys():
                statuses = json.loads(request_until_succeed(
                    statuses['paging']['next']))
            else:
                has_next_page = False
        print("\nDone!\n%s Statuses Processed in %s" %
              (num_processed, datetime.datetime.now() - scrape_starttime))

if __name__ == '__main__':
    scrapeFacebookPageFeedStatus(group_id, access_token)
Error logs:
HTTP Error 403: Forbidden
Traceback (most recent call last):
  File "scrape_facebook.py", line 22, in request_until_succeed
    response = ur.urlopen(req)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 532, in open
    response = meth(req, response)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 642, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 570, in error
    return self._call_chain(*args)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 504, in _call_chain
    result = func(*args)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 650, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "scrape_facebook.py", line 198, in <module>
    scrapeFacebookPageFeedStatus(group_id, access_token)
  File "scrape_facebook.py", line 168, in scrapeFacebookPageFeedStatus
    statuses = getFacebookPageFeedData(group_id, access_token, 100)
  File "scrape_facebook.py", line 52, in getFacebookPageFeedData
    data = json.loads(request_until_succeed(url))
  File "scrape_facebook.py", line 27, in request_until_succeed
    time.sleep(5)
KeyboardInterrupt

Python RoboBrowser SSL Error : bad handshake: SysCallError(104, 'ECONNRESET')

I am trying to fill out a form on a web page and get some of the results back using the RoboBrowser library.
I have a file with ~200k references that may not give an adequate answer (the street name stored in data[1] may differ from the one required by the form, causing another page to open).
My code runs and either prints the information I wanted or prints "NS" if the response is not what is expected (mainly because the street name is wrong).
However, after a random amount of time (at first after about 1300 cycles, then after about 100-300 or fewer), I get:
Traceback (most recent call last):
File "web_scraper.py", line 49, in <module>
result = rechCadastre(data_point,result)
File "web_scraper.py", line 16, in rechCadastre
browser.submit_form(form)
File "/usr/local/lib/python2.7/dist-packages/robobrowser/browser.py", line 343, in submit_form
response = self.session.request(method, url, **send_args)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 488, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 609, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 497, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: ("bad handshake: SysCallError(104, 'ECONNRESET')",)
I tried adding time.sleep(0.01) here and there, thinking I might be overloading the site, but it didn't help.
Does anyone have an idea?
My code:
import csv
import re
from robobrowser import RoboBrowser
import time

def rechCadastre(data, result):
    time.sleep(0.01)
    form = browser.get_form(id="rech")
    repetition = ''
    if data[2] != '':
        repetition = data[2][0]
    param = {'numeroVoie': data[1], 'nomVoie': data[0], 'ville': data[3], 'indiceRepetition': repetition}
    for x in iter(param):
        form[x] = param[x]
    time.sleep(0.01)
    browser.submit_form(form)
    success = browser.select('#onglet')
    if not success:
        result += "NS,NS"
    else:
        answer = browser.select('.nomcol')[0]
        parcelle = re.split('(\W+)', re.search("Parcelle n\W [0-9]{1,4}", answer.text).group(0))[4]
        feuille = re.split('(\W+)', re.search("Feuille [0-9]{1,4} [A-Z]{1,4}", answer.text).group(0))[4]
        result += feuille + "," + parcelle
        browser.back()
    return result

data = []
url = "https://www.cadastre.gouv.fr/scpc/accueil.do"
browser = RoboBrowser()
browser.open(url)
infile = open("treated.csv", 'rb')
reader = csv.reader(infile)
for row in reader:
    data.append(row)

#compt = 0
for data_point in data:
    # if compt == 20:
    #     break
    # data_point = data[i]
    result = data_point[0] + "," + data_point[1] + "," + data_point[2] + "," + data_point[3] + ",,"
    nd = data_point[0] == "#N/D"
    rep = (data_point[2] == '') or (data_point[2] == 'BIS') or (data_point[2] == 'TER') or (data_point[2] == 'QUATER') and (data_point[2] == 'B')
    acceptable = rep and (not nd)
    if acceptable:
        result = rechCadastre(data_point, result)
    print result
    # compt += 1
I am using Ubuntu 16.04.2 LTS and Python 2.7.
Thank you!
I managed to avoid the error by increasing the time.sleep wait to 1 second for each call.
The program sometimes still gets the error, but very rarely (about once in 20,000-30,000 requests).
Thank you for your support!
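If the occasional reset still matters, another option is to retry the submit when requests raises an SSLError instead of only sleeping longer. A minimal sketch, not from the original code; the retry count and delay are arbitrary:
import time
from requests.exceptions import SSLError

def submit_with_retry(browser, form, retries=3, delay=1):
    # Retry the form submission a few times if the TLS handshake is reset
    for attempt in range(retries):
        try:
            browser.submit_form(form)
            return True
        except SSLError:
            time.sleep(delay)
    return False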

GRPC: remote error and missing parameters from message

I am running a simple python server application with grpc. This is the server code:
class Classifier(grpc_cl.BetaClassifierServicer):
    def __init__(self):
        default_config = self.getDefaultConfig()
        self.engine_config = default_config["engine"]
        self.port = default_config["daemon"]["port"]
        # self.engine = loadLSTM3Model(self.engine_config)

    def getDefaultConfig(self):
        with open("service.properties.yaml", "r") as stream:
            default_config = yaml.load(stream)
        return default_config

    def Analyze(self, request, context):
        file_name = request.sentences_file
        print "This is the file to analyze ", file_name
        error = grpc_cl.Error(error_code=0, error_message="OK")
        return grpc_cl.CategoryReply(error)
The client:
channel = implementations.insecure_channel('localhost', 50051)
stub = classifier_grpc.beta_create_Classifier_stub(channel)
reply = stub.Analyze(classifier_grpc.CategoryRequest(user_context=1, sentences_file="file"), 10000)
print 'Answer', reply.error.error_message
And the .proto file with the messages:
syntax = "proto3";

service Classifier {
    rpc Analyze(CategoryRequest) returns (CategoryReply) {}
    rpc Train(TrainRequest) returns (CategoryReply) {}
}

message CategoryRequest {
    int32 user_context = 1;
    string sentences_file = 2;
}

message CategoryReply {
    Error error = 1;
    string categories_file = 2;
}

message Error {
    int32 error_code = 1;
    string error_message = 2;
}
Launching the server and the client, and connecting both of them to the respective port, gives me this error:
Traceback (most recent call last):
File "/home/~/service/client.py", line 19, in <module>
reply = stub.Analyze(classifier_grpc.CategoryRequest(user_context=1, sentences_file="file"), 10000)
File "/usr/local/lib/python2.7/dist-packages/grpc/framework/crust/implementations.py", line 73, in __call__
protocol_options, metadata, request)
File "/usr/local/lib/python2.7/dist-packages/grpc/framework/crust/_calls.py", line 109, in blocking_unary_unary
return next(rendezvous)
File "/usr/local/lib/python2.7/dist-packages/grpc/framework/crust/_control.py", line 412, in next
raise self._termination.abortion_error
grpc.framework.interfaces.face.face.RemoteError
Does somebody know why this happens? Also, I could extract the user_context from the CategoryRequest, but not the sentences_file string; that one is blank.
grpc.framework.interfaces.face.face.RemoteError indicates that an exception occurred on the server while processing the request.
In your case, protobuf parameters need to be specified by keyword, i.e.
return grpc_cl.CategoryReply(error)
should be
return grpc_cl.CategoryReply(error=error)
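Applied to the servicer in the question, the Analyze method would look roughly like this (a sketch based only on the code above, assuming the generated module is named grpc_cl as in the question):
def Analyze(self, request, context):
    file_name = request.sentences_file
    print "This is the file to analyze ", file_name
    error = grpc_cl.Error(error_code=0, error_message="OK")
    # protobuf message fields must be passed as keyword arguments
    return grpc_cl.CategoryReply(error=error)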

ImportError on RQ Worker/Redis/Flask

I am having issues with this setup. In summary, once the user presses submit on a form, the data is passed to an RQ worker via Redis for processing.
The error from rqworker is
23:56:44 RQ worker u'rq:worker:HAFun.12371' started, version 0.5.6
23:56:44
23:56:44 *** Listening on default...
23:56:57 default: min_content.process_feed.process_checks(u'http://www.feedurl.com/url.xml', u'PM', u'alphanumeric', u'domain#domain.com') (9e736730-e97f-4ee5-b48d-448d5493dd6c)
23:56:57 ImportError: No module named min_content.process_feed
Traceback (most recent call last):
File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/worker.py", line 568, in perform_job
rv = job.perform()
File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/job.py", line 495, in perform
self._result = self.func(*self.args, **self.kwargs)
File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/job.py", line 206, in func
return import_attribute(self.func_name)
File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/utils.py", line 150, in import_attribute
module = importlib.import_module(module_name)
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
ImportError: No module named min_content.process_feed
Traceback (most recent call last):
File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/worker.py", line 568, in perform_job
rv = job.perform()
File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/job.py", line 495, in perform
self._result = self.func(*self.args, **self.kwargs)
File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/job.py", line 206, in func
return import_attribute(self.func_name)
File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/utils.py", line 150, in import_attribute
module = importlib.import_module(module_name)
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
ImportError: No module named min_content.process_feed
23:56:57 Moving job to u'failed' queue
I have tried starting rqworker in a variety of ways
rqworker --url redis://localhost:6379
rqworker
views.py
from min_content import app
from flask import render_template
from .forms import SubmissionForm
from flask import request
from .process_feed import process_checks  # this is the function that does the checks
from redis import Redis
from rq import Queue

def process():
    feedUrl = request.form['feedUrl']
    source = request.form['pmsc']
    ourAssignedId = request.form['assignedId']
    email_address = request.form['email_address']
    conn = redis.StrictRedis('localhost', 6379, 0)
    q = Queue(connection=conn)
    result = q.enqueue(process_checks, feedUrl, source, ourAssignedId, email_address)
    return 'It\'s running and we\'ll send you an email when its done<br /><br />Do another one'
process_feed has a function called process_checks which works as expected.
I know the function itself works because calling it directly with the line below, instead of going through RQ, works fine.
do_it = process_checks(feedUrl,source,ourAssignedId)
The strange thing is that this all worked perfectly well before I closed my SSH connection to the VPS.
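Worth noting (an observation, not something from the original post): the worker re-imports the job from its dotted path, here min_content.process_feed.process_checks, inside the rqworker process, so that process needs the min_content package importable too (same virtualenv active, package on its path). Enqueueing by the dotted string makes the dependency explicit; a sketch of the view code under that assumption, using the same variables as process() above:
from redis import StrictRedis
from rq import Queue

conn = StrictRedis('localhost', 6379, 0)
q = Queue(connection=conn)
# The worker will import this dotted path, so min_content must be on its path
job = q.enqueue('min_content.process_feed.process_checks',
                feedUrl, source, ourAssignedId, email_address)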
Running ps -aux returns this which indicates the redis is running
root 11894 0.1 0.4 38096 2348 ? Ssl Oct25 0:01 /usr/local/bin/redis-server *:6379
Restarting redis does nothing, nor does restarting apache2
sudo service redis_6379 start
sudo service redis_6379 stop
sudo service apache2 restart
I followed this guide exactly and like I said, this worked until I terminated the SSH connection to my VPS
I'm running in a virtual environment, if that makes any difference; I activate it within my WSGI file.
min_content.wsgi
#!/usr/bin/python
activate_this = '/var/www/min_content/min_content/venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))

import sys
import logging
logging.basicConfig(stream=sys.stderr)
sys.path.insert(0, "/var/www/min_content")

from min_content import app as application
application.secret_key = 'blah blah blah'
I have confirmed that the Redis server is running by adding this to the script
r = redis.StrictRedis('localhost', 6379, 0)
r.set(name='teststring', value='this is a test')
test_string = r.get(name='teststring')
print test_string
Running redis-cli returns 127.0.0.1:6379>
process_feed.py
import requests
import xml.etree.ElementTree as ET
import csv

def process_checks(feedUrl, source, ourAssignedId):
    feed_url = feedUrl
    source = source
    ourAssignedId = ourAssignedId
    all_the_data = []
    # grab xml from URL
    try:
        r = requests.get(feed_url)
    except Exception as e:
        print "Failed to grab from " + feed_url
        return "Failed to grab from " + feed_url
    root = ET.fromstring(r.text)
    for advertiser in root.iter('advertiser'):
        assignedId = advertiser.find('assignedId').text
        if assignedId == ourAssignedId:
            # only process for PMs using our assignedId
            for listings in advertiser.iter('listingContentIndexEntry'):
                listingUrl = listings.find('listingUrl').text
                print "Processing " + listingUrl
                # now grab from URL
                listing_request = requests.get(listingUrl)
                # parse XML from URL
                #listing_root = ET.xpath(listing_request.text)
                if not ET.fromstring(listing_request.text.encode('utf8')):
                    print "Failed to load XML for" + listingUrl
                    continue
                else:
                    listing_root = ET.fromstring(listing_request.text.encode('utf8'))
                # 'Stayz Property ID','External Reference','User Account External Reference','Provider','Address Line1','Active','Headline','Listing URL'
                stayzPropertyId = ''  # the property manager enters this into the spreadsheet
                if not listing_root.find('.//externalId').text:
                    print 'No external Id in ' + listingUrl
                    listingExternalId = 'None'
                else:
                    listingExternalId = listing_root.find('externalId').text
                    listingExternalId = '"' + listingExternalId + '"'
                userAccountExternalReference = assignedId
                print userAccountExternalReference
                provider = source
                addressLine1 = listing_root.find('.//addressLine1').text
                active = listing_root.find('active').text
                if not listing_root.find('.//headline/texts/text/textValue').text:
                    print 'No headline in ' + listingExternalId
                    headline = 'None'
                else:
                    headline = listing_root.find('.//headline/texts/text/textValue').text
                    headline = headline.encode('utf-8')
                if not listing_root.find('.//description/texts/text/textValue').text:
                    print 'No description in ' + listingExternalId
                    description = 'None'
                else:
                    description = listing_root.find('.//description/texts/text/textValue').text
                # now check the min content
                # headline length
                headline_length = len(headline)
                headline_length_check = 'FAIL'
                if headline_length < 20:
                    headline_length_check = 'FAIL'
                else:
                    headline_length_check = 'TRUE'
                # description length
                description_length_check = 'FAIL'
                description_length = len(description)
                if description_length < 400:
                    description_length_check = 'FAIL'
                else:
                    description_length_check = 'TRUE'
                # number of images
                num_images = 0
                num_images_check = 'FAIL'
                for images in listing_root.iter('image'):
                    num_images = num_images + 1
                if num_images < 6:
                    num_images_check = 'FAIL'
                else:
                    num_images_check = 'TRUE'
                # at least one rate
                num_rates = 0
                num_rates_check = 'FAIL'
                for rates in listing_root.iter('rate'):
                    num_rates = num_rates + 1
                if num_rates < 1:
                    num_rates_check = 'FAIL'
                else:
                    num_rates_check = 'TRUE'
                # at least one bedroom
                # at least one bathroom
                # a longitude and latitude
                # now add to our list of lists
                data = {'stayzPropertyId': '', 'listingExternalId': listingExternalId, 'userAccountExternalReference': userAccountExternalReference, 'provider': provider, 'addressLine1': addressLine1, 'active': active, 'headline': headline, 'listingUrl': listingUrl, 'Headline > 20 characters?': headline_length_check, 'Description > 400 characters?': description_length_check, 'Number of Images > 6?': num_images_check, 'At least one rate?': num_rates_check}
                #data_dict = ['',listingExternalId,userAccountExternalReference,provider,addressLine1,active,headline,listingUrl]
                all_the_data.append(data)
    files_location = './files/' + source + '__' + ourAssignedId + '_export.csv'
    with open(files_location, 'w') as csvFile:
        #with open('./files/' + source + '_export.csv','a') as csvFile:
        fieldnames = ['stayzPropertyId', 'listingExternalId', 'userAccountExternalReference', 'provider', 'addressLine1', 'active', 'headline', 'listingUrl', 'Headline > 20 characters?', 'Description > 400 characters?', 'Number of Images > 6?', 'At least one rate?']
        writer = csv.DictWriter(csvFile, fieldnames=fieldnames)
        writer.writeheader()
        for row in all_the_data:
            try:
                writer.writerow(row)
            except:
                print "Failed to write row " + str(row)
                continue
    # send email via Mailgun
    return requests.post(
        "https://api.mailgun.net/v3/sandboxablahblablbah1.mailgun.org/messages",
        auth=("api", "key-blahblahblah"),
        #files=("attachment", open(files_location)),
        data={"from": "Mailgun Sandbox <postmaster@.mailgun.org>",
              "to": "Me <me@me.com>",
              "subject": "Feed Processed for " + ourAssignedId,
              "text": "Done",
              "html": "<b>Process the file</b>"})
