ImportError on RQ Worker/Redis/Flask - python

I am having issues with this setup. In summary, once the user presses submit on a form, the data is handed off to Redis and an RQ worker for processing.
The error from rqworker is
23:56:44 RQ worker u'rq:worker:HAFun.12371' started, version 0.5.6
23:56:44
23:56:44 *** Listening on default...
23:56:57 default: min_content.process_feed.process_checks(u'http://www.feedurl.com/url.xml', u'PM', u'alphanumeric', u'domain@domain.com') (9e736730-e97f-4ee5-b48d-448d5493dd6c)
23:56:57 ImportError: No module named min_content.process_feed
Traceback (most recent call last):
  File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/worker.py", line 568, in perform_job
    rv = job.perform()
  File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/job.py", line 495, in perform
    self._result = self.func(*self.args, **self.kwargs)
  File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/job.py", line 206, in func
    return import_attribute(self.func_name)
  File "/var/www/min_content/min_content/venv/local/lib/python2.7/site-packages/rq/utils.py", line 150, in import_attribute
    module = importlib.import_module(module_name)
  File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
    __import__(name)
ImportError: No module named min_content.process_feed
23:56:57 Moving job to u'failed' queue
I have tried starting rqworker in a variety of ways:
rqworker --url redis://localhost:6379
rqworker
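For context, the worker resolves the enqueued job by its dotted path (min_content.process_feed.process_checks), so what matters is whether min_content is importable from the rqworker process, not from Apache/WSGI. Below is a minimal, untested sketch of a launcher that puts the project directory on sys.path before starting a worker; the /var/www/min_content path is taken from the WSGI file further down, everything else is an assumption.

```python
# worker_launcher.py -- hypothetical sketch, not part of the original project.
import sys
sys.path.insert(0, "/var/www/min_content")  # make the min_content package importable

from redis import Redis
from rq import Connection, Queue, Worker

if __name__ == '__main__':
    conn = Redis('localhost', 6379, 0)
    with Connection(conn):
        # Jobs are imported by dotted path (min_content.process_feed.process_checks),
        # so this process must be able to import min_content.
        Worker([Queue('default')]).work()
```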
views.py
from min_content import app
from flask import render_template
from .forms import SubmissionForm
from flask import request
from .process_feed import process_checks  # this is the function that does the checks
from redis import StrictRedis
from rq import Queue


def process():
    feedUrl = request.form['feedUrl']
    source = request.form['pmsc']
    ourAssignedId = request.form['assignedId']
    email_address = request.form['email_address']
    conn = StrictRedis('localhost', 6379, 0)
    q = Queue(connection=conn)
    result = q.enqueue(process_checks, feedUrl, source, ourAssignedId, email_address)
    return 'It\'s running and we\'ll send you an email when it\'s done<br /><br />Do another one'
process_feed has a function called process_checks, which works as expected.
I know this is working because calling the function directly, instead of going through RQ, works fine:
do_it = process_checks(feedUrl,source,ourAssignedId)
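As far as I can tell from the RQ docs, the job can also be enqueued by its dotted-path string, which makes the import requirement explicit; a hedged sketch using the same arguments as in views.py:

```python
# Sketch only: enqueue by dotted path instead of passing the function object.
# The worker still has to be able to import min_content.process_feed.
result = q.enqueue('min_content.process_feed.process_checks',
                   feedUrl, source, ourAssignedId, email_address)
```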
The strange thing is that this all worked perfectly well before I closed my SSH connection to the VPS.
Running ps -aux returns this, which indicates that Redis is running:
root 11894 0.1 0.4 38096 2348 ? Ssl Oct25 0:01 /usr/local/bin/redis-server *:6379
Restarting Redis does nothing, nor does restarting apache2:
sudo service redis_6379 start
sudo service redis_6379 stop
sudo service apache2 restart
I followed this guide exactly and, like I said, it worked until I terminated the SSH connection to my VPS.
I'm running in a virtual environment, if that makes any difference; I activate it within my WSGI file.
min_content.wsgi
#!/usr/bin/python
activate_this = '/var/www/min_content/min_content/venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))
import sys
import logging
logging.basicConfig(stream=sys.stderr)
sys.path.insert(0,"/var/www/min_content")
from min_content import app as application
application.secret_key = 'blah blah blah'
I have confirmed that the Redis server is running by adding this to the script
r = redis.StrictRedis('localhost', 6379, 0)
r.set(name='teststring', value='this is a test')
test_string = r.get(name='teststring')
print test_string
Running redis-cli returns 127.0.0.1:6379>
process_feed.py
import requests
import xml.etree.ElementTree as ET
import csv


def process_checks(feedUrl, source, ourAssignedId):
    feed_url = feedUrl
    source = source
    ourAssignedId = ourAssignedId
    all_the_data = []
    #grab xml from URL
    try:
        r = requests.get(feed_url)
    except Exception as e:
        print "Failed to grab from " + feed_url
        return "Failed to grab from " + feed_url
    root = ET.fromstring(r.text)
    for advertiser in root.iter('advertiser'):
        assignedId = advertiser.find('assignedId').text
        if assignedId == ourAssignedId:
            #only process for PMs using our assignedId
            for listings in advertiser.iter('listingContentIndexEntry'):
                listingUrl = listings.find('listingUrl').text
                print "Processing " + listingUrl
                #now grab from URL
                listing_request = requests.get(listingUrl)
                #parse XML from URL
                #listing_root = ET.xpath(listing_request.text)
                if not ET.fromstring(listing_request.text.encode('utf8')):
                    print "Failed to load XML for " + listingUrl
                    continue
                else:
                    listing_root = ET.fromstring(listing_request.text.encode('utf8'))
                #'Stayz Property ID','External Reference','User Account External Reference','Provider','Address Line1','Active','Headline','Listing URL'
                stayzPropertyId = ''  #the property manager enters this into the spreadsheet
                if not listing_root.find('.//externalId').text:
                    print 'No external Id in ' + listingUrl
                    listingExternalId = 'None'
                else:
                    listingExternalId = listing_root.find('externalId').text
                    listingExternalId = '"' + listingExternalId + '"'
                userAccountExternalReference = assignedId
                print userAccountExternalReference
                provider = source
                addressLine1 = listing_root.find('.//addressLine1').text
                active = listing_root.find('active').text
                if not listing_root.find('.//headline/texts/text/textValue').text:
                    print 'No headline in ' + listingExternalId
                    headline = 'None'
                else:
                    headline = listing_root.find('.//headline/texts/text/textValue').text
                    headline = headline.encode('utf-8')
                if not listing_root.find('.//description/texts/text/textValue').text:
                    print 'No description in ' + listingExternalId
                    description = 'None'
                else:
                    description = listing_root.find('.//description/texts/text/textValue').text
                #now check the min content
                #headline length
                headline_length = len(headline)
                headline_length_check = 'FAIL'
                if headline_length < 20:
                    headline_length_check = 'FAIL'
                else:
                    headline_length_check = 'TRUE'
                #description length
                description_length_check = 'FAIL'
                description_length = len(description)
                if description_length < 400:
                    description_length_check = 'FAIL'
                else:
                    description_length_check = 'TRUE'
                #number of images
                num_images = 0
                num_images_check = 'FAIL'
                for images in listing_root.iter('image'):
                    num_images = num_images + 1
                if num_images < 6:
                    num_images_check = 'FAIL'
                else:
                    num_images_check = 'TRUE'
                #atleast one rate
                num_rates = 0
                num_rates_check = 'FAIL'
                for rates in listing_root.iter('rate'):
                    num_rates = num_rates + 1
                if num_rates < 1:
                    num_rates_check = 'FAIL'
                else:
                    num_rates_check = 'TRUE'
                #atleast one bedroom
                #atleast one bathroom
                #a longitude and latitude
                #now add to our list of lists
                data = {'stayzPropertyId': '', 'listingExternalId': listingExternalId, 'userAccountExternalReference': userAccountExternalReference, 'provider': provider, 'addressLine1': addressLine1, 'active': active, 'headline': headline, 'listingUrl': listingUrl, 'Headline > 20 characters?': headline_length_check, 'Description > 400 characters?': description_length_check, 'Number of Images > 6?': num_images_check, 'At least one rate?': num_rates_check}
                #data_dict = ['',listingExternalId,userAccountExternalReference,provider,addressLine1,active,headline,listingUrl]
                all_the_data.append(data)
    files_location = './files/' + source + '__' + ourAssignedId + '_export.csv'
    with open(files_location, 'w') as csvFile:
        #with open('./files/' + source + '_export.csv','a') as csvFile:
        fieldnames = ['stayzPropertyId', 'listingExternalId', 'userAccountExternalReference', 'provider', 'addressLine1', 'active', 'headline', 'listingUrl', 'Headline > 20 characters?', 'Description > 400 characters?', 'Number of Images > 6?', 'At least one rate?']
        writer = csv.DictWriter(csvFile, fieldnames=fieldnames)
        writer.writeheader()
        for row in all_the_data:
            try:
                writer.writerow(row)
            except:
                print "Failed to write row " + str(row)
                continue
    #send email via Mailgun
    return requests.post(
        "https://api.mailgun.net/v3/sandboxablahblablbah1.mailgun.org/messages",
        auth=("api", "key-blahblahblah"),
        #files=("attachment", open(files_location)),
        data={"from": "Mailgun Sandbox <postmaster@.mailgun.org>",
              "to": "Me <me@me.com>",
              "subject": "Feed Processed for " + ourAssignedId,
              "text": "Done",
              "html": "<b>Process the file</b>"})

Related

For my final year project I need to connect my MQTT server to a smart contract. I have been attempting this through Infura as I couldn't find another way.

Traceback (most recent call last):
  File "/usr/lib/python3.9/threading.py", line 973, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.9/threading.py", line 910, in run
    self._target(*self._args, **self._kwargs)
  File "/home/jeevan/.venv/lib/python3.9/site-packages/paho/mqtt/client.py", line 3591, in _thread_main
    self.loop_forever(retry_first_connection=True)
  File "/home/jeevan/.venv/lib/python3.9/site-packages/paho/mqtt/client.py", line 1756, in loop_forever
    rc = self._loop(timeout)
  File "/home/jeevan/.venv/lib/python3.9/site-packages/paho/mqtt/client.py", line 1164, in _loop
    rc = self.loop_read()
  File "/home/jeevan/.venv/lib/python3.9/site-packages/paho/mqtt/client.py", line 1556, in loop_read
    rc = self._packet_read()
  File "/home/jeevan/.venv/lib/python3.9/site-packages/paho/mqtt/client.py", line 2439, in _packet_read
    rc = self._packet_handle()
  File "/home/jeevan/.venv/lib/python3.9/site-packages/paho/mqtt/client.py", line 3039, in _packet_handle
    return self._handle_connack()
  File "/home/jeevan/.venv/lib/python3.9/site-packages/paho/mqtt/client.py", line 3138, in _handle_connack
    on_connect(
  File "/home/jeevan/.venv/lib/python3.9/site-packages/flask_mqtt/__init__.py", line 238, in _handle_connect
    self._connect_handler(client, userdata, flags, rc)
  File "/home/jeevan/nightlight-server/test_server.py", line 34, in handle_telemetry
    payload = json.loads(message.payload.decode())
AttributeError: 'dict' object has no attribute 'payload'
^Z
[1]+ Stopped python3 test_server.py
```python
from web3 import Web3, HTTPProvider, IPCProvider, WebsocketProvider
from flask import Flask
from flask_mqtt import Mqtt
import time
import json as json
import json
import time
import paho.mqtt.client as mqtt

id = '20'
client_telemetry_topic = id + '/telemetry'
server_command_topic = id + '/commands'
client_name = id + '_nightlight_server'

mqtt_client = mqtt.Client(client_name)
mqtt_client.connect('test.mosquitto.org')

app = Flask(__name__)
app.config['test.mosquitto.org'] = 'yourbroker.com'  # your broker address goes here
app.config['MQTT_BROKER_PORT'] = 1883  # default port for non-tls connection
app.config['MQTT_USERNAME'] = ''  # No username set for now
app.config['MQTT_PASSWORD'] = ''  # no password set for now
app.config['MQTT_KEEPALIVE'] = 5  # keepalive every 5 seconds
app.config['MQTT_TLS_ENABLED'] = False  # set TLS to disabled for testing purposes

mqtt_client.loop_start()
mqtt = Mqtt()
mqtt.init_app(app)
running = True


#mqtt.on_connect()
def handle_telemetry(client, userdata, message, rc):
    payload = json.loads(message.payload.decode())
    print("Message received:", payload)
    command = {'led_on': payload['light'] < 300}
    print("Sending messages:", command)
    print('connected')
    mqtt.subscribe('client_telemetry_topic')  # your MQTT topic here


while running == True:
    #mqtt.on_message()
    def handle_mqtt_message(client, userdata, message):
        data = dict(
            topic=message.topic,
            payload=message.payload.decode()
        )
        light = (data["payload"])
        light = int(light)
        print(light)
        # our function to store the MQTT payload on Ethereum goes here as store_results(barcode)
        print("OK")

    mqtt_client.subscribe(client_telemetry_topic)
    mqtt_client.on_message = handle_telemetry
    while True:
        time.sleep(2)

w3 = Web3(HTTPProvider('https://ropsten.infura.io/inufrra api goes here'))
contract_address = '0x5a3AaF5274888e309f6dE86412389aaEaC01DE8B'
wallet_private_key = 'wallet private key goes here'
wallet_address = 'wallet address goes here'
contract = w3.eth.contract(address=contract_address, abi=abi)


def store_results(x):
    nonce = w3.eth.getTransactionCount(wallet_address)
    # if something is failing, try print (nonce) to test connectivity to Infura here. Chain ID = 3 is Ropst>
    # Below, we craft a transaction to set variable x in the smart contract to the value provided to this f>
    txn_dict = contract.functions.set(x).buildTransaction({
        'chainId': 3,
        'gas': 140000,
        'gasPrice': w3.toWei('40', 'gwei'),
        'nonce': nonce,
    })
    # Then we sign the transaction with our private key
    signed_txn = w3.eth.account.signTransaction(txn_dict, private_key=wallet_private_key)
    # The signed transaction is sent to the blockchain using Infura as an endpoint
    result = w3.eth.sendRawTransaction(signed_txn.rawTransaction)
    # Then we wait for a receipt. It can take a short while.
    tx_receipt = w3.eth.getTransactionReceipt(result)
    count = 0
    while tx_receipt is None and (count < 30):
        time.sleep(10)
        tx_receipt = w3.eth.getTransactionReceipt(result)
        return (tx_receipt)
    if tx_receipt is None:
        tx_receipt = "Failed"
    return (tx_receipt)
```
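The traceback shows handle_telemetry being invoked from flask_mqtt's connect path, where the third argument is the flags dict rather than a message object, which is why .payload fails. Here is a minimal sketch of how flask_mqtt handlers are usually wired (untested here; the broker and topic values just mirror the ones above):

```python
# Hypothetical sketch: separate connect and message handlers for flask_mqtt.
from flask import Flask
from flask_mqtt import Mqtt
import json

app = Flask(__name__)
app.config['MQTT_BROKER_URL'] = 'test.mosquitto.org'   # broker from the code above
app.config['MQTT_BROKER_PORT'] = 1883
mqtt = Mqtt(app)

@mqtt.on_connect()
def handle_connect(client, userdata, flags, rc):
    # Subscribe once connected; '20/telemetry' mirrors client_telemetry_topic above.
    mqtt.subscribe('20/telemetry')

@mqtt.on_message()
def handle_telemetry(client, userdata, message):
    # Only the message callback receives an object with .topic and .payload.
    payload = json.loads(message.payload.decode())
    print("Message received:", payload)
```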

Python error when running n inputs processed at the same time using threads

I am new to Python. I fetch multiple IP addresses as input from a YAML file and assign them to hosts. Using a for loop, I iterate through these IPs and they are processed one by one, with the result then written to a txt file. I want the inputs from the for loop to be processed at the same time and the results written to the txt file. I tried using threads, but I get errors when writing to the file.
Here is my code:
import yaml
import requests
from itertools import cycle
import os
import http.client
import time
import sys
import signal
import ipaddress
from pathlib import Path
import socket
import time
from datetime import datetime, timedelta
import time
import subprocess
from threading import Thread, Lock
import threading
import random
from _thread import start_new_thread


class heart_beat():
    #Function for writing ipv4 to a txt
    def ip4write(self, ip, name):
        timestamp = datetime.now().strftime("%B %d %Y, %H:%M:%S")
        print(ip)  #check
        global ip4a
        ip4a = ip
        print("ip", ip)
        self.ipve4(ip, name)
        hbwrite.write(self, ip, name)

    #Funtion for Ipv4
    def ipve4(self, ip, name):
        try:
            global stp
            cmd = "ping -c {} -W {} {}".format(count, wait_sec, ip4a).split(' ')
            output = subprocess.check_output(cmd).decode().strip()
            lines = output.split("\n")
            global roundtime_ms, packet_transmit, u
            roundtime = lines[1].split()[-2].split('=')[1]
            roundtime_ms = float(roundtime) * 1000
            packet_transmit = lines[-2].split(',')[0].split()[0]
            total = lines[-2].split(',')[3].split()[1]
            loss = lines[-2].split(',')[2].split()[0]
            timing = lines[-1].split()[3].split('/')
            status, result = subprocess.getstatusoutput("ping -c1 -w2 " + str(ip4a))
            if status == 0:
                print(str(ip4a) + " UP")
                u = " UP"
                stp = 1
        except:
            print(e)

    def fields(self):
        for item in data2:
            if item["type"] == "icmp":
                #make all variables global
                global hosts, ipv4, ipv6, count, wait_sec, schedule, enabled
                hosts = (item["host"])
                ty_name = (item["name"])
                ipv4 = (item["ipv4"])
                ipv6 = (item["ipv6"])
                timeout1 = (item["timeout"])
                timeoutt = int(timeout1.replace("s", ""))
                print(timeoutt)
                fields = (item["fields"])
                l1 = []
                for key in fields:
                    lis = 'fields.' + key + ':' + fields[key]
                    print(lis)
                    l1.append(lis)
                fieldcustom = ""
                for i in l1:
                    fieldcustom = fieldcustom + i + "\t\t"
                print(fieldcustom)
                enabled = (item["enabled"])
                count = 1
                wait_sec = 1

    #Beat function
    def beat(self):
        #Name for the log file
        global lock
        name = 'icmp.log'
        try:
            if (enabled == True):
                for ip in hosts:
                    if (ipv4 == True and ipv6 == False):
                        iv = ipaddress.ip_address(ip)
                        print("iv", iv.version)
                        if (iv.version == 4):
                            start_new_thread(self.ip4write, (ip, name))
        except requests.ConnectionError:
            print("Error")


class hbwrite():
    def write(self, ip, name):
        if (stp == 1):
            #time.sleep(input)
            text_file = open(name, "a+")
            text_file.write("monitor.ip: " "%s" % str(ip) + "\t\t" + "monitor.status: " " %s" % u + "\t\t" + "monitor.id: icmp#icmp-ip#" " %s" % str(ip) + "\t\t" + "icmp.requests:" " %s" % packet_transmit + "\t\t" + "icmp.rtt.us:" " %s" % int(roundtime_ms) + "\t\t" + "beat.name:" " %s" % hostname + "\t\t" + "beat.hostname:" " %s" % hostname + "\t\t" + "beat.version:" " %s" % beatversion + "\t\t" + "tags:" " %s" % tags + "\t\t" + " %s" % fieldcustom)
            text_file.write("\n")
            text_file.close()


if __name__ == '__main__':
    # Load the YAML file in to data
    data = yaml.safe_load(open('beat.yml'))
    data2 = data.get('heartbeat.monitors')
    hb = heart_beat()
    #For setting variables with values
    hb.fields()
    hb.beat()
Code in YAML file:
heartbeat.monitors:
- type: icmp
  name: icmp
  host: ["192.168.1.9","192.168.1.36"]
  enabled: True
  #schedule: "* * * * * */5"
  schedule: "*/1 * 4 2 *"
  #schedule: "*/2 17 29 1 * *"
  ipv4: True
  ipv6: False
  #mode:
  timeout: 20s
  tags: ["res1","ip1"]
  fields:
    a: apple
    b: green
Error I got:
Fatal Python error: could not acquire lock for <_io.BufferedWriter name='<stdout>'> at interpreter shutdown, possibly due to daemon threads
Thread 0x00007f9fd5ed3700 (most recent call first):
  File "stackover.py", line 27 in ip4write
Thread 0x00007f9fd66d4700 (most recent call first):
  File "stackover.py", line 30 in ip4write
Thread 0x00007f9fd6ed5700 (most recent call first):
  File "/usr/lib/python3.5/subprocess.py", line 1397 in _get_handles
  File "/usr/lib/python3.5/subprocess.py", line 911 in __init__
  File "/usr/lib/python3.5/subprocess.py", line 693 in run
  File "/usr/lib/python3.5/subprocess.py", line 626 in check_output
  File "stackover.py", line 40 in ipve4
  File "stackover.py", line 32 in ip4write
Current thread 0x00007f9fdab47700 (most recent call first):
Aborted (core dumped)
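One direction that may help (a hypothetical sketch, not the poster's code): hand each IP to a thread pool and guard the file writes with a lock, instead of firing daemon threads with _thread.start_new_thread, which can be killed at interpreter shutdown mid-write. ping_host below is only a stand-in for the ipve4/write logic above.

```python
# Hedged sketch: ping several hosts concurrently and serialize the file writes.
import subprocess
from concurrent.futures import ThreadPoolExecutor
from threading import Lock

write_lock = Lock()

def ping_host(ip, log_name='icmp.log'):
    status, _output = subprocess.getstatusoutput("ping -c1 -w2 " + ip)
    line = "monitor.ip: %s\t\tmonitor.status: %s\n" % (ip, "UP" if status == 0 else "DOWN")
    with write_lock:                      # only one thread writes at a time
        with open(log_name, "a+") as f:
            f.write(line)

hosts = ["192.168.1.9", "192.168.1.36"]   # hosts from beat.yml in the question
with ThreadPoolExecutor(max_workers=len(hosts)) as pool:
    pool.map(ping_host, hosts)            # blocks until every ping has finished
```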

HTTP Error 404: Forbidden error while scraping a Facebook group post using the Graph API

So I was working on my academic project that involves scraping public Facebook group posts using Python. I have done this more than a couple of times and it worked smoothly, but this time it's giving me the error:
HTTP ERROR 404 : Forbidden
I tried scraping Facebook pages and it gave me results smoothly, which means the issue is not with the access token.
I am using the same script that ran successfully before, and I tried changing the user access token, the app access token, and the type of request (GET/POST), but with no success. I am attaching the script I used (found somewhere on GitHub) along with the error logs.
import urllib.request as ur
import json
import datetime
import csv
import time

group_id = input("Please Paste Public Group ID:")
#access_token = app_id + "|" + app_secret
access_token = input("Please Paste Your Access Token:")


def request_until_succeed(url):
    req = ur.Request(url)
    success = False
    while success is False:
        try:
            response = ur.urlopen(req)
            if response.getcode() == 200:
                success = True
        except Exception as e:
            print(e)
            time.sleep(5)
            print("Error for URL %s: %s" % (url, datetime.datetime.now()))
            print("Retrying.")
    return response.read()


# Needed to write tricky unicode correctly to csv
def unicode_normalize(text):
    return text.translate({0x2018: 0x27, 0x2019: 0x27, 0x201C: 0x22, 0x201D: 0x22,
                           0xa0: 0x20}).encode('utf-8')


def getFacebookPageFeedData(group_id, access_token, num_statuses):
    # Construct the URL string; see
    # http://stackoverflow.com/a/37239851 for Reactions parameters
    base = "https://graph.facebook.com/v2.6"
    node = "/%s/feed" % group_id
    fields = "/?fields=message,link,permalink_url,created_time,type,name,id," + \
             "comments.limit(0).summary(true),shares,reactions." + \
             "limit(0).summary(true),from"
    parameters = "&limit=%s&access_token=%s" % (num_statuses, access_token)
    url = base + node + fields + parameters
    # retrieve data
    data = json.loads(request_until_succeed(url))
    return data


def getReactionsForStatus(status_id, access_token):
    # See http://stackoverflow.com/a/37239851 for Reactions parameters
    # Reactions are only accessable at a single-post endpoint
    base = "https://graph.facebook.com/v2.6"
    node = "/%s" % status_id
    reactions = "/?fields=" \
        "reactions.type(LIKE).limit(0).summary(total_count).as(like)" \
        ",reactions.type(LOVE).limit(0).summary(total_count).as(love)" \
        ",reactions.type(WOW).limit(0).summary(total_count).as(wow)" \
        ",reactions.type(HAHA).limit(0).summary(total_count).as(haha)" \
        ",reactions.type(SAD).limit(0).summary(total_count).as(sad)" \
        ",reactions.type(ANGRY).limit(0).summary(total_count).as(angry)"
    parameters = "&access_token=%s" % access_token
    url = base + node + reactions + parameters
    # retrieve data
    data = json.loads(request_until_succeed(url))
    return data


def processFacebookPageFeedStatus(status, access_token):
    # The status is now a Python dictionary, so for top-level items,
    # we can simply call the key.
    # Additionally, some items may not always exist,
    # so must check for existence first
    status_id = status['id']
    status_message = '' if 'message' not in status.keys() else \
        unicode_normalize(status['message'])
    link_name = '' if 'name' not in status.keys() else \
        unicode_normalize(status['name'])
    status_type = status['type']
    status_link = '' if 'link' not in status.keys() else \
        unicode_normalize(status['link'])
    status_permalink_url = '' if 'permalink_url' not in status.keys() else \
        unicode_normalize(status['permalink_url'])
    status_author = unicode_normalize(status['from']['name'])

    # Time needs special care since a) it's in UTC and
    # b) it's not easy to use in statistical programs.
    status_published = datetime.datetime.strptime(
        status['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
    status_published = status_published + datetime.timedelta(hours=-5)  # EST
    # best time format for spreadsheet programs:
    status_published = status_published.strftime('%Y-%m-%d %H:%M:%S')

    # Nested items require chaining dictionary keys.
    num_reactions = 0 if 'reactions' not in status else \
        status['reactions']['summary']['total_count']
    num_comments = 0 if 'comments' not in status else \
        status['comments']['summary']['total_count']
    num_shares = 0 if 'shares' not in status else \
        status['shares']['count']

    # Counts of each reaction separately; good for sentiment
    # Only check for reactions if past date of implementation:
    # http://newsroom.fb.com/news/2016/02/reactions-now-available-globally/
    reactions = getReactionsForStatus(status_id, access_token) \
        if status_published > '2016-02-24 00:00:00' else {}
    num_likes = 0 if 'like' not in reactions else \
        reactions['like']['summary']['total_count']
    # Special case: Set number of Likes to Number of reactions for pre-reaction
    # statuses
    num_likes = num_reactions if status_published < '2016-02-24 00:00:00' else \
        num_likes

    def get_num_total_reactions(reaction_type, reactions):
        if reaction_type not in reactions:
            return 0
        else:
            return reactions[reaction_type]['summary']['total_count']

    num_loves = get_num_total_reactions('love', reactions)
    num_wows = get_num_total_reactions('wow', reactions)
    num_hahas = get_num_total_reactions('haha', reactions)
    num_sads = get_num_total_reactions('sad', reactions)
    num_angrys = get_num_total_reactions('angry', reactions)

    # return a tuple of all processed data
    return (status_id, status_message, status_author, link_name, status_type,
            status_link, status_permalink_url, status_published, num_reactions,
            num_comments, num_shares, num_likes, num_loves, num_wows, num_hahas,
            num_sads, num_angrys)


def scrapeFacebookPageFeedStatus(group_id, access_token):
    with open('%s_facebook_statuses.csv' % group_id, 'w') as file:
        w = csv.writer(file)
        w.writerow(["status_id", "status_message", "status_author",
                    "link_name", "status_type", "status_link", "permalink_url",
                    "status_published", "num_reactions", "num_comments",
                    "num_shares", "num_likes", "num_loves", "num_wows",
                    "num_hahas", "num_sads", "num_angrys"])
        has_next_page = True
        num_processed = 0  # keep a count on how many we've processed
        scrape_starttime = datetime.datetime.now()
        print("Scraping %s Facebook Page: %s\n" % (group_id, scrape_starttime))
        statuses = getFacebookPageFeedData(group_id, access_token, 100)
        while has_next_page:
            for status in statuses['data']:
                # Ensure it is a status with the expected metadata
                if 'reactions' in status:
                    w.writerow(processFacebookPageFeedStatus(status, access_token))
                # output progress occasionally to make sure code is not
                # stalling
                num_processed += 1
                if num_processed % 100 == 0:
                    print("%s Statuses Processed: %s" % (num_processed,
                                                         datetime.datetime.now()))
            # if there is no next page, we're done.
            if 'paging' in statuses.keys():
                statuses = json.loads(request_until_succeed(statuses['paging']['next']))
            else:
                has_next_page = False
        print("\nDone!\n%s Statuses Processed in %s" %
              (num_processed, datetime.datetime.now() - scrape_starttime))


if __name__ == '__main__':
    scrapeFacebookPageFeedStatus(group_id, access_token)
Error logs:
HTTP Error 403: Forbidden
Traceback (most recent call last):
  File "scrape_facebook.py", line 22, in request_until_succeed
    response = ur.urlopen(req)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 532, in open
    response = meth(req, response)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 642, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 570, in error
    return self._call_chain(*args)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 504, in _call_chain
    result = func(*args)
  File "C:\Users\Usama\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 650, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "scrape_facebook.py", line 198, in <module>
    scrapeFacebookPageFeedStatus(group_id, access_token)
  File "scrape_facebook.py", line 168, in scrapeFacebookPageFeedStatus
    statuses = getFacebookPageFeedData(group_id, access_token, 100)
  File "scrape_facebook.py", line 52, in getFacebookPageFeedData
    data = json.loads(request_until_succeed(url))
  File "scrape_facebook.py", line 27, in request_until_succeed
    time.sleep(5)
KeyboardInterrupt
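One thing that may help narrow this down (a hedged sketch, not part of the original script): a 403 from the Graph API carries a JSON error body explaining why the call was refused, so catching HTTPError and printing that body is usually more informative than retrying in a loop.

```python
# Sketch: surface the Graph API error message instead of looping on failures.
import json
import urllib.request as ur
import urllib.error

def fetch(url):
    try:
        return ur.urlopen(ur.Request(url)).read()
    except urllib.error.HTTPError as e:
        # The Graph API typically returns a body like
        # {"error": {"message": "...", "type": "...", "code": ...}}
        print(e.code, json.loads(e.read().decode('utf-8')))
        raise
```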

Building SOAP request with suds and Python - XML not being generated properly

Here is the error:
No handlers could be found for logger "suds.client"
Traceback (most recent call last):
  File "bling.py", line 134, in <module>
    get_data("146.215.236.10", list)
  File "bling.py", line 48, in get_data
    retval = client.service.SelectCmDevice("", stuff)
  File "/usr/lib/python2.6/site-packages/suds/client.py", line 542, in __call__
    return client.invoke(args, kwargs)
  File "/usr/lib/python2.6/site-packages/suds/client.py", line 602, in invoke
    result = self.send(soapenv)
  File "/usr/lib/python2.6/site-packages/suds/client.py", line 649, in send
    result = self.failed(binding, e)
  File "/usr/lib/python2.6/site-packages/suds/client.py", line 702, in failed
    r, p = binding.get_fault(reply)
  File "/usr/lib/python2.6/site-packages/suds/bindings/binding.py", line 265, in get_fault
    raise WebFault(p, faultroot)
suds.WebFault: Server raised fault: 'org.xml.sax.SAXParseException: Element type "SelectItem" must be followed by either attribute specifications, ">" or "/>".'
Here is the code:
def get_data(ip, list):
    """
    Connect to soap webservice
    Append device name and status
    To data list
    """
    stuff = {}
    stuff['SelectBy'] = 'Name'
    count = 0
    for i in list:
        if "SelectItems" not in stuff:
            stuff['SelectItems'] = {}
        if 'SelectItem[' + str(count) + ']' not in stuff['SelectItems']:
            stuff['SelectItems']['SelectItem[' + str(count) + ']'] = {}
        stuff['SelectItems']['SelectItem[' + str(count) + ']']['Item'] = i
        count = count + 1
    t = HttpAuthenticated(**credentials)
    uri = 'https://' + ip + ':8443/realtimeservice/services/RisPort?wsdl'
    imp = Import('http://schemas.xmlsoap.org/soap/encoding/')
    doctor = ImportDoctor(imp)
    client = Client(url=uri, transport=t, doctor=doctor)
    retval = client.service.SelectCmDevice("", stuff)
    pprint(retval)
    sys.exit(0)
    for node in retval['SelectCmDeviceResult'].CmNodes:
        for dev in node.CmDevices:
            name = dev.Name
            status = dev.Status
            data.append([name, status])
I am trying to call the Cisco RisPort API with Python; however, even when emulating how it is done in PHP, I get an error, and I am really trying not to port over to PHP at this point in the project.
Here is how PHP passes the data:
foreach ($devices_chunks as $chunk){
    echo '.';
    //Prepare RisPort request
    $array["SelectBy"] = "Name";
    $array["Status"] = "Registered";
    //$array["Class"] = "Phone";
    //$array["MaxReturnedDevices"] = "1000";
    $i = 1;
    foreach($chunk as $device){
        $array["SelectItems"]["SelectItem[$i]"]["Item"] = $device->name;
        $i++;
    }
    // Run RisPost Query + wait a bit as max requests is 15 per min.
    $response = $soap_ris->SelectCmDevice("",$array);
}
I think the problem is with how suds transposes the dict of dicts into XML. Any thoughts on how to fix this, short of manually building the SOAP XML request?
Thanks!
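One approach that may be worth trying (a sketch based on how suds generally maps Python lists onto repeated XML elements; the 'SelectItem'/'Item' names simply follow the RisPort fields used above, and this is untested against that API): drop the 'SelectItem[0]'-style keys, which suds turns into literal element names, and pass a plain list instead.

```python
# Hedged sketch: repeated <SelectItem> elements expressed as a Python list.
# device_names is a hypothetical stand-in for the `list` argument in get_data().
stuff = {
    'SelectBy': 'Name',
    'SelectItems': {
        'SelectItem': [{'Item': name} for name in device_names]
    },
}
retval = client.service.SelectCmDevice("", stuff)
```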

<class 'urllib2.URLError'>: <urlopen error [Errno 2] No such file or directory>

What I am trying to do is show tweets on a web page using a Python CGI script.
This is my code. It works fine in the terminal, but it shows the errors listed below.
#!/usr/bin/python
import cgitb
cgitb.enable(False, '/var/www/twitter/errors')
import oauth2
import time
import urllib2
import json

url1 = "https://api.twitter.com/1.1/search/tweets.json"
params = {
    "oauth_version": "1.0",
    "oauth_nonce": oauth2.generate_nonce(),
    "oauth_timestamp": int(time.time())
}
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''
consumer = oauth2.Consumer(key=consumer_key, secret=consumer_secret)
token = oauth2.Token(key=access_token, secret=access_secret)
params["oauth_consumer_key"] = consumer.key
params["oauth_token"] = token.key
prev_id = int("435458631669415936")
for i in range(1):
    url = url1
    params["q"] = "fifa"
    params["count"] = 15
    # params["geocode"] = ""
    # params["lang"] = "English"
    params["locale"] = "en"
    params["result_type"] = "popular"  # Example Values: mixed, recent, popular
    # params["until"] = ""
    # params["since_id"] = ""
    # params["max_id"] = str(prev_id)
    req = oauth2.Request(method="GET", url=url, parameters=params)
    signature_method = oauth2.SignatureMethod_HMAC_SHA1()
    req.sign_request(signature_method, consumer, token)
    headers = req.to_header()
    url = req.to_url()
    # print headers
    # print url
    response = urllib2.Request(url)
    data = json.load(urllib2.urlopen(response))
    if data["statuses"] == []:
        print "end of data"
        break
    else:
        prev_id = int(data["statuses"][-1]["id"]) - 1
        print prev_id, i
        print data["statuses"]
        #f = open("outfile_" + str(i) + ".txt", "w")
        json.dump(data["statuses"], f)
        f.close()
        time.sleep(5)
This is my code, and I am facing the problem below (I have changed the secrets and tokens for security):
global URLError = <class 'urllib2.URLError'>, err = error(2, 'No such file or directory')
<class 'urllib2.URLError'>: <urlopen error [Errno 2] No such file or directory>
args = (error(2, 'No such file or directory'),)
errno = None
filename = None
message = ''
reason = error(2, 'No such file or directory')
strerror = None
Change this line to see errors inside the web page. It's possible the given logging directory doesn't exist, or exists but isn't writable by the webserver user:
cgitb.enable(True)  # '/var/www/twitter/errors'
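A minimal sketch of the two cgitb options mentioned above (the log directory is the one from the question; if you keep it, it must exist and be writable by the web server user):

```python
import cgitb

# Option 1: show tracebacks directly in the browser while debugging.
cgitb.enable(True)

# Option 2: keep logging to a directory, but only if it exists and the
# web server user can write to it.
# cgitb.enable(False, '/var/www/twitter/errors')
```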
