Cannot call built-in function - python

In the following program, I tried to call self.start_requests() from self.after_login(), but it did not work. When I instead repeated the body of self.start_requests() inline, it worked.
My question is: why can't I just call self.start_requests() directly?
__author__ = 'parallels'
import scrapy
from scrapy import Request
from bs4 import BeautifulSoup
def start_requests(usrname, password):
    """Build a request that carries the credentials as cookies.

    Args:
        usrname: Value sent in the ``name`` cookie.
        password: Value sent in the ``password`` cookie.

    Returns:
        A single ``Request`` created with ``dont_filter=True``.
    """
    # The target URL is blank in this listing, so it is left empty here.
    return Request(
        url="",
        cookies={'name': usrname, 'password': password},
        dont_filter=True,
    )
class heibanke2(scrapy.Spider):
    """Spider that tries successive integer passwords on a login form.

    Starting from ``password``, it loads the login page, submits the form and
    inspects the page that comes back. While the user name is not shown in the
    first ``<h3>``, the password is incremented and the cycle restarts.
    """

    name = "herbanke2"
    # start_urls = [""]
    password = 4  # first candidate; incremented after every failed attempt

    def start_requests(self):
        """Return the request for the login page (URL blank in this listing)."""
        # dont_filter=True: the same URL is requested once per attempt, and
        # repeated identical requests would otherwise be filtered out.
        return [Request("", callback=self.post_login, dont_filter=True)]

    def post_login(self, response):
        """Fill in and submit the login form found in ``response``."""
        print('Preparing login')
        print("current password: %s" % self.password)
        return [scrapy.FormRequest.from_response(
            response,
            formdata={
                'username': "JoseLyn",
                'password': str(self.password),
            },
            callback=self.after_login,
        )]

    def after_login(self, response):
        """Save the response body, then retry or report the found password.

        Returns:
            The list from ``start_requests`` when the login failed, otherwise
            ``None`` after printing the result.
        """
        print("after_login")
        # The listing opens this file for writing but shows no write; saving
        # the raw body is the evident purpose of the "body<N>" file name.
        with open("body" + str(self.password), "wb") as f:
            f.write(response.body)
        soup = BeautifulSoup(response.body, "lxml")
        if "JoseLyn" not in soup.h3.string:
            self.password += 1
            # The requests must be *returned*: a bare self.start_requests()
            # call only builds the list and throws it away, so nothing is
            # ever scheduled.
            return self.start_requests()
        print("password found: %s" % self.password)
        # The URL prefix is blank in this listing, hence the empty string.
        print("next mission at: %s" % ('' + soup.a['href']))
Thank you in advance!


Python missing 1 required keyword-only argument [duplicate]

This question already has answers here:
Client.__init__() missing 1 required keyword-only argument: 'intents'
(4 answers)
Closed 6 months ago.
I have a Discord Bot for UKHotDeals, but it throws an error.
This is written for Python 3.x.
The original repository can be found here:
Traceback (most recent call last):
File "C:\Users\USER\Desktop\Hotukdeals-Discord-Notifier-master\", line 179, in <module>
client = MyClient(channel_id)
File "C:\Users\USER\Desktop\Hotukdeals-Discord-Notifier-master\", line 31, in __init__
super().__init__(*args, **kwargs)
TypeError: Client.__init__() missing 1 required keyword-only argument: 'intents'
I can't get where I'm missing something in the code, which is this:
import discord
import asyncio
import requests
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import json
# Load the runtime configuration once at import time. settings.json stores
# every value as a string, hence the explicit int()/float() conversions.
with open('settings.json') as data:
    settings = json.load(data)
min_upvotes = int(settings["min_upvotes"])
max_upvotes = int(settings["max_upvotes"])
base_url = settings["base_url"]
pages_to_index = int(settings["pages_to_index"])
discord_api_key = settings["discord_api_token"]
min_price = float(settings["min_price"])
max_price = float(settings["max_price"])
channel_id = int(settings["discord_channel_id"])
time_interval_seconds = int(settings["time_interval_seconds"])
# Discord client that repeatedly scrapes deal-listing pages and sends the
# messages returned by checkDealsBeautifulSoup to one channel.
# NOTE(review): this listing has no indentation, and several lines appear to be
# missing (the `proxies` dict is never closed, `if ==` has no operands, and the
# back-to-back assignments below look like bodies of lost try/except/else
# branches). The comments describe only what is still visible.
class MyClient(discord.Client):
# channel: stored as self.channelID.
# *args / **kwargs: forwarded unchanged to discord.Client.__init__, so any
# keyword that base class requires has to be supplied by the caller.
def __init__(self, channel, *args, **kwargs):
self.outOfStock = []
self.checkUrls = []
self.channelID = channel
super().__init__(*args, **kwargs)
# create the background task and run it in the background
self.bg_task = self.loop.create_task(self.my_background_task())
# Check deals
# Fetches `url` with requests, parses it with BeautifulSoup and returns
# `returnMsgs`. The settings, the proxy list and the used-links file are
# re-read from disk on every call.
def checkDealsBeautifulSoup(self, url):
# Imports
import requests
from bs4 import BeautifulSoup
import json
import random
# Loads JSON and vars
with open('settings.json') as data:
settings = json.load(data)
min_upvotes = int(settings["min_upvotes"])
max_upvotes = int(settings["max_upvotes"])
min_price = float(settings["min_price"])
max_price = float(settings["max_price"])
# Loads proxies
with open('proxies.txt', 'r') as proxies:
# readlines() keeps each line's trailing newline, so `proxy` below may end in "\n".
proxies = proxies.readlines()
# Picks random proxy
proxy = random.choice(proxies)
returnMsgs = []
newArray = []
# Reads already used things
with open('data/usedLinks.txt', 'r') as data:
usedArray = data.readlines()
# Sets up proxy
proxies = {
"http": "http://" + proxy,
"https": "https://" + proxy,
# NOTE(review): this passes the single `proxy` string, not the `proxies`
# mapping started just above — confirm which one was intended.
page = requests.get(url, proxies=proxy)
soup = BeautifulSoup(page.text, 'html.parser')
var = False
# Tries to get things
listings = soup.find_all(
'article', attrs={'data-handler': 'history'})
upvotes = soup.find_all('span', attrs={'class': 'cept-vote-temp'})
pricing = soup.find_all('span', attrs={'class': 'thread-price'})
urls = soup.find_all(
'a', attrs={'class': 'cept-thread-image-link'})
# NOTE(review): as listed, var ends up False, so the block guarded by
# `if var == True` cannot run.
var = True
var = False
if var == True:
# upvotesIndex advances separately from index because some vote spans
# (those containing "Deal" or "alerts") are skipped.
upvotesIndex = 0
index = 0
for x in range(0, len(listings)):
upvote = upvotes[upvotesIndex].text.strip().replace(
" ", "").replace("°", "").replace("\n", "")
if "Deal" in upvote or "alerts" in upvote:
upvotesIndex += 1
upvote = upvotes[upvotesIndex].text.strip().replace(
" ", "").replace("°", "").replace("\n", "")
upvote = 0
price = pricing[index].text.strip().replace("£", "")
price = 0
url = urls[index].get('href')
url = None
if price != "FREE":
price = float(price.replace(",", ""))
price = 0
price = 0
# A deal qualifies when price and upvotes are inside the configured ranges
# and its URL is not already recorded in usedLinks.txt.
if min_price <= price <= max_price:
if min_upvotes <= int(upvote) <= max_upvotes:
if url != None:
if url + "\n" not in usedArray:
# Return Message
message = url + " Satisfies your deal criteria. It is at " + \
str(upvote) + \
" degrees and costs £" + str(price)
# NOTE(review): no visible line adds `message` to returnMsgs or anything to
# newArray, so as listed the method returns an empty list and records no links.
upvotesIndex += 1
index += 1
# Saves new logged files
with open('data/usedLinks.txt', 'a') as fileObj:
for line in newArray:
fileObj.write(line + "\n")
# Returns stuff
return returnMsgs
# On start
async def on_ready(self):
print('Logged in as')
# On message
# NOTE(review): the condition below is incomplete in this listing.
async def on_message(self, message):
if ==
# Background manager
# Waits until the client is ready, then, until the client is closed, checks
# pages 0 .. pages_to_index - 1, sends every returned message to the channel
# and awaits asyncio.sleep(time_interval_seconds).
# Uses the module-level channel_id rather than self.channelID.
async def my_background_task(self):
await self.wait_until_ready()
channel = self.get_channel(int(channel_id))
while not self.is_closed():
for page in range(0, int(pages_to_index)):
print('checking page ' + str(page))
# checkDealsBeautifulSoup is a plain (non-async) method, so this call is not awaited.
res = self.checkDealsBeautifulSoup(
base_url + "?page=" + str(page))
if res != []:
for msg in res:
await channel.send(msg)
await asyncio.sleep(int(time_interval_seconds))
# Main
# NOTE(review): no `intents` keyword is passed here, and MyClient.__init__ only
# forwards the arguments it receives to discord.Client.__init__, which matches
# the "missing 1 required keyword-only argument: 'intents'" TypeError reported
# for this line. Passing intents=discord.Intents.default() would supply it.
client = MyClient(channel_id)
channel_id and discord_api_key are set correctly in settings.json, like this:
"min_upvotes": "500",
"max_upvotes": "1000",
"base_url": "",
"pages_to_index": "10",
"discord_api_token": "asdAxNasdDkxNzQ1NDcasdasd4ODU1OTAxOQ.GxasdNr.Hasdv7k9Iladsdvasd67jasdasdCXHF4",
"min_price": "0",
"max_price": "500",
"discord_channel_id": "5712311231233167",
"time_interval_seconds": "1800"
According to this other thread, the Discord client now requires an Intents object to be passed to its constructor:
client = discord.Client(intents=discord.Intents.default())
In your case, you have to fix the call that instantiates your own client so that it passes the intents through, for example:
client = MyClient(channel_id, intents=discord.Intents.default())
It seems that the error comes from discord.Client,
class MyClient(discord.Client):
def __init__(self, channel, *args, **kwargs):
self.outOfStock = []
self.checkUrls = []
self.channelID = channel
super().__init__(*args, **kwargs)
check this:

get_access_token() takes 0 positional arguments but 1 was given

I'm not very experienced programming in Python but I've also seen in several posts that this problem might be solved by adding self to the method definition. The problem is that I actually already did this right from the beginning. This is the code I'm using:
class api_manager():
    """Client for a web API exposing hero, item and match-detail endpoints.

    Attributes:
        heroes: Rows of ``[id, localized_name, tag]`` appended by
            ``initialize_heroes``.
        items: Rows of ``[id, localized_name, name]`` appended by
            ``initialize_items``.
        token: API key; empty until ``get_access_token`` is called.
        url: Base URL chosen in ``__init__``.
    """

    # Imported in the class body, so the modules are class attributes; that is
    # why the methods reach them as ``self.requests`` and ``self.pd``.
    import requests
    import time
    import pandas as pd
    #from ipython.display import display, HTML

    def __init__(self, api_identifier=1):
        """Select the base URL.

        Args:
            api_identifier: ``1`` selects the live API; any other value
                selects the test API.
        """
        test_api_url = ""
        live_api_url = ""
        self.heroes = []
        self.items = []
        self.token = ''
        # The listing assigned the test URL unconditionally after the ``if``
        # (the ``else`` was lost), so the live URL could never be selected.
        self.url = live_api_url if api_identifier == 1 else test_api_url

    def get_access_token(self):
        """Load the API key from ``conf/access.config`` into ``self.token``.

        Raises:
            IndexError: If the file contains no ``:`` separator.
        """
        with open("conf/access.config") as file:
            # NOTE(review): only `":")[1]` survived on this line in the
            # listing; reading the file and taking the text after the first
            # ":" is a reconstruction — confirm against the config format.
            # strip() keeps a trailing newline out of the request URLs.
            self.token = file.read().split(":")[1].strip()

    def initialize_heroes(self):
        """Fetch the hero list and append one row per hero to ``self.heroes``."""
        response = self.requests.get(self.url + "GetHeroes/v1/?format=JSON&language=en_us&key=" + self.token)
        hero_list = response.json()
        for hero in hero_list['result']['heroes']:
            self.heroes.append([
                hero['id'],
                hero['localized_name'],
                # Turn the internal name into a readable tag.
                hero['name'].replace('npc_dota_hero_', "").replace("_", " "),
            ])
        heroes_df = self.pd.DataFrame(self.heroes, columns=["ID", "Hero", "Hero Tag"])
        # NOTE(review): newer pandas releases prefer None over -1 for
        # "no limit" here — check the installed version.
        self.pd.set_option('display.max_colwidth', -1)
        #display(HTML(heroes_df.to_html(index = False)))

    def initialize_items(self):
        """Fetch the item list and append one row per item to ``self.items``."""
        response = self.requests.get(self.url + "GetGameItems/v1/?format=JSON&language=en_us&key=" + self.token)
        item_list = response.json()
        # Read every field from the already-parsed payload; the listing
        # re-parsed the whole response once per item for the third column.
        for item in item_list['result']['items']:
            self.items.append([item['id'], item['localized_name'], item['name']])
        items_df = self.pd.DataFrame(self.items, columns=["ID", "Item", "Item Tag"])
        self.pd.set_option('display.max_colwidth', -1)
        #display(HTML(items_df.to_html(index = False)))

    def get_match_details(self, match_id):
        """Request the details of one match.

        ``self`` must be the first parameter: Python passes the instance as
        the first positional argument of a bound method, so the listing's
        ``(match_id, self)`` order bound the instance to ``match_id``.

        Args:
            match_id: Identifier of the match.

        Returns:
            The decoded JSON payload of the response.
        """
        response = self.requests.get(self.url + "GetMatchDetails/V001/?format=JSON&language=en_us&key=" + self.token + "&match_id=" + str(match_id))
        return response.json()

    def get_match_details_in_range(self, match_id, match_id_upper_bound):
        """Request the details of every match in a half-open id range.

        Args:
            match_id: First match id (inclusive).
            match_id_upper_bound: End of the range (exclusive).

        Returns:
            A list with one decoded JSON payload per match id.
        """
        return [
            self.get_match_details(next_match_id)
            for next_match_id in range(match_id, match_id_upper_bound)
        ]
And this is the error I'm getting in Python3 console run in Windows cmd:
>>> instance = api_manager()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "E:\Dropbox\DotA 2 WebAPI Development\Executable Python Files\", line 22, in __init__
TypeError: get_access_token() takes 0 positional arguments but 1 was given
I don't know what exactly I'm doing wrong. I already tried different things in my code but never got it to actually work. There was one moment when the code executed properly and I could actually call the method get_match_details() but I got an error because the url parameter was not set for whatever reason, although it should have been initialized right in the beginning of __init__().
Thanks in advance for your help and let me know if you need any additional information.

urlopen error when scrapy spider is run from a script

I've written a script that runs a scrapy spider that is located inside a different directory. The script takes in user input, parses it and adds it to a url to be scraped. The script seemed to be working earlier but now I'm getting the following error:
URLError: <urlopen error [Errno 101] Network is unreachable>
ERROR: Unable to read instance data, giving up
The code for the spider works properly when run with the scrapy crawl command, but isn't working when run from a script for some reason.
Here is the code for the function that runs the spider from the script (located within the spider file):
# Creates a MySpider3, configures logging, hands the spider to a CrawlerRunner
# and registers a callback that stops `reactor` whether the crawl succeeds or
# fails (addBoth).
# NOTE(review): `bandname` is not used by any line shown here, and no call that
# starts the reactor is visible — confirm against the full script.
def spiderCrawl(bandname):
aSpider = MySpider3()
configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})
runner = CrawlerRunner()
d = runner.crawl(aSpider)
d.addBoth(lambda _: reactor.stop())
function that creates the url:
# Builds the ticket-page URL for `bandname` (the URL prefix is blank in this
# listing).
# NOTE(review): `start_urls` below is a local variable; it is discarded when
# the method returns, and no attribute of `self` is modified.
def create_link(self, bandname):
tc_url = "" + bandname + "-tickets.html"
start_urls = [tc_url]
Also, below is an image of the terminal with the error message. The fact that a random bandname was entered suggests that the url wasn't even read in the first place. What could be the problem, here? Any help would be appreciated, thanks.
So it seems that the problem was that my create_link method inside of the spider class wasn't properly adding the link to the start_urls list, but the script does seem to be running the spider when I use the raw_input statement inside of the spider file as opposed to the script. What would be the proper way to pass the argument of the user's input to the spider file to be added as a link? I have the code for the spider and the script running the spider below to make the post more complete:
script code
from ticket_city_scraper.ticket_city_scraper import *
from ticket_city_scraper.ticket_city_scraper.spiders import tc_spider
bandname = raw_input("Enter bandname\n") # I took out this line and added it to the spider file to make the script work
spider file
# Spider named 'comparator'. parse() builds one item loader per ticket and
# follows the ticket's link to parse_price(), which adds city/state/date and
# requests a JSON document that parse_json() turns into the final item.
# NOTE(review): indentation is missing and some lines are truncated (e.g.
# `for ticket in`). The XPath strings use `#class` / `#href` where XPath
# attribute syntax is `@class` / `@href`; this looks like an extraction
# artifact — verify against the real file.
class MySpider3(CrawlSpider):
handle_httpstatus_list = [416]
name = 'comparator'
allowed_domains = [""]
# NOTE(review): tc_url is not defined at class level in this listing.
start_urls = [tc_url]
tickets_list_xpath = './/div[#class = "vevent"]'
# Builds the ticket-page URL for `bandname` and stores it as the instance's
# only start URL.
def create_link(self, bandname):
tc_url = "" + bandname + "-tickets.html"
self.start_urls = [tc_url]
#return tc_url
tickets_list_xpath = './/div[#class = "vevent"]'
# Reads the loader from response.meta, takes the 'P' value of each entry under
# key 'B' of the JSON body, stores a ticketPrice and returns the loaded item.
# NOTE(review): both ticketPrice assignments are listed unconditionally;
# presumably the "sold out" pair belonged to a lost else branch.
def parse_json(self, response):
loader = response.meta['loader']
jsonresponse = json.loads(response.body_as_unicode())
ticket_info = jsonresponse.get('B')
price_list = [i.get('P') for i in ticket_info]
if len(price_list) > 0:
str_Price = str(price_list[0])
ticketPrice = unicode(str_Price, "utf-8")
loader.add_value('ticketPrice', ticketPrice)
ticketPrice = unicode("sold out", "utf-8")
loader.add_value('ticketPrice', ticketPrice)
return loader.load_item()
# Adds eventCity, eventState and eventDate to the loader carried in
# response.meta, then requests a JSON URL built from digits extracted from the
# loader's ticketsLink value; the response is handled by parse_json.
def parse_price(self, response):
print "parse price function entered \n"
loader = response.meta['loader']
event_City = response.xpath('.//span[#itemprop="addressLocality"]/text()').extract()
eventCity = ''.join(event_City)
loader.add_value('eventCity' , eventCity)
event_State = response.xpath('.//span[#itemprop="addressRegion"]/text()').extract()
eventState = ''.join(event_State)
loader.add_value('eventState' , eventState)
event_Date = response.xpath('.//span[#class="event_datetime"]/text()').extract()
eventDate = ''.join(event_Date)
loader.add_value('eventDate' , eventDate)
ticketsLink = loader.get_output_value("ticketsLink")
json_id_list= re.findall(r"(\d+)[^-]*$", ticketsLink)
json_id= "".join(json_id_list)
json_url = "" + json_id + "/ticketblocks?P=0,99999999&q=0&per_page=250&page=1&sort=p.asc&f.t=s&_=1436642392938"
yield scrapy.Request(json_url, meta={'loader': loader}, callback = self.parse_json, dont_filter = True)
# Creates a loader per ticket, fills eventName, eventLocation and ticketsLink
# from XPath expressions, and follows the ticket link (resolved against
# response.url) with the loader attached in meta; handled by parse_price.
def parse(self, response):
# """
selector = HtmlXPathSelector(response)
# iterate over tickets
# NOTE(review): the iterable of this loop is missing in the listing.
for ticket in
loader = XPathItemLoader(ComparatorItem(), selector=ticket)
# define loader
loader.default_input_processor = MapCompose(unicode.strip)
loader.default_output_processor = Join()
# iterate over fields and add xpaths to the loader
loader.add_xpath('eventName' , './/span[#class="summary listingEventName"]/text()')
loader.add_xpath('eventLocation' , './/div[#class="divVenue location"]/text()')
loader.add_xpath('ticketsLink' , './/a[#class="divEventDetails url"]/#href')
#loader.add_xpath('eventDateTime' , '//div[#id="divEventDate"]/#title') #datetime type
#loader.add_xpath('eventTime' , './/*[#class = "productionsTime"]/text()')
print "Here is ticket link \n" + loader.get_output_value("ticketsLink")
ticketsURL = "" + loader.get_output_value("ticketsLink")
ticketsURL = urljoin(response.url, ticketsURL)
yield scrapy.Request(ticketsURL, meta={'loader': loader}, callback = self.parse_price, dont_filter = True)
# Creates a MySpider3, configures logging, hands the spider to a CrawlerRunner
# and registers a callback that stops `reactor` whether the crawl succeeds or
# fails (addBoth). The commented-out lines are an earlier CrawlerProcess
# variant.
# NOTE(review): `bandname` is not used by any line shown here, so the spider
# never receives it, and no call that starts the reactor is visible.
def spiderCrawl(bandname):
# process = CrawlerProcess({
# 'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
# })
# process.crawl(aSpider)
# process.start()
aSpider = MySpider3()
configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})
runner = CrawlerRunner()
d = runner.crawl(aSpider)
d.addBoth(lambda _: reactor.stop())
I can only guess, since you didn't provide an MCVE. However, I'd say that in your function create_link, this line:
start_urls = [tc_url]
should really be:
self.start_urls = [tc_url]

Getting 403 error when trying to parse dropbox events page with python and mechanize

I use this script to get a list of all file updates to a certain directory. I then parse that list to get a list of time slots I have been active in that directory. That way I can quickly see how much time I have spent on the project and know what to charge my client.
I have written a small python script, adapted from this:
I added the bottom function to retrieve a specific events page from
I last used the script two months ago and it worked well, but now I am getting 403: forbidden errors on:
eventSrc =
Probably DropBox tries to block scrapers like mine to push programmers to use their API instead, but unfortunately the API doesn't support listing the events.
Can anybody help me get it working again?
This is the python code to create the connection:
import mechanize
import urllib
import re
import json
# NOTE(review): indentation is missing from this listing and several statements
# are truncated (e.g. the tail of the __init__ line, `home_src =`,
# `dir_info = json.loads(`, `eventSrc =`). The raise statements that appear
# without a visible condition presumably sat in lost try/except or if blocks.
# The comments describe only what is still visible.
class DropboxConnection:
""" Creates a connection to Dropbox """
# Class-level defaults for the state the methods below store on the instance.
email = ""
password = ""
root_ns = ""
token = ""
browser = None
def __init__(self, email, password): = email
self.password = password
def login(self):
""" Login to Dropbox and return mechanize browser instance """
# Fire up a browser using mechanize
self.browser = mechanize.Browser()
self.browser.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:14.0) Gecko/20120722 Firefox/14.0.1')]
# Browse to the login page'')
# Enter the username and password into the login form
isLoginForm = lambda l: l.action == "" and l.method == "POST"
self.browser = None
raise(Exception('Unable to find login form'))
self.browser['login_email'] =
self.browser['login_password'] = self.password
self.browser['t'] = "1230"
# Send the form
response = self.browser.submit()
# Extracts root_ns and TOKEN from the page source with regular expressions and
# stores them on the instance for later requests.
def get_constants(self):
""" Load constants from page """
home_src ='').read()
self.root_ns = re.findall(r"root_ns: (\d+)", home_src)[0]
self.token = re.findall(r"TOKEN: '(.+)'", home_src)[0]
raise(Exception("Unable to find constants for AJAX requests"))
def upload_file(self, local_file, remote_dir, remote_file):
""" Upload a local file to Dropbox """
if(not self.is_logged_in()):
raise(Exception("Can't upload when not logged in"))'')
# Add our file upload to the upload form
isUploadForm = lambda u: u.action == "" and u.method == "POST"
raise(Exception('Unable to find upload form'))
# The "dest" control is made writable before the target directory is set.
self.browser.form.find_control("dest").readonly = False
self.browser.form.set_value(remote_dir, "dest")
self.browser.form.add_file(open(local_file, "rb"), "", remote_file)
# Submit the form with the file
# Returns a dict mapping local file name -> file URL for the non-directory
# entries of dir_info['file_info'].
# NOTE(review): urllib2 is used here, but only urllib is imported at the top of
# this listing.
def get_dir_list(self, remote_dir):
""" Get file info for a directory """
if(not self.is_logged_in()):
raise(Exception("Can't download when not logged in"))
req_vars = "ns_id=" + self.root_ns + "&referrer=&t=" + self.token
req = urllib2.Request('' + remote_dir, data=req_vars)
req.add_header('Referer', '' + remote_dir)
dir_info = json.loads(
dir_list = {}
# Entries are read positionally: [0] is tested to drop directories, [3] is the
# absolute file name and [8] the file URL.
for item in dir_info['file_info']:
# Eliminate directories
if(item[0] == False):
# get local filename
absolute_filename = item[3]
local_filename = re.findall(r".*\/(.*)", absolute_filename)[0]
# get file URL and add it to the dictionary
file_url = item[8]
dir_list[local_filename] = file_url
return dir_list
# Looks the file up in get_dir_list(remote_dir); a KeyError propagates when
# remote_file is not listed.
def get_download_url(self, remote_dir, remote_file):
""" Get the URL to download a file """
return self.get_dir_list(remote_dir)[remote_file]
# NOTE(review): fh is not closed in the visible lines.
def download_file(self, remote_dir, remote_file, local_file):
""" Download a file and save it locally """
fh = open(local_file, "wb")
fh.write(, remote_file)).read())
# NOTE(review): as listed this returns True unconditionally; the second return
# is unreachable, so its condition was presumably lost.
def is_logged_in(self):
""" Checks if a login has been established """
return True
return False
# Builds a mechanize.Request for events page `n` with the url-encoded fields
# cur_page=n and ns_id='false' and returns eventSrc. The URL and the
# expression assigned to eventSrc are blank/truncated in this listing.
def getEventsPage(self, n):
if(not self.is_logged_in()):
raise(Exception("Can't get event page when not logged in"))
url = ''
values = {'cur_page': n, 'ns_id': 'false'}
data = urllib.urlencode(values)
req = mechanize.Request(url, data)
# print url + '?' + data
eventSrc =
return eventSrc
And this is the loop that parses the events pages:
from dbupload import DropboxConnection
from getpass import getpass
from bs4 import BeautifulSoup
import re
import parsedatetime.parsedatetime as pdt
import parsedatetime.parsedatetime_consts as pdc
# Script body: creates a DropboxConnection, then walks the events pages from
# n = 250 down to 0 and writes one "Date: ... file: ..." line to
# all_file_updates.txt for every table row that contains a matching link.
# NOTE(review): indentation is missing from this listing; dateFile is not
# closed in the visible lines.
c = pdc.Constants()
p = pdt.Calendar(c)
email = "" # raw_input("Enter Dropbox email address:")
password = getpass("Enter Dropbox password:")
dateFile = open('all_file_updates.txt', "wb")
# Create the connection
conn = DropboxConnection(email, password)
# NOTE(review): both messages are printed unconditionally as listed; the
# try/except that chose between them was presumably lost.
print("Connection failed")
print("Connection succesful")
n = 250
found = 0
while(n >= 0):
eventsPageSrc = conn.getEventsPage(n)
soup = BeautifulSoup(eventsPageSrc)
table = soup.find("table", {"id": "events"})
for row in table.findAll('tr'):
# The href pattern is blank in this listing ('^' matches every href).
link = row.find("a", href=re.compile('^'))
if(link != None):
# The text of the row's "modified" cell is parsed into a date by parsedatetime.
dateString = row.find("td", attrs={'class': 'modified'}).string
date = p.parse(dateString)
dateFile.write('Date: ' + str(date) + ' file: ' + link.string + '\n')
found = found + 1
n = n - 1
print 'page: ' + str(n) + ' Total found: ' + str(found)
In def get_constants(self): change
self.token = re.findall(r"TOKEN: '(.+)'", home_src)[0]
to
self.token = re.findall(r'TOKEN: "(.+)"', home_src)[0]
because Dropbox has changed the way it stores its constants.
Hope it helps.

Python: instance has no attribute 'sendAMail'

I just get the error:
AttributeError: SecondLife instance has no attribute 'sendAMail'
What's wrong?
(I checked the formatting, and that is not the cause.
I checked the syntax, and that is not the cause either.)
What the script does is open a URL with cookies; I want to extract some information from the page.
import urllib2, cookielib, re
import ClientForm
import re
import smtplib
kurse = ['Entwicklung von Multimediasystemen', 'Computergrafik', 'Gestaltung von Multimediasystemen', 'Verteilte Systeme']
# Logs in to a web page through a form and mails a notification when a course
# name is found.
# NOTE(review): indentation is missing from this listing and several statements
# are truncated (`match =`, `final =`, the `if (` test, the `exit()` line) or
# have lost their try:/else: headers. The comments describe only what is still
# visible.
class SecondLife:
# usernames / password: stored as self.username / self.password.
# NOTE(review): `opener` is built from the cookie processor but, in the visible
# lines, is neither stored nor installed; login() calls urllib2.urlopen directly.
def __init__(self, usernames, password):
self.username = usernames
self.password = password
self.url = ''
cookiejar = cookielib.LWPCookieJar()
cookiejar = urllib2.HTTPCookieProcessor(cookiejar)
# debugger = urllib2.HTTPHandler(debuglevel=1)
opener = urllib2.build_opener(cookiejar)
# Sends a plain message "SUBJECT: <subject>\n\n<text>" to `listener` through
# `smtp_server`.
# NOTE(review): `user` and `password` are accepted but not used by the visible
# lines.
def sendAMail(self, smtp_server, user, password, listener, subject, text):
smtp = smtplib.SMTP(smtp_server)
msg = "SUBJECT: " + subject + "\n\n" + text
smtp.sendmail("", listener, msg)
# Opens self.url, parses its forms with ClientForm and fills the username and
# password fields of the first form.
def login(self):
response = urllib2.urlopen(self.url)
forms = ClientForm.ParseResponse(response, backwards_compat=False)
# forms[0] is 'GET', forms[1] is 'POST'
form = forms[0]
form['username'] = self.username
form['password'] = self.password
except Exception, e:
print 'The following error occured: \n"%s"' % e
print 'A good idea is to open a browser and see if you can log in from there.'
print 'URL:', self.url
exit() = urllib2.urlopen('submit')).read()
def friends_online(self):
final = ""
final_asi = ""
leistungsstand = ""
match ="asi=\w*\d*\"",
if match:
final =
# Strip the `asi=` prefix and the trailing quote to keep only the value.
final_asi = re.sub("asi=", "", final)
final_asi = re.sub("\"", "", final_asi)
print "vorher: " + final
print "nachher: " + final_asi
leistungsstand_url = "" + final_asi
leistungsstand = urllib2.urlopen(leistungsstand_url).read()
print "not match"
# Log out
logout = ""
website = open("lsf.html", "w")
for kurs in kurse:
print kurs
if (, "fajfjsjj Entwicklung von Multimediasystemen hahahah")):
# NOTE(review): the subject passed here is the literal string "kurs", not the
# loop variable `kurs`.
self.sendAMail("", "user", "passw", "", "kurs" , "Eine neue Note ist im LSF eingetragen.")
#self.final_asi.replace(new, "asi=","")
#print "Final " + asi
SL = SecondLife('xyz', 'xyz')
Works for me: printing out self.sendAMail from within an instance gives
<bound method SecondLife.sendAMail of <__main__.SecondLife instance at 0x101d91e18>>
I think it is a formatting issue, though. If I copy and paste your code and look at the whitespace, I see mixed use of spaces and tabs. In particular:
In [20]: [line for line in d if 'def' in line]
[' def __init__(self, usernames, password):\n',
' \tdef sendAMail(self, smtp_server, user, password, listener, subject, text):\n',
' def login(self):\n',
' def friends_online(self):\n']
The \t before def sendAMail looks very suspicious. I'm 75% sure the inconsistent whitespace is what's causing the problem. Try running your script using python -tt, which will throw an error about inconsistent tab usage.
