I tried to setup a new random proxy for each run on FireFox. Itried many ways,but only this one works but can't figure how to make it random:
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.http", "Host")
profile.set_preference("network.proxy.http_port", port)
browser = webdriver.Firefox(profile)
I tried this example but not worked:
from selenium.webdriver.common.proxy import *
myProxy = "xx.xx.xx.xx:xxxx"
proxy = Proxy({
'proxyType': ProxyType.MANUAL,
'httpProxy': myProxy,
'ftpProxy': myProxy,
'sslProxy': myProxy,
'noProxy': '' # set this value as desired
})
driver = webdriver.Firefox(proxy=proxy)
driver.get("http://www.google.com")
This is the best way for me because i can use:
myProxy = random.choice(open('data.txt').readlines())
I tried to get proxies from text file this work but don't know how to randomize:
with open('IPs.txt') as proxylist:
for line in proxylist:
proxyserv, proxyport = line.split(':')
proxy= proxyserv , proxyport
And lastlly i tried:
def random_line():
line_num = 0
selected_line = ''
with open('IPs.txt') as f:
while 1:
line = f.readline()
if not line: break
line_num += 1
if random.uniform(0, line_num) < 1:
selected_line = line
return selected_line.strip()
This one get random line but can't figure out how to parse the result to
X= IP
Y= PORT
and then:
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.http", "RANDOM IP")
profile.set_preference("network.proxy.http_port", Random PORT)
browser = webdriver.Firefox(profile)
Port needs to be an integer, you may want to use:
import random
myProxy = random.choice(open('IPs.txt').readlines())
parts = myProxy.strip().split(":") # strip removes spaces and line breaks
host = parts[0]
port = int(parts[1]) # port needs to be an integer
Related
in this code below I just read the proxies from a txt file then I choose a random proxy to connect
but It keep to connect with my original IP please help
while True:
proxy = set()
with open("proxies.txt", "r") as f:
file_lines1 = f.readlines()
for line1 in file_lines1:
proxy.add(line1.strip())
session = req.session()
proxies = {
'http': 'http://' + random.choice(list(proxy))
}
print(proxies)
ip = session.get('https://api.ipify.org', proxies=proxies).text
print(ip)
It only works if I have one user#domain.com:password line in accounts.txt
but as soon as i add more than one email:password lines it gives error, e.g
user1#first.com:password
user2#second.com:password
user3#third.com:password
user4#fourth.com:password
all gives error even if the emails are good or bad, so it's not authenticating.
If it's one line alone it authenticates / connects and tell if the email is good or bad
user1#first.com:password
It's only accurate to first line if only one combo is there but gives error to all as soon as I edit the txt and add more lines of combos
i want it to be able to connect and give connect or notconnect to more than one combolist up to 100k and more, guess there should be an array in the smtp server
Here's my code:
import smtplib
import socks
import codecs
import unicodedata
import random
from multiprocessing.pool import ThreadPool
# PROXY_TYPE_HTTP
# PROXY_TYPE_SOCKS5
proxy_type = socks.PROXY_TYPE_SOCKS5
use_proxies = False
thead_count = 1
use_encrpytion = False
accounts = []
accounts_checked = 0
accounts_valid = []
accounts_invalid = []
proxies = []
def check_account(email, password):
try:
if (use_proxies):
proxy = random.choice(proxies)
proxy_host = proxy.split(':')[0]
proxy_port = int(proxy.split(':')[1])
socks.setdefaultproxy(proxy_type, proxy_host, proxy_port)
socks.wrapmodule(smtplib)
mailserver = smtplib.SMTP("mail." + email[email.index('#') + 1 : ],587)
mailserver.ehlo()
if (use_encrpytion):
mailserver.starttls()
mailserver.login(str(email), str(password))
mailserver.quit()
return True
except smtplib.SMTPAuthenticationError:
return False
def get_status(account):
global accounts_checked, accounts
if (':' not in account):
return False
email = account.split(':')[0]
password = account.split(':')[1]
valid = check_account(email, password)
if (valid):
print("Valid: ", account)
f1 = open("connect.txt", "a+")
f1.write(account)
f1.close()
accounts_valid.append(account)
else:
f2 = open("not_connect.txt", "a+")
f2.write(account)
f2.close()
accounts_invalid.append(account)
accounts_checked += 1
print("(" + str(accounts_checked) + "/" + str(len(accounts)) + ")")
return valid
if __name__ == "__main__":
if (use_proxies):
print("Reading \"proxies.txt\"...")
with open("proxies.txt") as f:
for line in f:
if (':' in line):
proxies.append(line)
print("Found " + str(len(proxies)) + " proxies.")
print("Reading \"accounts.txt\"...")
with codecs.open("accounts.txt", encoding='utf-8') as f:
for line in f:
line = unicodedata.normalize('NFKD', line).encode('ascii','ignore').decode('ascii')
if (':' in line):
accounts.append(line.replace("\n", "").replace("\t", ""))
print("Found " + str(len(accounts)) + " accounts.")
print("Creating thread pool...")
pool = ThreadPool(thead_count)
results = pool.map(get_status, accounts)
pool.close()
pool.join()
print("Done checking, writing output...")
print("Completed!")
So I've found this code on GitHub for gathering IPs from: https://free-proxy-list.net/ and rotate them. But I get an error message when I try to run it.
I am using ChromeDriver 2.41 because I was first getting a differernt error regarding the Socks integer. Using ChromeDriver 2.41 has solved that, but I still can't get past this 'pxy' reference.
ALSO, pycharm is alerting me that 'pd' has a redeclared definition without usage. I'd really appreciate some help with the 'pxy' and 'pd' errors!
This is the code :
from selenium import webdriver
from selenium.webdriver.chrome.options import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy, ProxyType
import time
co = webdriver.ChromeOptions()
co.add_argument("log-level=3")
co.add_argument("--headless")
def get_proxies(co=co):
driver = webdriver.Chrome(chrome_options=co)
driver.get("https://free-proxy-list.net/")
PROXIES = []
proxies = driver.find_elements_by_css_selector("tr[role='row']")
for p in proxies:
result = p.text.split(" ")
if result[-1] == "yes":
PROXIES.append(result[0]+":"+result[1])
driver.close()
return PROXIES
ALL_PROXIES = get_proxies()
def proxy_driver(PROXIES, co=co):
prox = Proxy()
if PROXIES:
pxy = PROXIES[-1]
else:
print("--- Proxies used up (%s)" % len(PROXIES))
PROXIES = get_proxies()
prox.proxy_type = ProxyType.MANUAL
prox.http_proxy = pxy
prox.socks_proxy = pxy
prox.ssl_proxy = pxy
capabilities = webdriver.DesiredCapabilities.CHROME
prox.add_to_capabilities(capabilities)
driver = webdriver.Chrome(chrome_options=co, desired_capabilities=capabilities)
return driver
# --- YOU ONLY NEED TO CARE FROM THIS LINE ---
# creating new driver to use proxy
pd = proxy_driver(ALL_PROXIES)
# code must be in a while loop with a try to keep trying with different proxies
running = True
while running:
try:
mycodehere()
# if statement to terminate loop if code working properly
something()
# you
except:
new = ALL_PROXIES.pop()
# reassign driver if fail to switch proxy
pd = proxy_driver(ALL_PROXIES)
print("--- Switched proxy to: %s" % new)
This is the error I get:
Traceback (most recent call last):
File "scripts2.py", line 65, in <module>
pd = proxy_driver(ALL_PROXIES)
File "scripts2.py", line 53, in proxy_driver
prox.http_proxy = pxy
UnboundLocalError: local variable 'pxy' referenced before assignment
I'm a little confused because I thought 'pxy' is assigned under if PROXIES?
Try changing this below lines
current code:
if PROXIES:
pxy = PROXIES[-1] # script will fail if this condition not met
else:
print("--- Proxies used up (%s)" % len(PROXIES))
PROXIES = get_proxies()
Updated code:
# make sure to reset pxy to either null or empty
pxy = ''
if (PROXIES is None):
#print("--- Proxies used up (%s)" % len(PROXIES))
PROXIES = get_proxies()
pxy = PROXIES[-1]
# I would check if pxy is empty or not before doing assignment
if (pxy!=''):
#Then do the logic here
So I am trying to download and write a csv file onto my computer from a site that requires my Email Address and password as authentication for the site. I have the following code:
import cStringIO
import pycurl
import urllib
url = 'http://www.riglocator.ca/report=rig%2Frig%2D150226%2Ecsv'
def GetPage(url, proxy=None):
if proxy:
port = 8888
proxy = proxy.replace("socks://", "")
if ":" in proxy:
port = int(proxy.rsplit(":", 1)[1])
proxy = proxy.rsplit(":", 1)[0]
try:
buf = cStringIO.StringIO()
c = pycurl.Curl()
c.setopt(c.URL, url)
c.setopt(c.WRITEFUNCTION, buf.write)
c.setopt(c.CONNECTTIMEOUT, 5)
c.setopt(c.TIMEOUT, 8)
if proxy:
c.setopt(pycurl.PROXY, proxy)
c.setopt(pycurl.PROXYPORT, port)
c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
c.setopt(pycurl.USERPWD, 'john#mail.com:password123')
c.setopt(c.FOLLOWLOCATION, True)
c.perform()
c.close()
results = buf.getvalue()
buf.close()
except:
results = ""
return results
GetPage(url,"socks://127.0.0.1:8888")
def loader():
csv_url = GetPage(url,"socks://127.0.0.1:8888")
r = urllib.urlopen(csv_url)
print(r)
csv = r.read()
csv_str = str(csv)
lines = csv_str.split('\\n')
dest_url = r'mapfile.csv'
fx = open(dest_url, 'w')
for line in lines:
fx.write(line + '\n')
fx.close()
loader()
But this still returns the HTML code from the login page, any suggestions?
I am getting this error:
File "C:/Users/cevans/PycharmProjects/RigLocatorMapPull/rigmapscrape.py", line 55, in <module>
loader()
File "C:/Users/cevans/PycharmProjects/RigLocatorMapPull/rigmapscrape.py", line 44, in loader
r = urllib.urlopen(csv_url)
File "C:\Python27\lib\urllib.py", line 87, in urlopen
return opener.open(url)
File "C:\Python27\lib\urllib.py", line 208, in open
return getattr(self, name)(url)
File "C:\Python27\lib\urllib.py", line 463, in open_file
return self.open_local_file(url)
File "C:\Python27\lib\urllib.py", line 477, in open_local_file
raise IOError(e.errno, e.strerror, e.filename)
IOError: [Errno 2] The system cannot find the path specified: ''
Process finished with exit code 1
Here is a link to some code I wrote to grab a file with pycurl, it should do basically what you need to do. You just need to add the option c.setopt(pycurl.USERPWD, 'username:userpass') do my code to set your username and password.
http://prestongarrison.com/proper-python-pycurl-example/
#This is a solution using the Mechanize browser library which takes the url,
#changes it to the current date, submits the username/password in a form,
#downloads a csv and writes it to a folder location:
__author__ = 'cevans'
import mechanize
import os
import cookielib
import datetime, string
USERNAME = 'xxxx'
PASSWORD = 'xxxxx'
OLDURL = 'http://www.oldurl.com/report050301'
folder = r'\\Driver'
def loader():
#Takes current date and changes URL to grab correct datefile (Schedule only runs on day of week)
cdate = str(datetime.date.today().strftime("%y%m%d"))
DATAURL = string.replace(OLDURL,'150301',cdate)
# Browser and Cookie Jar
br = mechanize.Browser()
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)
# Browser options
br.set_handle_equiv(True)
br.set_handle_gzip(False)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(True)
# Follows refresh 0 but not hangs on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
# Opens site:
r = br.open(DATAURL)
html = r.read()
br.select_form(nr=0)
br.form['nauthemail']= USERNAME
br.form['password']=PASSWORD
br.submit()
r = br.open(DATAURL)
#Read and write file to csv, in folder
csv = r.read()
csv_str = str(csv)
lines = csv_str.split('\\n')
fname = 'map-'+ cdate
base_filename=fname
filename_suffix = '.csv'
folder1 = os.path.join(folder, base_filename + filename_suffix)
dest_url = folder1
fx = open(dest_url, 'w')
for line in lines:
fx.write(line + '\n')
fx.close()
loader()
Here's the code I have for setting it:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.resourceTimeout"] = ("5000")
driver = webdriver.PhantomJS(desired_capabilities=dcap)
However in my super long script, it doesn't seem to timeout when my internet is slow and a page takes longer than 5 seconds to load.
There is so little documentation on PhantomJS time outs, and even less of it is for Python, so I figure maybe this isn't even the way to do it.
Has anyone successfully set a timeout with PhantomJS in Python?
Thanks!
instead of targeting phantomjs resource timeout, you can set a timeout for Driver like below example of Firefox driver:
browser = webdriver.Firefox()
browser.set_page_load_timeout(30)
you can change 30 to any numeric
Thanks
You could use the default Python sleep method to retry a few times.
import time
time.sleep()
FIREFOX = 'firefox'
PHANTOM = 'phantom'
NO_IMAGES = False
NEED_IMAGES = True
opened_pages_counter = 0
driver = None
details = {}
user_agent_mozilla = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
print("[x] UNIVERSAL_DRIVER module loaded")
if platform.system() == 'Windows':
PHANTOMJS_PATH = 'd:/bin/phantomjs/bin/phantomjs.exe'
else:
PHANTOMJS_PATH = './phantomjs'
#atexit.register
def cleanup():
print("universal_driver:: on_exit")
closeDriver()
# PHANTOMJS_PATH = 'd:/bin/phantomjs/bin/phantomjs.exe'
# driver_test = buildDriver(FIREFOX, NO_IMAGES )
# timeout need to be in seconds
# driver_test = buildDriver(FIREFOX, NEED_IMAGES, timeout=100, width=100, height=50)
def buildDriver(driverType, needImages, **kwargs):
global driver, details
closeDriver()
timeout = 60
width=800
height = 600
x=0
y=0
for key in kwargs:
print("another keyword arg: %s: %s" % (key, kwargs[key]))
if key=="timeout": timeout = int(float(kwargs[key]))
if key == "width": width = int(float(kwargs[key]))
if key == "height": height = int(float(kwargs[key]))
if key == "x": x = int(float(kwargs[key]))
if key == "y": y = int(float(kwargs[key]))
details['driverType'] = driverType
details['needImages'] = needImages
if driverType == FIREFOX:
if driver == None:
firefox_profile = webdriver.FirefoxProfile()
if needImages == False:
firefox_profile.set_preference('permissions.default.image', 2)
firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'false')
firefox_profile.set_preference("http.response.timeout", timeout)
firefox_profile.set_preference("dom.max_script_run_time", timeout)
driver = webdriver.Firefox(firefox_profile=firefox_profile)
driver.set_window_size(width, height)
return driver
if driverType == PHANTOM:
if driver == None:
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = user_agent_mozilla
dcap["phantomjs.page.settings.resourceTimeout"] = timeout*1000 # in mls
# dcap["phantomjs.page.settings.loadImages"] = needImages
# driver = webdriver.PhantomJS(desired_capabilities=dcap)
service_argsA = []
if needImages == False:
service_argsA = ['--load-images=no']
driver = webdriver.PhantomJS(PHANTOMJS_PATH, desired_capabilities=dcap, service_args=service_argsA)
driver.set_window_size(width, height)
return driver
def openPage(url): # need to prevent of opening pages twice
global driver
global opened_pages_counter
if driver == None:
driver = buildDriver(PHANTOM, NO_IMAGES)
if driver.current_url != url:
driver.get(url)
opened_pages_counter = opened_pages_counter + 1
uprint("universal_driver::", details['driverType'], ", needImages:", details['needImages'], " ; page opened:", url)
def closeDriver():
global driver
if driver == None:
return
driver.close()
driver.quit()
driver = None
uprint("universal_driver:: driver closed")
def uprint(*objects, sep=' ', end='\n', file=sys.stdout):
enc = file.encoding
if enc == 'UTF-8':
print(*objects, sep=sep, end=end, file=file)
else:
f = lambda obj: str(obj).encode(enc, errors='backslashreplace').decode(enc)
print(*map(f, objects), sep=sep, end=end, file=file)