For a project I am trying to automate uploading plugins into WordPress. I can log in successfully using requests, but when I attempt to upload a plugin I get a 403.
import requests
import bs4 as bs

host = 'testdomain.xx'
user = 'user'
pword = 'pass'

wp_admin = 'http://' + host + '/wp-admin/'
wp_login = 'http://' + host + '/wp-login.php'
wp_plugin = 'http://' + host + '/wp-admin/update.php?action=upload-plugin'

with requests.Session() as session:
    headers1 = {
        'Cookie': 'wordpress_test_cookie=WP Cookie check',
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7)',
    }
    datas = {
        'log': user,
        'pwd': pword,
        'wp-submit': 'Log In',
        'redirect_to': wp_admin,
        'testcookie': '1',
    }
    session.post(wp_login, headers=headers1, data=datas)

    # scrape the _wpnonce value from the plugins page
    wp = session.get('http://' + host + '/wp-admin/plugins.php')
    soup = bs.BeautifulSoup(wp.text, 'html.parser')
    wp_nonce = soup.find('input', {'id': '_wpnonce'})['value']

    data = {
        '_wp_http_referer': '/wordpress/wp-admin/plugin-install.php',
        '_wpnonce': wp_nonce,
        'install-plugin-submit': 'Install Now',
        'pluginzip': ('plugin.zip', open('./plugin.zip', 'rb'),
                      'application/octet-stream'),
    }
    upload_result = session.post(wp_plugin, data=data)
    print(upload_result)
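One likely culprit, offered as a sketch rather than a confirmed fix: requests only encodes a file tuple as multipart/form-data when it is passed through the files argument, so putting pluginzip inside data sends no file at all. WordPress nonces are also action-specific, so a _wpnonce scraped from plugins.php may not be accepted by the uploader; the upload form on plugin-install.php?tab=upload carries the matching one. A minimal variant of the final request under those assumptions:

# inside the `with requests.Session() as session:` block, after logging in:
# scrape the nonce from the actual upload form (WordPress nonces are action-specific)
upload_page = session.get('http://' + host + '/wp-admin/plugin-install.php?tab=upload')
upload_soup = bs.BeautifulSoup(upload_page.text, 'html.parser')
upload_nonce = upload_soup.find('input', {'id': '_wpnonce'})['value']

data = {
    '_wp_http_referer': '/wordpress/wp-admin/plugin-install.php',
    '_wpnonce': upload_nonce,
    'install-plugin-submit': 'Install Now',
}
# the file goes through files= so requests builds a multipart body
files = {
    'pluginzip': ('plugin.zip', open('./plugin.zip', 'rb'), 'application/zip'),
}
upload_result = session.post(wp_plugin, data=data, files=files)
print(upload_result.status_code)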
I am trying to scrape data with requests from https://enlinea.sunedu.gob.pe/verificainscripcion. The form takes a doc parameter (06950413 in the example), a captcha, and a hidden parameter called _token, which I extract with XPath. I also grab the captcha image with XPath and download it into an imagenes folder. Then I pause the script with input() while I type the captcha letters into captcha.txt; once I have typed the captcha I hit Enter to continue, but I get a JSON response with a captcha error. This is my code:
from time import sleep
import io

import requests
from lxml import html
from PIL import Image  # pip install Pillow

headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebkit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36",
    "Host": "enlinea.sunedu.gob.pe",
    "Origin": "https://enlinea.sunedu.gob.pe",
    "Referer": "https://enlinea.sunedu.gob.pe/verificainscripcion",
}

session = requests.Session()
login_form_url = 'https://enlinea.sunedu.gob.pe/verificainscripcion'
login_form_res = session.get(login_form_url, headers=headers)
sleep(5)

parser = html.fromstring(login_form_res.text)
special_token = parser.xpath('//input[@name="_token"]/@value')
print('token:', special_token[0])

span_image = parser.xpath('//div[@class="pull-right"]/span[@id="captchaImgPriv"]/img')[0].get("src")
print(span_image)
image_content = requests.get(span_image).content
image_file = io.BytesIO(image_content)
image = Image.open(image_file).convert('RGB')

file_path = './imagenes/captcha.jpg'
with open(file_path, 'wb') as f:
    image.save(f, "JPEG", quality=85)

input()

login_url = 'https://enlinea.sunedu.gob.pe/consulta'
login_data = {
    "doc": "06950413",
    "opcion": 'PUB',
    "_token": special_token[0],
    "icono": '',
    "captcha": open('captcha.txt').readline().strip()
}
print(login_data)

rep = session.post(
    login_url,
    data=login_data,
    headers=headers
)
print(rep.text)
Thanks in advance.
The issue is that you didn't use the session when you requested the captcha image. That request sets a cookie which has to be sent back along with the form.
The following script uses BeautifulSoup instead of lxml/XPath; it downloads the captcha, shows it, waits for user input, and then fetches the data:
import requests
from bs4 import BeautifulSoup
from PIL import Image
import io
import json
host = "https://enlinea.sunedu.gob.pe"
doc = "06950413"
s = requests.Session()
r = s.get(f"{host}/verificainscripcion")
soup = BeautifulSoup(r.text, "html.parser")
payload = dict([
    (t["name"], t.get("value", ""))
    for t in soup.find("form", {"id": "consultaForm"}).find_all("input")
])
payload["doc"] = doc
image_content = s.get(f'{host}/simplecaptcha').content
image_file = io.BytesIO(image_content)
image = Image.open(image_file).convert('RGB')
image.show()
captcha = input("please enter captcha : ")
payload["captcha"] = captcha
print(payload)
r = s.post(f"{host}/consulta", data=payload)
data = json.loads(r.text)
print(data)
Output:
please enter captcha : YLthP
{'doc': '06950413', 'opcion': 'PUB', '_token': 'gfgUTJrqPcmM9lyFHqW0u5aOdoF4gNSJm60kUNRu', 'icono': '', 'nombre': '', 'captcha': 'YLthP'}
[{"ID":"1519729","NOMBRE":"OSORIO DELGADILLO, FLOR DE MARIA","DOC_IDENT":"DNI 06950413","GRADO":"<b>BACHILLER EN EDUCACION ","TITULO_REV":"<b>BACHILLER EN EDUCACION","GRADO_REV":null,"DIPL_FEC":"26\/06\/1987","RESO_FEC":"-","ESSUNEDU":"0","UNIV":"UNIVERSIDAD INCA GARCILASO DE LA VEGA ASOCIACI\u00d3N CIVIL","PAIS":"PERU","COMENTARIO":"-","TIPO":"N","TIPO_GRADO":"B","DIPL_TIP_EMI":null,"TIPO_INSCRI":null,"NUM_DIPL_REVA":null,"NUM_ORD_PAG":null,"V_ORIGEN":null,"NRO_RESOLUCION_NULIDAD":null,"FLG_RESOLUCION_NULIDAD":null,"FECHA_RESOLUCION_NULIDAD":null,"MODALIDAD_ESTUDIO":"-"}]
I am trying to create a script that submits a form and returns the results. I can pull the form information from the URL, but I am not able to update the form's fields or get a response.
I currently have:
import requests
from bs4 import BeautifulSoup as bs
url = 'https://dos.elections.myflorida.com/campaign-finance/contributions/'
response = requests.get(url)
soup = bs(response.text, 'html.parser')
form_info = soup.find_all('form')
print(form_info[0]['action'])
Which works and returns:
'/cgi-bin/contrib.exe'
The form should be submittable with its defaults, so I then try:
session = requests.Session()
BASE_URL = 'https://dos.elections.myflorida.com'
headers = {'User-Agent': "Mozilla/5.0" , 'referer' :'{}/campaign-finance/contributions/'.format(BASE_URL)}
data = {'Submit' : 'Submit'}
res = session.post( '{}/cgi-bin/contrib.exe'.format(BASE_URL), data = data, headers = headers )
And I get a 502 response. I formatted the referer and URL that way because of a related post. The form lives at https://dos.elections.myflorida.com/campaign-finance/contributions/ and submitting it redirects to https://dos.elections.myflorida.com/cgi-bin/contrib.exe.
The solution by SIM worked, thanks!!
Try the following to get the required content using the default search:
import requests
from bs4 import BeautifulSoup

link = 'https://dos.elections.myflorida.com/campaign-finance/contributions/'
post_url = 'https://dos.elections.myflorida.com/cgi-bin/contrib.exe'

with requests.Session() as s:
    s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
    r = s.get(link)
    soup = BeautifulSoup(r.text, "lxml")
    payload = {i['name']: i.get('value', '') for i in soup.select('input[name]')}
    payload['election'] = '20201103-GEN'
    payload['search_on'] = '1'
    payload['CanNameSrch'] = '2'
    payload['office'] = 'All'
    payload['party'] = 'All'
    payload['ComNameSrch'] = '2'
    payload['committee'] = 'All'
    payload['namesearch'] = '2'
    payload['csort1'] = 'NAM'
    payload['csort2'] = 'CAN'
    payload['queryformat'] = '2'
    r = s.post(post_url, data=payload)
    print(r.text)
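If queryformat='2' makes the CGI return tab-delimited text rather than an HTML page, as the field name hints (worth confirming against the live response), the rows can be parsed with the csv module. A sketch under that assumption:

import csv
import io

# assumes r.text holds tab-delimited rows with a header line
reader = csv.reader(io.StringIO(r.text), delimiter='\t')
rows = list(reader)
print(rows[0])        # header row
print(len(rows) - 1)  # number of result rows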
I am trying to crawl my personal information from a website that requires login.
My work is below:
# coding:utf-8
import os, time
import urllib.request
import urllib.parse
import http.cookiejar
import requests
from bs4 import BeautifulSoup

# =================== urls ===================
login_url = 'https://passport.jd.com/uc/login'
info_url = 'http://i.jd.com/user/info'

# =================== post_data gathering ===================
login = urllib.request.urlopen(login_url)
loginSoup = BeautifulSoup(login, 'html.parser')
uuid = loginSoup.find_all('form')[0].find_all('input')[0]['value']
clrName = loginSoup.find_all('form')[0].find_all('input')[6]['name']
clrValue = loginSoup.find_all('form')[0].find_all('input')[6]['value']

# jd login page captcha
checkPicUrl = loginSoup.find_all('div', id='o-authcode')[0].find_all('img')[0]['src2']
print(checkPicUrl)
# print(get_html('http:'+checkPicUrl))
image = urllib.request.urlopen('http:' + checkPicUrl + '&yys=' + str(int(time.time() * 1000)))
if image.getcode() == 200:
    urllib.request.urlretrieve('http:' + checkPicUrl, "checkPic.jpg")
else:
    print('unable to get image!')

os.startfile(r'E:\Projects\Python\jd_scrapy\checkPic.jpg')
checkCode = input('enter captcha: ')

# =================== cookie ===================
# # Storing cookies in cj variable
# cj = http.cookiejar.CookieJar()
# # Defining a handler for later http operations with cookies(cj_).
# opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
# urllib.request.install_opener(opener)

# =================== login ===================
login_info = {
    'chkRememberMe': 'on',
    'loginname': '<USERSNAME>',
    'nloginpwd': '<PASSWORD>',
    'loginpwd': '<PASSWORD>',
    'machineNet': '',
    'machineCpu': '',
    'machineDisk': '',
    str(clrName): str(clrValue),
    'uuid': uuid,
    'authcode': checkCode
}
post_data = urllib.parse.urlencode(login_info).encode(encoding='UTF-8')

session = requests.session()
# header
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
}

# login
passport = session.post(login_url, data=post_data, headers=headers)
print(passport)  # <Response [200]>

# ============== page after login ==============
info_page = urllib.request.urlopen(info_url)
info = info_page.read()
print(info)
The program runs fine with whatever captcha I enter: the login POST returns <Response [200]>, but the info page fetched afterwards shows nothing like the raw HTML I expected to see after logging in.
I don't think this program really logs me into the account. Can anyone tell me why?
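One detail stands out (an observation about the code as posted, not a verified fix): the login is performed through the requests session, but the final request goes through urllib.request.urlopen, which knows nothing about that session's cookies, so even a successful login could not carry over. A minimal sketch of fetching the info page through the same session:

# reuse the session that performed the login so its cookies are sent along
info_page = session.get(info_url, headers=headers)
print(info_page.text)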
I want to save some images from a website using Python's urllib2, but when I run the code it saves something else.
This is my code:
import urllib2
from bs4 import BeautifulSoup

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

url = "http://m.jaaar.com/"
r = urllib2.Request(url, headers=headers)
page = urllib2.urlopen(r).read()
soup = BeautifulSoup(page)

imgTags = soup.findAll('img')
imgTags = imgTags[1:]

for imgTag in imgTags:
    imgUrl = "http://www.jaaar.com" + imgTag['src']
    imgUrl = imgUrl[0:-10] + imgUrl[-4:]
    fileName = "khabarnak-" + imgUrl[-12:]
    print fileName
    imgData = urllib2.urlopen(imgUrl).read()
    print imgUrl
    output = open("C:\\wamp\\www\\py\\pishkhan\\" + fileName, 'wb')
    output.write(imgData)
    output.close()
Any suggestions?
The site is returning a standard image back to you because you are scraping the site. Use the same 'trick' of setting the headers when retrieving the image:
imgRequest = urllib2.Request(imgUrl, headers=headers)
imgData = urllib2.urlopen(imgRequest).read()
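Slotted into the question's loop, the download step becomes:

for imgTag in imgTags:
    imgUrl = "http://www.jaaar.com" + imgTag['src']
    imgUrl = imgUrl[0:-10] + imgUrl[-4:]
    fileName = "khabarnak-" + imgUrl[-12:]
    # send the same User-Agent header so the site serves the real image
    imgRequest = urllib2.Request(imgUrl, headers=headers)
    imgData = urllib2.urlopen(imgRequest).read()
    output = open("C:\\wamp\\www\\py\\pishkhan\\" + fileName, 'wb')
    output.write(imgData)
    output.close()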
Ok, so a while back I found a basic script to log in to Facebook using Python. It didn't work at first, but after some tweaking (mainly updating the POST strings) it worked well for quite a while. Now it has stopped again, and I suspect Facebook have changed their site a little.
I've tried making further tweaks having captured a login in Firefox and making sure I mimic as many of the post values etc as possible.
I need to log in to the site directly as I have a bunch of scripts that collect data that's available through a browser, but not through the API.
Having spent days trying to fix this I'm still drawing a blank... what am I missing?
import sys
import re
import urllib
import urllib2
import cookielib
import json

def main():
    # Check the arguments
    user = sys.argv[1]
    passw = sys.argv[2]

    # Initialize the needed modules
    CHandler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    browser = urllib2.build_opener(CHandler)
    browser.addheaders = [('Referer', 'http://login.facebook.com'),
                          ('Content-Type', 'application/x-www-form-urlencoded'),
                          ('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729)')]
    urllib2.install_opener(browser)

    res = browser.open('http://m.facebook.com/index.php')
    pg = res.read()
    mxt = re.search('name="li" value="(\w+)"', pg)
    mxt2 = re.search('name="m_ts" value="(\w+)"', pg)
    mxt3 = re.search('name="lsd" value="(\w+)"', pg)
    mxt4 = re.search('name="locale" value="(\w+)"', pg)
    mxt5 = re.search('name="post_form_id" value="(\w+)"', pg)  # extracted so pfi below is defined
    li = mxt.group(1)
    m_ts = mxt2.group(1)
    lsd = mxt3.group(1)
    locale = mxt4.group(1)
    pfi = mxt5.group(1)
    res.close()

    # Initialize the POST data
    data = urllib.urlencode({
        'lsd': lsd,
        'charset_test': urllib.unquote_plus('%E2%82%AC%2C%C2%B4%2C%E2%82%AC%2C%C2%B4%2C%E6%B0%B4%2C%D0%94%2C%D0%84'),
        'version': '1',
        'm_ts': m_ts,
        'li': li,
        'locale': locale,
        'signup_layout': 'header_button',
        'laststage': 'first',
        'post_form_id': pfi,
        'email': user,
        'pass': passw,
        'login': 'Log in'
    })

    url = 'https://login.facebook.com/login.php?login_attempt=1&non_com_login=&' + data
    res = urllib2.urlopen(url)
    print('%s' % url)
    res.close()

    # Get Access Token
    res = browser.open('http://developers.facebook.com/docs/reference/api')
    conft = res.read()

    # For Debugging
    fh = open('debug.html', 'w')
    fh.write(conft)
    fh.close()

    mat = re.search('access_token=(.*?)"', conft)
    acct = mat.group(1)
    print('Using access token: %s' % acct)

if __name__ == '__main__':
    main()
For the record, here is the working answer for the above.
#!/usr/bin/python
import mechanize
browser = mechanize.Browser()
browser.set_handle_robots(False)
cookies = mechanize.CookieJar()
browser.set_cookiejar(cookies)
browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.41 Safari/534.7')]
browser.open("http://m.facebook.com/")
browser.select_form(nr=0)
browser.form['email'] = 'YOUR_LOGIN'
browser.form['pass'] = 'YOUR_PASSWORD'
response = browser.submit()
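To confirm that the login actually took, one option (a sketch, not part of the original answer) is to read the response back and look for a marker that only appears when signed in:

html = response.read()
# crude heuristic: the logout link only shows up for authenticated sessions
print('logout' in html.lower())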