python unable to identify else statements - python

from requests import *
import json
import base64
import urllib
from cmd import Cmd
url = "http://api.response.htb/"
url_digest = "cab532f75001ed2cc94ada92183d2160319a328e67001a9215956a5dbf10c545"
def get(url, url_digest): data = {
"url": url,
"url_digest": url_digest,
"method": "GET",
"session": "5f7bf45b02c832cf5b40c15ab6d365af",
"session_digest": "a2b9ac69ab85795d13d12857a709a024cd729dcdf2c3fd3bb21ed514bc9990ac"
}
headers = {'Content-Type': 'application/json'}
url_proxy = "http://proxy.response.htb/fetch"
s = Session()
res = s.post(url_proxy, json=data, headers=headers)
body = json.loads(res.text)['body']
body = base64.b64decode(body)
if "zip" in url:
f = open("file.zip", "wb")
f.write(body)
f.close()
print("Done saving file :-");
else: print body
def url_de(url):
s = Session()
res = s.get('http://www.response.htb/status/main.js.php',
cookies={'PHPSESSID': url})
x = res.text.find("session_digest':'")
y = res.text.find("'};")
return res.text[x+17:y]
class pr(Cmd):
prompt = "==> "
def default(self, url): url_digest = url_de(url)
get(url, url_digest)
def do_exit(self, a): exit()
pr().cmdloop()
At line 32, VS Code (Pylance) reports the error "Expected expression" and I am unable to proceed further. I am getting two errors: one at the else and another at the return statement at line 43. If anyone can identify the errors and help me solve them, please help.

Indentation is significant in Python.
You have one line after your if indented, then lines which are not indented. This means the conditional is finished. You then have an else by itself, which is not permitted.
You likely meant:
# Everything that belongs to the "if" branch must share its indentation;
# the "else" then lines up with the "if" it belongs to.
if "zip" in url:
    f = open("file.zip", "wb")
    f.write(body)
    f.close()
    print("Done saving file :-")
else:
    print(body)
But this would be improved by using a context manager:
if "zip" in url:
    # The with-block closes the file even if write() raises.
    with open("file.zip", "wb") as f:
        f.write(body)
    print("Done saving file :-")
else:
    print(body)

This is about the scope of your code.
Just indent lines 28-30 and lines 38-43; then parts 1 and 2 will be inside the if scope,
and parts 3 and 4 will be inside the function scope.

Related

Not getting all possible variables from splitting a web-scraped string

I can't get my program to get every string possible from a split.
Here is one thing I tried:
var2 = "apple banana orange"
for var in var2.split():
#Here I would put what I want to do with the variable, but I put print() to show what happens
print(var)
I got:
applebananaorange
Full Code:
import requests
response = requests.get('https://raw.githubusercontent.com/Charonum/JSCode/main/Files.txt')
responsecontent = str(response.content)
for file in responsecontent.split("\n"):
file = file.replace("b'", "")
file = file.replace("'", "")
file = file.replace(r"\n", "")
if file == "":
pass
else:
print(file)
url = 'https://raw.githubusercontent.com/Charonum/JSCode/main/code/windows/' + file + ""
wget.download(url)
What should I do?
It looks like one of the files in the list is not available. It is good practice to always wrap input/output operations with a try/except to control problems like this. The code below downloads all available files and informs you which files could not be downloaded:
import requests
import wget
from urllib.error import HTTPError
# Fetch the list of file names (one per line) and download each listed file,
# reporting the ones the server refuses instead of aborting on the first failure.
response = requests.get('https://raw.githubusercontent.com/Charonum/JSCode/main/Files.txt')
response.raise_for_status()  # do not treat an error page as a list of file names
# response.text is the decoded body, so there is no "b'...'" repr text or
# literal "\n" sequence to strip, unlike with str(response.content).
for name in response.text.splitlines():
    name = name.strip()
    if not name:
        continue
    url = 'https://raw.githubusercontent.com/Charonum/JSCode/main/code/windows/' + name
    print(url)
    try:
        wget.download(url)
    except HTTPError as err:
        # Report the status the server actually returned rather than assuming 404.
        print(f"Error {err.code}: {url} could not be downloaded")
It seems to work for me replacing the for statement with this one:
for file in responsecontent.split("\\n"):
...
Instead of responsecontent = str(response.content) try:
responsecontent = response.text
and then for file in responsecontent.split().

Write output data to csv

I'm writing a short piece of code in python to check the status code of a list of URLS. The steps are
1. read the URL's from a csv file.
2. Check request code
3. Write the status code request into the csv next to the checked URL
The first two steps I've managed to do but I'm stuck with writing the output of the requests into the same csv, next to the urls. Please help.
import urllib.request
import urllib.error
from multiprocessing import Pool
file = open('innovators.csv', 'r', encoding="ISO-8859-1")
urls = file.readlines()
def checkurl(url):
try:
conn = urllib.request.urlopen(url)
except urllib.error.HTTPError as e:
print('HTTPError: {}'.format(e.code) + ', ' + url)
except urllib.error.URLError as e:
print('URLError: {}'.format(e.reason) + ', ' + url)
else:
print('200' + ', ' + url)
if __name__ == "__main__":
p = Pool(processes=1)
result = p.map(checkurl, urls)
with open('innovators.csv', 'w') as f:
for line in file:
url = ''.join(line)
checkurl(urls + "," + checkurl)
The .readlines() operation leaves the file object at the end of file. When you attempt to loop through the lines of file again, without first rewinding it (file.seek(0)) or closing and opening it again (file.close() followed by opening again), there are no lines remaining. Always recommended to use with open(...) as file construct to ensure file is closed when operation is finished.
Additionally, there appears to be an error in your input to checkurl. You have added a list (urls) to a string (",") to a function (checkurl).
You probably meant for this section to read
# Rewrite the CSV as "url,status" rows, one row per URL read earlier.
with open('innovators.csv', 'w', encoding="ISO-8859-1") as f:
    for line in urls:
        url = line.strip()  # readlines() leaves the linefeed at the end of each line
        if not url:
            continue  # a blank line is not a URL; urlopen('') would raise ValueError
        # End every row with a newline, otherwise all rows run together on one line.
        f.write(url + "," + checkurl(url) + "\n")
The checkurl function should return what you are intending to place into the csv file. You are simply printing to standard output (screen). Thus, replace your checkurl with
def checkurl(url):
    """Open *url* and return a short status string for it.

    :param url: the URL to open.
    :return: ``'200'`` when the URL opens without error,
        ``'HTTPError: <code>'`` when the server answers with an HTTP error,
        ``'URLError: <reason>'`` for any other URL error.
    """
    try:
        # Only success or failure matters here, so close the response right away.
        with urllib.request.urlopen(url):
            pass
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be handled first.
        return 'HTTPError: {}'.format(e.code)
    except urllib.error.URLError as e:
        return 'URLError: {}'.format(e.reason)
    return '200'
or something equivalent to your needs.
Save the status of each URL in a dict, convert it to a DataFrame, and then simply write it to a CSV file. str(code.getcode()) returns '200' if the URL can be opened; otherwise urlopen raises an exception, in which case I assign the status '000'. So your CSV file will contain url,200 if the URL is reachable and url,000 if it is not.
# Map every URL to its status code ("000" when it cannot be opened),
# then write the mapping out as a two-column CSV.
status_dict = {}
for line in lines:
    url = line.strip()  # drop the trailing newline left by reading the file
    try:
        with urllib.request.urlopen(url) as code:
            status = str(code.getcode())
    except Exception:
        # Best effort: any failure to open the URL is recorded as "000".
        status = "000"
    status_dict[url] = status
# A dict of scalars cannot be passed to DataFrame directly; build rows from its items.
df = pd.DataFrame(list(status_dict.items()), columns=['url', 'status'])
df.to_csv('filename.csv', index=False)

Python Requests PUT to update image in Prestashop

I'm trying to update an existing image from a product in prestashop. I'm using Python and Requests and the following code:
import requests
import io
import mimetypes
from PIL import Image
from StringIO import StringIO
api_key = 'test'
url = "https://.../api/images/products/249/445"
file_name = 't3_6kxvzv.jpg'
fd = io.open(file_name, "rb")
content = fd.read()
fd.close()
def encode_multipart_formdata():
"""Encode files to an http multipart/form-data.
:param files: a sequence of (type, filename, value)
elements for data to be uploaded as files.
:return: headers and body.
"""
BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
CRLF = '\r\n'
L = []
L.append('--' + BOUNDARY)
L.append(
'Content-Disposition: form-data; \
name="%s"; filename="%s"' % ("image", file_name))
L.append('Content-Type: %s' % get_content_type(file_name))
L.append('')
L.append(content)
L.append('--' + BOUNDARY + '--')
L.append('')
body = CRLF.join(L)
headers = {
'Content-Type': 'multipart/form-data; boundary=%s' % BOUNDARY
}
return headers, body
def get_content_type(file_name):
"""Retrieve filename mimetype.
:param filename: file name.
:return: mimetype.
"""
return mimetypes.guess_type(file_name)[0] or 'application/octet- stream'
header, body = encode_multipart_formdata()
r = requests.put(url, data=body, auth=(api_key,""), headers= header)
# also tried data = content
r = requests.get(url, auth=(api_key,""))
i = Image.open(StringIO(r.content))
i.show()
I tried various PUT and POST requests with
data = content
but getting only a 400 status code.
I then tried to GET the existing image, which works fine.
The api_key has all the necessary setting to allow PUT and POST.
I then tried to read into how prestapyt is solving this problem, however after importing prestapyt I couldn't follow their documentation to add an image to a product using:
prestashop.add("https://...api/images/products/249/445", files[('image',file_name,content)])
produces:
KeyError: ('image', 't3_6kxvzv.jpg', '\xff\xd8\xff\xe0\x00\x10JFI...
I tried then to modify the encode_multipart_formdata and get_content_type functions to produce a similar solution, but cannot get past the 400 status code.
I would very much prefer to use Requests and try to understand how to update a picture to using prestapyt and a turn-key solution.
Thank you for your time!
Documentation I used:
Prestashop http://doc.prestashop.com/display/PS16/Chapter+9+-+Image+management
prestapyt https://github.com/prestapyt/prestapyt
UPDATE:
I was able to use Requests and POST to add an image to a product via:
url_2 = "https:/.../api/images/products/249"
r = requests.post(url_2, data=body, auth=(api_key,""), headers=header)
Still not able to use PUT to change or update an image.
This answer comes a little bit late, but here it is. You're reinventing the wheel, although it was interesting to see how: now I understand how to build multipart form data from scratch. I tried your code and it fails because you're joining str and bytes in your encode_multipart_formdata function:
L.append(content)
That line will raise a TypeError exception.
Requests can post multipart form data in a very simple way:
files = {'image': ('../imagepath/imagename.jpg', open('../imagepath/imagename.jpg', 'rb'), 'image/jpg')}
body, content_type = requests.models.RequestEncodingMixin._encode_files(files, {})
headers = {
"Content-Type": content_type
}
r=requests.post( url + "images/products/" + str(product_id),data=body, headers=headers)
print(r.text)
This has been tested with Python 3.7 and PrestaShop 1.7.8.2.
Couldn't get PUT to work, so instead used DELETE and POST
import requests
import io
import mimetypes
import xml.etree.ElementTree as ET
import sys
api = ''
urls =["https://.../api/images/products/22",
"https://.../api/images/products/31",
"https://.../api/images/products/37",
"https://.../api/images/products/46",
"https://.../api/images/products/212"]
def encode_multipart_formdata(file_name, content):
    """Encode one file as an HTTP multipart/form-data request.

    :param file_name: name sent in the part's ``filename`` attribute; also
        used to guess the part's Content-Type.
    :param content: the file content, normally bytes (text is encoded as UTF-8).
    :return: headers and body; the body is returned as bytes.
    """
    boundary = '----------ThIs_Is_tHe_bouNdaRY_$'
    crlf = '\r\n'
    if not isinstance(content, bytes):
        content = content.encode('utf-8')
    # Build the textual parts separately and encode them, because str and
    # bytes cannot be joined together.
    head = crlf.join([
        '--' + boundary,
        'Content-Disposition: form-data; name="%s"; filename="%s"' % ("image", file_name),
        'Content-Type: %s' % get_content_type(file_name),
        '',  # blank line separating the part headers from the content
        '',
    ]).encode('utf-8')
    tail = (crlf + '--' + boundary + '--' + crlf).encode('utf-8')
    body = head + content + tail
    headers = {
        'Content-Type': 'multipart/form-data; boundary=%s' % boundary
    }
    return headers, body


def get_content_type(file_name):
    """Retrieve filename mimetype.

    :param file_name: file name.
    :return: the guessed mimetype, or ``application/octet-stream`` when the
        type cannot be guessed.
    """
    return mimetypes.guess_type(file_name)[0] or 'application/octet-stream'
def get_image_url(url):
    """Get the image url from a given url.

    Requests *url* with the API key as the basic-auth user name, parses the
    response as XML and returns the ``xlink:href`` attribute of the first
    ``declination`` element inside the first ``image`` element.

    :raises ValueError: if the response contains no such element.
    """
    r = requests.get(url, auth=(api, ""))
    tree = ET.fromstring(r.content)
    image = tree.find("image")
    declination = image.find("declination") if image is not None else None
    if declination is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError("no image/declination element in the response from %s" % url)
    return declination.get("{http://www.w3.org/1999/xlink}href")
def delete_image(url):
    """Delete the image on prestashop given by url.

    The image's own URL is looked up first with get_image_url(), and the
    DELETE request is sent to that URL.
    """
    url2 = get_image_url(url)
    requests.delete(url2, auth=(api, ""))
def load_image(file_name):
    """Load the image to upload.

    :param file_name: path of the file to read.
    :return: ``(content, file_name)`` where content is the file's raw bytes.
    """
    # The with-block closes the file even if read() raises.
    with io.open(file_name, "rb") as fd:
        content = fd.read()
    return content, file_name
def upload_image(url, file_name):
    """Upload a new image to a given url.

    Reads *file_name*, wraps its content in a multipart/form-data body and
    POSTs it to *url* with the API key as the basic-auth user name.
    """
    content, file_name = load_image(file_name)
    header, body = encode_multipart_formdata(file_name, content)
    requests.post(url, data=body, auth=(api, ""), headers=header)
if __name__ == "__main__":
    # The image to upload is given as the first command-line argument.
    file_name = sys.argv[1]
    # Reading the file here means an unreadable file aborts the script
    # before any existing image has been deleted.
    content, file_name = load_image(file_name)
    # Replace the image of every listed product: delete the old one, upload the new one.
    for i in urls:
        delete_image(i)
        upload_image(i, file_name)
Workaround works fine, still don't understand why there has to be such a complicated way and why PUT doesn't work.

Create and read a Temporary file in a Django Command

I need to read a flow url as csv then this is what I do:
class Command(BaseCommand):
help = 'Admin command to import feed'
def _download_flow(self, url):
req = requests.get(url, stream=True)
if req.status_code == 200:
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
for line in req.iter_lines():
tmp.write(line)
return tmp
raise Exception('error:{}'.format(req.status_code))
def handle(self, *args, **options):
catalog = self._download_flow(options['url'])
with open(catalog.name, 'rU') as csvfile:
reader = csv.DictReader(
csvfile,
delimiter=';',
quotechar='"')
for row in reader:
raise Exception(row)
catalog.close()
Basically, from an url, I create a temporary csv file. Then, now I want to parse this file to work with lines but I don't know why my exception is not raised. (My file has content, i've checked).
Do you have any clue to help me ?
Thanks
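The problem came from the _download_flow() method: iter_lines() yields each line without its line terminator, so the temporary file was written without any line breaks. The correct way to construct the file is: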
def _download_flow(self, url):
    """Download *url* into a temporary ``.csv`` file.

    :param url: address of the feed to download.
    :return: the still-open temporary file object; the file is created with
        ``delete=False``, so it stays on disk after it is closed.
    :raises Exception: if the response status code is not 200.
    """
    req = requests.get(url, stream=True)
    if req.status_code != 200:
        raise Exception('error:{}'.format(req.status_code))
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    # iter_content() yields the raw bytes, line terminators included. Its
    # default chunk size is a single byte, so ask for larger chunks.
    for chunk in req.iter_content(chunk_size=8192):
        tmp.write(chunk)
    # Flush so that a caller re-opening tmp.name sees everything written so far.
    tmp.flush()
    return tmp

Check response using urllib2

I am trying access a page by incrementing the page counter using opencorporates api. But the problem is there are times when useless data is there. For example in the below url for jurisdiction_code = ae_az I get webpage showing just this:
{"api_version":"0.2","results":{"companies":[],"page":1,"per_page":26,"total_pages":0,"total_count":0}}
which is technically empty. How to check for such data and skip over this to move on to next jurisdiction?
This is my code
import urllib2
import json,os
f = open('codes','r')
for line in f.readlines():
id = line.strip('\n')
url = 'http://api.opencorporates.com/v0.2/companies/search?q=&jurisdiction_code={0}&per_page=26&current_status=Active&page={1}?api_token=ab123cd45'
i = 0
directory = id
os.makedirs(directory)
while True:
i += 1
req = urllib2.Request(url.format(id, i))
print url.format(id,i)
try:
response = urllib2.urlopen(url.format(id, i))
except urllib2.HTTPError, e:
break
content = response.read()
fo = str(i) + '.json'
OUTFILE = os.path.join(directory, fo)
with open(OUTFILE, 'w') as f:
f.write(content)
Interpret the response you get back (you already know it's json) and check if the data you want is there.
...
content = response.read()
data = json.loads(content)
if not data.get('results', {}).get('companies'):
break
...
Here's your code written with Requests and using the answer here. It is nowhere near as robust or clean as it should be, but demonstrates the path you might want to take. The rate limit is a guess, and doesn't seem to work. Remember to put your actual API key in.
import json
import os
from time import sleep
import requests
url = 'http://api.opencorporates.com/v0.2/companies/search'
token = 'ab123cd45'
rate = 20 # seconds to wait after rate limited
with open('codes') as f:
codes = [l.strip('\n') for l in f]
def get_page(code, page, **kwargs):
    """Fetch one page of search results for a jurisdiction code.

    Extra keyword arguments are sent as additional query parameters.
    Returns the ``results`` entry of the JSON response, or None when the
    response body is not valid JSON. While the response contains an
    ``error`` entry, its message is printed and the request is repeated
    after sleeping ``rate`` seconds; there is no retry limit.
    """
    params = {
        # 'api_token': token,
        'jurisdiction_code': code,
        'page': page,
    }
    params.update(kwargs)
    while True:
        r = requests.get(url, params=params)
        try:
            data = r.json()
        except ValueError:
            return None
        if 'error' in data:
            # print(...) with one argument behaves the same on Python 2 and 3.
            print(data['error']['message'])
            sleep(rate)
            continue
        return data['results']
def dump_page(code, page, data):
    """Write *data* as JSON to ``<code>/<page>.json``.

    The directory named *code* is not created here.
    """
    with open(os.path.join(code, str(page) + '.json'), 'w') as f:
        json.dump(data, f)
# For every jurisdiction code: create its directory, save page 1, then save
# the remaining pages reported by the API.
for code in codes:
    try:
        os.makedirs(code)
    except os.error:
        # Best effort; most likely the directory already exists.
        pass
    data = get_page(code, 1)
    if data is None:
        continue
    dump_page(code, 1, data['companies'])
    # Page 1 is already saved, so continue with pages 2..total_pages inclusive.
    for page in range(2, int(data.get('total_pages', 1)) + 1):
        page_data = get_page(code, page)
        if page_data is None:
            break
        dump_page(code, page, page_data['companies'])
I think that actually this example is not "technically empty." It contains data and is therefore technically not empty. The data just does not include any fields that are useful to you. :-)
If you want your code to skip over responses that have uninteresting data, then just check whether the JSON has the necessary fields before writing any data:
content = response.read()
try:
json_content = json.loads(content)
if json_content['results']['total_count'] > 0:
fo = str(i) + '.json'
OUTFILE = os.path.join(directory, fo)
with open(OUTFILE, 'w') as f:
f.write(content)
except KeyError:
break
except ValueError:
break
etc. You might want to report the ValueError or the KeyError, but that's up to you.

Categories