target = open("addresses.txt", 'r+')
for line in target:
    number = requests.get(line)
    print(number)
This is clearly wrong, but I'm stuck. It should extract the addresses from the .txt file, check each one on the net via the API, and print the result.
That is, it should print the content returned for each address, i.e.
0
0
My addresses.txt contains:
http://chainz.cryptoid.info/cure/api.dws?Key=3972cc3ec73f&q=getbalance&a=BPzWE91tLTGRAqTByE4AvbX79vgYGGc9ye
http://chainz.cryptoid.info/cure/api.dws?Key=3972cc3ec73f&q=getbalance&a=BPzWE91tLTGRAqTByE4AvbX79vgYGGc9pt
Do this:
with open("addresses.txt") as addresses:
    for address in addresses.readlines():
        response = requests.get(address)
        print(response.text)  # response.text is the body; print(response) would only show the status line
Try something like:
import requests

with open("addresses.txt", 'r') as target:
    for line in target:
        r = requests.get(line)
        print(r.text)
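One caveat (my addition, not part of the original answers): each line read from a file keeps its trailing newline, and depending on the requests and Python versions that stray whitespace can end up in the request. A minimal sketch that strips it first, using the same addresses.txt as above:

import requests

with open("addresses.txt") as target:
    for line in target:
        url = line.strip()  # drop the trailing newline before building the request
        if not url:
            continue  # skip blank lines
        r = requests.get(url)
        print(r.text)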
I'm getting the following error from my Python scraper:
import requests
import json

symbol_id = 'COINBASE_SPOT_BTC_USDT'
time_start = '2022-11-20T17:00:00'
time_end = '2022-11-21T05:00:00'
limit_levels = 100000000
limit = 100000000

url = 'https://rest.coinapi.io/v1/orderbooks/{symbol_id}/history?time_start={time_start}limit={limit}&limit_levels={limit_levels}'
headers = {'X-CoinAPI-Key' : 'XXXXXXXXXXXXXXXXXXXXXXX'}
response = requests.get(url, headers=headers)
print(response)

with open('raw_coinbase_ob_history.json', 'w') as json_file:
    json.dump(response.json(), json_file)

with open('raw_coinbase_ob_history.json', 'r') as handle:
    parsed = json.load(handle)

with open('coinbase_ob_history.json', 'w') as coinbase_ob:
    json.dump(parsed, coinbase_ob, indent=4)
<Response [400]>
And my written JSON file contains:
{"error": "Wrong format of 'time_start' parameter."}
I assumed a string goes into the URL, so I flattened the timestamp into a string. I don't understand why this doesn't work. This is the documentation for the CoinAPI call I'm trying to make: https://docs.coinapi.io/?python#historical-data-get-4
Incorrect syntax for Python. To concatenate strings, stick them together like this:
a = 'a' + 'b' + 'c'
Your string formatting is invalid, and you also need a & between the different URL parameters:
# python3
url = f"https://rest.coinapi.io/v1/orderbooks/{symbol_id}/history?time_start={time_start}&limit={limit}&limit_levels={limit_levels}"
# python 2
url = "https://rest.coinapi.io/v1/orderbooks/{symbol_id}/history?time_start={time_start}&limit={limit}&limit_levels={limit_levels}".format(symbol_id=symbol_id, time_start=time_start, limit=limit, limit_levels=limit_levels)
https://docs.python.org/3/tutorial/inputoutput.html
https://docs.python.org/2/tutorial/inputoutput.html
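A side note (my own suggestion, not from the original answer): requests can build the query string for you via its params argument, which URL-encodes each value and joins the pairs with &, so this whole class of formatting mistake goes away. A sketch using the values from the question:

import requests

symbol_id = 'COINBASE_SPOT_BTC_USDT'
headers = {'X-CoinAPI-Key': 'XXXXXXXXXXXXXXXXXXXXXXX'}

url = f"https://rest.coinapi.io/v1/orderbooks/{symbol_id}/history"
params = {
    'time_start': '2022-11-20T17:00:00',
    'limit': 100000000,
    'limit_levels': 100000000,
}
# requests appends ?time_start=...&limit=...&limit_levels=... for us
response = requests.get(url, headers=headers, params=params)
print(response.status_code)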
I am trying to download files using Python and then add lines at the end of the downloaded files, but it returns an error:
f.write(data + """<auth-user-pass>
TypeError: can't concat str to bytes
Edit: Thanks, it works now when I do b"""<auth-user-pass>""", but I only want to add the string once, at the end of the file. When I run the code, it adds the string for every line.
I also tried something like this, but it did not work either: f.write(str(data) + "<auth-user-pass>")
Here is my full code:
import os
import requests
from multiprocessing.pool import ThreadPool

def download_url(url):
    print("downloading: ", url)
    # assumes that the last segment after the / represents the file name
    # if url is abc/xyz/file.txt, the file name will be file.txt
    file_name_start_pos = url.rfind("/") + 1
    file_name = url[file_name_start_pos:]
    save_path = 'ovpns/'
    complete_path = os.path.join(save_path, file_name)
    print(complete_path)
    r = requests.get(url, stream=True)
    if r.status_code == requests.codes.ok:
        with open(complete_path, 'wb') as f:
            for data in r:
                f.write(data + """<auth-user-pass>
username
password
</auth-user-pass>""")
    return url

servers = [
    "us-ca72.nordvpn.com",
    "us-ca73.nordvpn.com"
]

urls = []
for server in servers:
    urls.append("https://downloads.nordcdn.com/configs/files/ovpn_legacy/servers/" + server + ".udp1194.ovpn")

# Run 5 threads. Each call will take the next element in the urls list
results = ThreadPool(5).imap_unordered(download_url, urls)
for r in results:
    print(r)
Try this:
import os
import requests
from multiprocessing.pool import ThreadPool

def download_url(url):
    print("downloading: ", url)
    # assumes that the last segment after the / represents the file name
    # if url is abc/xyz/file.txt, the file name will be file.txt
    file_name_start_pos = url.rfind("/") + 1
    file_name = url[file_name_start_pos:]
    save_path = 'ovpns/'
    complete_path = os.path.join(save_path, file_name)
    print(complete_path)
    r = requests.get(url, stream=True)
    if r.status_code == requests.codes.ok:
        with open(complete_path, 'wb') as f:
            for data in r:
                f.write(data)
            # after the chunk loop the file position is at the end,
            # so this writes the block exactly once per downloaded file
            f.write(b"""<auth-user-pass>
username
password
</auth-user-pass>""")
    return url

servers = [
    "us-ca72.nordvpn.com",
    "us-ca73.nordvpn.com"
]

urls = []
for server in servers:
    urls.append("https://downloads.nordcdn.com/configs/files/ovpn_legacy/servers/" + server + ".udp1194.ovpn")

# Run 5 threads. Each call will take the next element in the urls list
results = ThreadPool(5).imap_unordered(download_url, urls)
for r in results:
    print(r)
You are using binary mode, so encode your string before concatenating; that is, replace
for data in r:
    f.write(data + """<auth-user-pass>
username
password
</auth-user-pass>""")
with
for data in r:
    f.write(data + """<auth-user-pass>
username
password
</auth-user-pass>""".encode())
You opened the file for writing in binary mode.
Because of that you can't write normal strings, as the comment from #user56700 said.
You either need to encode the string or open the file another way (e.g. 'a' for appending).
Also be aware that opening with 'w'/'wb' deletes any existing data, so if you want to keep what was downloaded and only add to it, reopen the file in 'ab' (append binary) mode.
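A minimal sketch of that append-once approach, assuming complete_path is the path built inside the question's download_url (the literal path below is hypothetical):

complete_path = 'ovpns/us-ca72.nordvpn.com.udp1194.ovpn'  # hypothetical example path
# 'ab' keeps the downloaded bytes and positions the write at the end of the file
with open(complete_path, 'ab') as f:
    f.write(b"""<auth-user-pass>
username
password
</auth-user-pass>""")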
I want to write a script that accepts multiple URLs through a list or text file, appends some string at the end of each URL, checks the HTTP status code (200, 401, 403) of each URL, and saves the results in separate files.
Here's my code so far:
lst = {'back.sql',
    'backup.sql',
    'accounts.sql',
    'backups.sql',
    'clients.sql',
    'customers.sql',
    'data.sql',
    'database.sql',
    'database.sqlite',
    'users.sql',
    'db.sql',
    'db.sqlite',
    'db_backup.sql',
    'dbase.sql',
    'dbdump.sql',
    'setup.sql',
    'sqldump.sql',
    'dump.sql',
    'mysql.sql',
    'sql.sql',
    'temp.sql'
}

url_test = 'http://www.Holiday.com/%s/' #This can be modified to accept multiple URLs

for i in lst:
    url = url_test %i
    print(url) #This can be modified to save results for each http status code
If you want to check the status code, you have to request each page one by one:
from requests import get

lst = {'back.sql',
    'backup.sql',
    'accounts.sql',
    'backups.sql',
    'clients.sql',
    'customers.sql',
    'data.sql',
    'database.sql',
    'database.sqlite',
    'users.sql',
    'db.sql',
    'db.sqlite',
    'db_backup.sql',
    'dbase.sql',
    'dbdump.sql',
    'setup.sql',
    'sqldump.sql',
    'dump.sql',
    'mysql.sql',
    'sql.sql',
    'temp.sql'
}

url_test = ['http://www.Holiday.com/%s/'] # Create list of URLs

result_dict = dict()
for i in lst:
    for url_from_list in url_test:
        url = url_from_list %i
        # request and get status code from each page one by one
        result_dict[url] = get(url).status_code
result_dict will be a dictionary with each URL as a key and its response code as the value.
Then save it to a file:
with open('filename.txt', 'w') as file:
    for url, status_code in result_dict.items():
        line = url + " " + str(status_code) + "\n"
        file.write(line)
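The question also asked for the results to be saved in separate files per status code (200, 401, 403). One hedged way to do that, reusing result_dict from above (the results_<code>.txt naming scheme is my own invention):

# write one file per status code of interest
codes_of_interest = (200, 401, 403)
for code in codes_of_interest:
    with open('results_%d.txt' % code, 'w') as file:
        for url, status_code in result_dict.items():
            if status_code == code:
                file.write(url + "\n")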
I am trying to run this code but it raises an error.
import json
import requests
import pprint

data = []
with open('data.txt') as o1:
    for line in o1:
        data.append(json.loads(line))
        print(data)
        print(" \n")

print(data)
url = 'http://xyz.abcdfx.in/devicedata'
body_json = json.dumps(data)
headers = {'Content-Type': 'application/json'}
d = requests.post(url, data=body_json, headers=headers)
pprint.pprint(d.json())
It shows:
ValueError: No JSON object could be decoded
I am new to programming and cannot figure out what the problem is.
It seems like you are trying to parse the JSON file line by line, but a JSON object may (and usually does) span more than one line. You need to read the entire file in order to parse it:
with open('data.txt') as o1:
    data = json.loads(o1.read())  # read ALL the file and parse. no loops
print(data)
I solved my problem using this:
data = []
with open('data.txt') as f:
    for line in f:
        data = json.loads(line)
        print(data)
        # each line holds one complete JSON object, so post them one at a time
        url = 'http://xyz.abcdfx.cn/devicedata'
        body_json = json.dumps(data)
        headers = {'Content-Type': 'application/json'}
        d = requests.post(url, data=body_json, headers=headers)
        pprint.pprint(d.json())
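A small aside (my own suggestion, not part of the original answers): requests can serialize the payload and set the Content-Type header for you via its json parameter, which shortens each POST:

import requests

data = {"device": "sensor-1", "value": 0}  # hypothetical stand-in for one parsed line
# json=... runs json.dumps() and sets Content-Type: application/json itself
d = requests.post('http://xyz.abcdfx.cn/devicedata', json=data)
print(d.status_code)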
Hey guys, I'm making a Python web crawler at the moment. I have a link whose last characters are "search?q=", and after that I append a word from a wordlist I loaded into a list beforehand. But when I try to open the URL with urllib2.urlopen(url), it throws an error (urlopen error no host given). When I open that link with urllib normally (typing in the word that would otherwise be pasted in automatically), it works fine. Can you tell me why this is happening?
Thanks and regards
Full error:
  File "C:/Users/David/PycharmProjects/GetAppResults/main.py", line 61, in <module>
    getResults()
  File "C:/Users/David/PycharmProjects/GetAppResults/main.py", line 40, in getResults
    usock = urllib2.urlopen(url)
  File "C:\Python27\lib\urllib2.py", line 127, in urlopen
    return _opener.open(url, data, timeout)
  File "C:\Python27\lib\urllib2.py", line 402, in open
    req = meth(req)
  File "C:\Python27\lib\urllib2.py", line 1113, in do_request_
    raise URLError('no host given')
urllib2.URLError: <urlopen error no host given>
Code:
with open(filePath, "r") as ins:
    wordList = []
    for line in ins:
        wordList.append(line)

def getResults():
    packageID = ""
    count = 0
    word = "Test"
    for x in wordList:
        word = x
        print word
        url = 'http://www.example.com/search?q=' + word
        usock = urllib2.urlopen(url)
        page_source = usock.read()
        usock.close()
        print page_source
        startSequence = "data-docid=\""
        endSequence = "\""
        while page_source.find(startSequence) != -1:
            start = page_source.find(startSequence) + len(startSequence)
            end = page_source.find(endSequence, start)
            print str(start)
            print str(end)
            link = page_source[start:end]
            print link
            if link:
                if not link in packageID:
                    packageID += link + "\r\n"
                    print packageID
            page_source = page_source[end + len(endSequence):]
            count += 1
So when I print the string word, it outputs the correct word from the wordlist.
I solved the problem. I am simply using urllib now instead of urllib2 and everything works fine. Thank you all :)
Note that urlopen() returns a response, not a request.
You may have a broken proxy configuration; verify that your proxies are working:
print(urllib.request.getproxies())
or bypass proxy support altogether. Note that urlopen() in urllib.request takes no proxies argument, so install an empty ProxyHandler instead:
proxy_handler = urllib.request.ProxyHandler({})  # an empty mapping disables all proxies
opener = urllib.request.build_opener(proxy_handler)
usock = opener.open("http://www.example.com/search?q=" + text_to_check)
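Since the question itself runs on Python 2 (urllib2), the equivalent there is simpler: Python 2's urllib.urlopen() does accept a proxies mapping directly. A sketch, assuming the same text_to_check variable as above:

# Python 2 only: urllib.urlopen(url, data=None, proxies=None)
import urllib
print urllib.getproxies()  # verify the current proxy configuration
usock = urllib.urlopen("http://www.example.com/search?q=" + text_to_check, proxies={})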
Here is a sample way of combining the URL with a word from the wordlist. It joins a list entry with a word to form a URL, fetches the images from that page, and downloads them. Loop it around to work through your whole list (a sketch of that loop follows the sample).
import urllib
import re

print "The URL crawler starts.."

mylist = ["http://www.ebay", "https://www.npmjs.org/"]
wordlist = [".com", "asss"]

x = 1
urlcontent = urllib.urlopen(mylist[0] + wordlist[0]).read()
imgUrls = re.findall('img .*?src="(.*?)"', urlcontent)

for imgUrl in imgUrls:
    img = imgUrl
    print img
    urllib.urlretrieve(img, str(x) + ".jpg")
    x = x + 1
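To loop it around as suggested, here is a hedged sketch of the nested loop over both lists (reusing the sample's lists; as in the sample, not every combination will form a URL that actually resolves):

import urllib
import re

mylist = ["http://www.ebay", "https://www.npmjs.org/"]
wordlist = [".com", "asss"]

x = 1
for base in mylist:
    for suffix in wordlist:
        urlcontent = urllib.urlopen(base + suffix).read()
        imgUrls = re.findall('img .*?src="(.*?)"', urlcontent)
        for imgUrl in imgUrls:
            print imgUrl
            urllib.urlretrieve(imgUrl, str(x) + ".jpg")
            x = x + 1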
Hope this helps, else post your code and error logs.