I am working with an encrypted file, but I can't manage to create a loop with for in order to read it before it get closed and removed.
My intention is to read the data given in the encrypted file and loop it to assign each line to a variable.
Whenever I execute my code, Python just goes straight to finish, without working with the decrypted info; I believe it is because the with command close it before the loop starts.
This is what I want, not working, no errors either:
with open(input_file, 'rb') as fp:
data = fp.read()
fernet = Fernet(key)
encrypted = fernet.decrypt(data)
with tempfile.TemporaryFile() as fp:
fp.write(encrypted)
for url in fp: #Python ignores the tempfile. I belive it is closed in the previous line.
segment = url.strip()
url = 'https://docs.python.org/3.3/tutorial/' + segment
filename = segment + '.html'
filePath = pjoin('Data/' + filename)
response = urlopen(url)
webContent = response.read()
html_content = urlopen(url).read()
matches = re.findall(b'string', html_content);
if len(matches) == 0:
print(segment + ' unchanged.')
else:
with open(filePath, 'wb') as w:
w.write(webContent)
This is the working code (Sorry, tried to make it shorter but couldn't):
with open(input_file, 'rb') as fp:
data = fp.read()
fernet = Fernet(key)
encrypted = fernet.decrypt(data)
with open(output_file, 'wb') as fp:
fp.write(encrypted)
with open(output_file) as fp:
for url in fp:
segment = url.strip()
url = 'https://docs.python.org/3.3/tutorial/' + segment
filename = segment + '.html'
filePath = pjoin('Data/' + filename)
response = urlopen(url)
webContent = response.read()
html_content = urlopen(url).read()
matches = re.findall(b'string', html_content);
if len(matches) == 0:
print(segment + ' unchanged.')
else:
with open(filePath, 'wb') as w:
w.write(webContent)
Header for both examples (apart to make it shorter):
#python 3.6.6
from urllib.request import urlopen
import urllib.request
from os.path import join as pjoin
import re, os, sys, tempfile, six, ctypes, time, fileinput
from cryptography.fernet import Fernet
print("[*] Checking list.dat for consistency . . .")
key = b'wTmVBRLytAmlfkctCuEf59K0LDCXa3sGas3kPg3r4fs=' #Decrypt list.dat
input_file = 'List.dat'
output_file = 'List.txt'
List.txt content:
errors
classes
stdlib
Any hints?
The problem is that once you have written to the file, the "file pointer" is at the end of the file. There's nothing to read.
You can use the seek method to reposition the file pointer at the beginning. Alternatively, closing and re-opening the file (as in your working code) will position the pointer at the beginning of the file.
#LarryLustig pretty much answered why your code wasn't working, but IMO if you eliminate the temp file altogether (which shouldn't be necessary) you don't even need to worry about the cursor. See below commented changes on your desired code.
# We'll use os.linesep to get the line terminator string for your os.
import os
...
with open(input_file, 'rb') as fp:
data = fp.read()
fernet = Fernet(key)
# decode your decrypted bytes into strings. Change 'utf-8' into whichever file encoding you're using if necessary.
decrypted = fernet.decrypt(data).decode('utf-8')
# Don't write to a temp file
# Iterate directly on each line of the extracted data
for url in decrypted.split(os.linesep):
segment = url.strip()
url = 'https://docs.python.org/3.3/tutorial/' + segment
filename = segment + '.html'
filePath = pjoin('Data/' + filename)
response = urlopen(url)
webContent = response.read()
html_content = urlopen(url).read()
matches = re.findall(b'string', html_content);
if len(matches) == 0:
print(segment + ' unchanged.')
else:
with open(filePath, 'wb') as w:
w.write(webContent)
Alternatively, if you know for sure what is the line terminator used in the file (e.g. \r\n, or \n) then you can eliminate using os.linesep altogether.
Related
I am trying to download files using python and then add lines at the end of the downloaded files, but it returns an error:
f.write(data + """<auth-user-pass>
TypeError: can't concat str to bytes
Edit: Thanks, it works now when I do this b"""< auth-user-pass >""", but I only want to add the string at the end of the file. When I run the code, it adds the string for every line.
I also tried something like this but it also did not work: f.write(str(data) + "< auth-user-pass >")
here is my full code:
import requests
from multiprocessing.pool import ThreadPool
def download_url(url):
print("downloading: ", url)
# assumes that the last segment after the / represents the file name
# if url is abc/xyz/file.txt, the file name will be file.txt
file_name_start_pos = url.rfind("/") + 1
file_name = url[file_name_start_pos:]
save_path = 'ovpns/'
complete_path = os.path.join(save_path, file_name)
print(complete_path)
r = requests.get(url, stream=True)
if r.status_code == requests.codes.ok:
with open(complete_path, 'wb') as f:
for data in r:
f.write(data + """<auth-user-pass>
username
password
</auth-user-pass>""")
return url
servers = [
"us-ca72.nordvpn.com",
"us-ca73.nordvpn.com"
]
urls = []
for server in servers:
urls.append("https://downloads.nordcdn.com/configs/files/ovpn_legacy/servers/" + server + ".udp1194.ovpn")
# Run 5 multiple threads. Each call will take the next element in urls list
results = ThreadPool(5).imap_unordered(download_url, urls)
for r in results:
print(r)
EDIT: Thanks, it works now when I do this b"""< auth-user-pass >""", but I only want to add the string at the end of the file. When I run the code, it adds the string for every line.
Try this:
import requests
from multiprocessing.pool import ThreadPool
def download_url(url):
print("downloading: ", url)
# assumes that the last segment after the / represents the file name
# if url is abc/xyz/file.txt, the file name will be file.txt
file_name_start_pos = url.rfind("/") + 1
file_name = url[file_name_start_pos:]
save_path = 'ovpns/'
complete_path = os.path.join(save_path, file_name)
print(complete_path)
r = requests.get(url, stream=True)
if r.status_code == requests.codes.ok:
with open(complete_path, 'wb') as f:
for data in r:
f.write(data)
return url
servers = [
"us-ca72.nordvpn.com",
"us-ca73.nordvpn.com"
]
urls = []
for server in servers:
urls.append("https://downloads.nordcdn.com/configs/files/ovpn_legacy/servers/" + server + ".udp1194.ovpn")
# Run 5 multiple threads. Each call will take the next element in urls list
results = ThreadPool(5).imap_unordered(download_url, urls)
with open(complete_path, 'ab') as f:
f.write(b"""<auth-user-pass>
username
password
</auth-user-pass>""")
for r in results:
print(r)
You are using binary mode, encode your string before concat, that is replace
for data in r:
f.write(data + """<auth-user-pass>
username
password
</auth-user-pass>""")
using
for data in r:
f.write(data + """<auth-user-pass>
username
password
</auth-user-pass>""".encode())
You open the file as a write in binary.
Because of that you cant use normal strings like the comment from #user56700 said.
You either need to convert the string or open it another way(ex. 'a' = appending).
Im not completly sure but it is also possible that the write binary variant of open the data of the file deletes. Normally open with write deletes existing data, so its quite possible that you need to change it to 'rwb'.
There is an algorithm in the end of the text. It reads lines from the file SP500.txt. File contains strings and it looks like:
AAA
BBB
CCC
Substitutes these strings in the get request and saves the entire url to a file url_requests.txt. For the example:
https://apidate.com/api/api/AAA.US?api_token=XXXXXXXX&period=d
https://apidate.com/api/api/BBB.US?api_token=XXXXXXXX&period=d
https://apidate.com/api/api/CCC.US?api_token=XXXXXXXX&period=d
and then processes each request via the API and adds all responses to get requests to responses.txt.
I don't know how to save the response from each request from the file url_requests.txt into separate csv file instead of responses.txt (now they are all written to this file, and not separately). In this case, it is important to name each file with the corresponding line from the file SP500.txt. For example:
AAA.csv `(which contains data from the request response https://apidate.com/api/api/AAA.US?api_token=XXXXXXXX&period=d)`
BBB.csv `(which contains data from the request response https://apidate.com/api/api/BBB.US?api_token=XXXXXXXX&period=d)`
CCC.csv `(which contains data from the request response https://apidate.com/api/api/CCC.US?api_token=XXXXXXXX&period=d)`
So, algorithm is:
import requests
# to use strip to remove spaces in textfiles.
import sys
# two variables to squeeze a string between these two so it will become a full uri
part1 = 'https://apidate.com/api/api/'
part2 = '.US?api_token=XXXXXXXX&period=d'
# open the outputfile before the for loop
text_file = open("url_requests.txt", "w")
# open the file which contains the strings
with open('SP500.txt', 'r') as f:
for i in f:
uri = part1 + i.strip(' \n\t') + part2
print(uri)
text_file.write(uri)
text_file.write("\n")
text_file.close()
# open a new file textfile for saving the responses from the api
text_file = open("responses.txt", "w")
# send every uri to the api and write the respones to a textfile
with open('url_requests.txt', 'r') as f2:
for i in f2:
uri = i.strip(' \n\t')
batch = requests.get(i)
data = batch.text
print(data)
text_file.write(data)
text_file.write('\n')
text_file.close()
And I know how to save csv from this response. It is like:
import csv
import requests
url = "https://apidate.com/api/api/AAA.US?api_token=XXXXXXXX&period=d"
response = requests.get(url)
with open('out.csv', 'w') as f:
writer = csv.writer(f)
for line in response.iter_lines():
writer.writerow(line.decode('utf-8').split(','))
To save in different names you have to use open() and write() inside for-loop when you read data.
It would good to read all names to list and later generate urls and also keep on list so you would not have to read them.
When I see code which you use to save csv then it looks like you get csv from server so you could save all at once using open() write() without csv module.
I see it in this way.
import requests
#import csv
# --- read names ---
all_names = [] # to keep all names in memory
with open('SP500.txt', 'r') as text_file:
for line in text_file:
line = line.strip()
print('name:', name)
all_names.append(line)
# ---- generate urls ---
url_template = 'https://apidate.com/api/api/{}.US?api_token=XXXXXXXX&period=d'
all_uls = [] # to keep all urls in memory
with open("url_requests.txt", "w") as text_file:
for name in all_names:
url = url_template.format(name)
print('url:', url)
all_uls.append(url)
text_file.write(url + "\n")
# --- read data ---
for name, url in zip(all_names, all_urls):
#print('name:', name)
#print('url:', url)
response = requests.get(url)
with open(name + '.csv', 'w') as text_file:
text_file.write(response.text)
#writer = csv.writer(text_file)
#for line in response.iter_lines():
# writer.writerow(line.decode('utf-8').split(',')
You could calculate a filename for every string i, and open (create) a file each time.
Something like this:
import sys
import requests
# two variables to squeeze a string between these two so it will become a full uri
part1 = 'https://apidate.com/api/api/'
part2 = '.US?api_token=XXXXXXXX&period=d'
# open the outputfile before the for loop
text_file = open("url_requests.txt", "w")
uri_dict = {}
with open('SP500.txt', 'r') as f:
for i in f:
uri = part1 + i.strip(' \n\t') + part2
print(uri)
text_file.write(uri)
text_file.write("\n")
uri_dict[i] = uri
text_file.close()
for symbol, uri in uri_dict:
batch = requests.get(uri)
data = batch.text
print(data)
#create the filename
filename = symbol+".csv"
#open (create) the file and save the data
with open(filename, "w") as f:
f.write(data)
f.write('\n')
You could also get rid of url_requests.csv, which becomes useless (until you have other uses for it).
I am querying an API from a website. The API will be down for maintenance from time to time and also, there may not be data available for querying at times. I have written the code to keep forcing the program to query the API even after an error, however it doesn't seem to be working.
The following is the code:
import threading
import json
import urllib
from urllib.parse import urlparse
import httplib2 as http #External library
import datetime
import pyodbc as db
import os
import gzip
import csv
import shutil
def task():
#Authentication parameters
headers = { 'AccountKey' : 'secret',
'accept' : 'application/json'} #this is by default
#API parameters
uri = 'http://somewebsite.com/' #Resource URL
path = '/something/TrafficIncidents?'
#Build query string & specify type of API call
target = urlparse(uri + path)
print(target.geturl())
method = 'GET'
body = ''
#Get handle to http
h = http.Http()
#Obtain results
response, content = h.request(target.geturl(), method, body, headers)
api_call_time = datetime.datetime.now()
filename = "traffic_incidents_" + str(datetime.datetime.today().strftime('%Y-%m-%d'))
createHeader = 1
if os.path.exists(filename + '.csv'):
csvFile = open(filename + '.csv', 'a')
createHeader = 0
else:
#compress previous day's file
prev_filename = "traffic_incidents_" + (datetime.datetime.today()-datetime.timedelta(days=1)).strftime('%Y-%m-%d')
if os.path.exists(prev_filename + '.csv'):
with open(prev_filename + '.csv' , 'rb') as f_in, gzip.open(prev_filename + '.csv.gz', 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
os.remove(prev_filename + '.csv')
#create new csv file for writing
csvFile = open(filename + '.csv', 'w')
#Parse JSON to print
jsonObj = json.loads(content)
print (json.dumps(jsonObj, sort_keys=True, indent=4))
with open("traffic_incidents.json","w") as outfile:
#Saving jsonObj["d"]
json.dump(jsonObj, outfile, sort_keys=True, indent=4,ensure_ascii=False)
for i in range(len(jsonObj["value"])):
jsonObj["value"][i]["IncidentTime"] = jsonObj["value"][i]["Message"].split(' ',1)[0]
jsonObj["value"][i]["Message"] = jsonObj["value"][i]["Message"].split(' ',1)[1]
jsonObj["value"][i]["ApiCallTime"] = api_call_time
#Save to csv file
header = jsonObj["value"][0].keys()
csvwriter = csv.writer(csvFile,lineterminator='\n')
if createHeader == 1:
csvwriter.writerow(header)
for i in range(len(jsonObj["value"])):
csvwriter.writerow(jsonObj["value"][i].values())
csvFile.close()
t = threading.Timer(120,task)
t.start()
while True:
try:
task()
except IndexError:
pass
else:
break
I get the following error and the program stops:
"header = jsonObj["value"][0].keys()
IndexError: list index out of range"
I would like the program to keep running even after the IndexError has occured.
How can I edit the code to achieve that?
My requirement is to open a properties file and update the file, for update purpose i need to search for a specific string which stores the url information. For this purpose i have written the below code in python:
import os
owsURL="https://XXXXXXXXXXXXXX/"
reowsURL = "gStrOwsEnv = " + owsURL + "/" + "OWS_WS_51" + "/"
fileName='C:/Users/XXXXXXXXXXX/tempconf.properties'
if not os.path.isfile(fileName):
print("!!! Message : Configuraiton.properties file is not present ")
else:
print("+++ Message : Located the configuration.properties file")
with open(fileName) as f:
data = f.readlines()
for m in data:
if m.startswith("gStrOwsEnv"):
print("ok11")
m = m.replace(m,reowsURL)
after executing the program i am not able to update the properties file.
Any help is highly appreciated
Sample Content of file:
# ***********************************************
# Test Environment Details
# ***********************************************
# Application URL pointing to test execution
#gStrApplicationURL =XXXXXXXXXXXXXXXX/webservices/person
#gStrApplicationURL = XXXXXXXXXXXXXX/GuestAPIService/ProxyServices/
# FOR JSON
#gStrApplicationURL = XXXXXXXXXXXXXX
#SOAP_gStrApplicationURL =XXXXXXXXXXXXXXXXXXXXXXX
#(FOR WSDL PARSING)
version = 5
#v9
#SOAP_gStrApplicationURL = XXXXXXXXXXX/XXXXXXXXX/XXXXXXXXX/
#v5
SOAP_gStrApplicationURL = XXXXXXXXXXXXXXX/OWS_WS_51/
gStrApplicationXAIServerPath=
gStrEnvironmentName=XXXXXXXXX
gStrOwsEnv = XXXXXXXXXXXXXXXXXXXX/OWS_WS_51/
gStrConnectEnv = XXXXXXXXXXXXXXXXX/OWSServices/Proxy/
gStrSubscriptionKey =XXXXXXXXXXXXXXXXXXXXXX
I'm pretty sure that this is not the best way of doing that, but this is still one way:
with open(input_file_name, 'r') as f_in, open(output_file_name, 'w') as f_out:
for line in f_in:
if line.startswith("gStrOwsEnv"):
f_out.write(reowsURL)
else:
f_out.write(line)
That script copy every line of input_file_name into output_file_name except the lines that you want to change.
This script reads and writes all the individual html files in a directory. The script reiterates, highlight and write the output.The issue is, after highlighting the last instance of the search item, the script removes all the remaining contents after the last search instance in the output of each file. Any help here is appreciated.
import os
import sys
import re
source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
filepath = os.path.join(source+'\\'+f)
infile = open(filepath, 'r+')
source_content = infile.read()
color = ('red')
regex = re.compile(r"(\b in \b)|(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
i = 0; output = ""
for m in regex.finditer(source_content):
output += "".join([source_content[i:m.start()],
"<strong><span style='color:%s'>" % color[0:],
source_content[m.start():m.end()],
"</span></strong>"])
i = m.end()
outfile = open(filepath, 'w')
outfile.seek(0, 2)
outfile.write(output)
print "\nProcess Completed!\n"
infile.close()
outfile.close()
raw_input()
After your for loop is over, you need to include whatever is left after the last match:
...
i = m.end()
output += source_content[i:]) # Here's the end of your file
outfile = open(filepath, 'w')
...