I am trying to run the following python code
Technology: Python, Selenium scraper
Device: Windows device
Getting error......
Traceback (most recent call last):
File "scraper.py", line 35, in for row in cp_url:
ValueError: I/O operation on closed file.
#!/usr/bin/python3
# Description: The Python code below will search selenium in Google.
import time
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
#EVERYTIME CHANGE THE DRIVER PATH TO THE CHROME DRIVER FOR LATEST CHROME VERSION
driver = webdriver.Chrome(
executable_path="D:\chromedriver.exe")
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
contents = []
filePath = 'output1.csv'
# As file at filePath is deleted now, so we should check if file
# exists or not not before deleting them
if os.path.exists(filePath):
os.remove(filePath)
else:
print("Can not delete the file as it doesn't exists")
f = open("output1.csv", "a")
f.write("website," + "htmltag," + "type," + "id," + "classname," + "for," + "href," + "alt," + "type," + "src,"
+ "name," + "width," + "height," + "data-src,"+ 'inner-text,' + 'action,' + 'value,' + "\n")
with open('inputLinks1.csv', 'rt') as cp_csv:
cp_url = csv.reader(cp_csv)
for row in cp_url:
links = row[0]
contents.append(links)
driver.get(links)
with open('xpathtags.csv', 'rt') as cp2_csv:
cp_url2 = csv.reader(cp2_csv)
for row1 in cp_url2:
print(row[0])
(xtype, xpathtext) = row1[0].split(';')
print(xtype, xpathtext)
contents.append(xtype)
contents.append(xpathtext)
elems = driver.find_elements_by_xpath(xpathtext)
for elem in elems:
f = open('output1.csv', 'a', encoding='utf-8')
f.write( links + ", "+ xtype + ","
+ str(elem.get_attribute('type')) + ', '
+ str(elem.get_attribute('id')) + ', '
+ str(elem.get_attribute('class')) + ', '
+ str(elem.get_attribute('for')) + ', '
+ str(elem.get_attribute('href')) + ', '
+ str(elem.get_attribute('alt')) + ', '
+ str(elem.get_attribute('type')) + ', '
+ str(elem.get_attribute('src')) + ', '
+ str(elem.get_attribute('name')) + ', '
+ str(elem.get_attribute('width')) + ', '
+ str(elem.get_attribute('height')) + ', '
+ str(elem.get_attribute('data-src')) + ', '
+ str(elem.get_attribute('innerText').strip()) + ', '
+ str(elem.get_attribute('action')) + ', '
+ str(elem.get_attribute('value')) + ', '
+ '\n')
f.close()
driver.close()
I am using the following CSV files
A) inputlinks1.csv
www.flipkart.com
www.ebay.com
B) xpathtags.csv
Link;//a[#href]
Button;//button
Image;//img
Heading1;//h1
Heading2;//h2
Heading3;//h3
Heading4;//h4
C) Output.csv is a blank file
I am getting the following error
Traceback (most recent call last):
File "scraper.py", line 35, in <module>
for row in cp_url:
ValueError: I/O operation on closed file.
I can't test it but I think your problem is that you have wrong indentations
with open('inputLinks1.csv', 'rt') as cp_csv:
cp_url = csv.reader(cp_csv)
for row in cp_url:
# ...rest...
so you run for-loop outside with...as... and with...as... automatically closes file.
You should run for-loop inside with...as...
with open('inputLinks1.csv', 'rt') as cp_csv:
cp_url = csv.reader(cp_csv)
for row in cp_url:
# ...rest...
Or you could use standard open() and close()
cp_csv = open('inputLinks1.csv', 'rt')
cp_url = csv.reader(cp_csv)
for row in cp_url:
# ...rest...
cp_csv.close()
Had to make a few changes to your code to get it working.
After fixing indentation, it threw another error w.r.t inputlinks1.csv file.
Changed it to-
https://www.flipkart.com
https://www.ebay.com
And always try to use with open when handling files.
Code snippet:-
contents = []
filePath = 'output1.csv'
# As file at filePath is deleted now, so we should check if file
# exists or not not before deleting them
if os.path.exists(filePath):
os.remove(filePath)
else:
print("Can not delete the file as it doesn't exists")
with open("output1.csv", "a") as f:
f.write("website," + "htmltag," + "type," + "id," + "classname," + "for," + "href," + "alt," + "type," + "src,"
+ "name," + "width," + "height," + "data-src,"+ 'inner-text,' + 'action,' + 'value,' + "\n")
with open('inputLinks1.csv', 'r') as cp_csv:
cp_url = csv.reader(cp_csv)
for row in cp_url:
links = row[0]
print(links)
contents.append(links)
driver.get(links)
with open('xpathtags.csv', 'r') as cp2_csv:
cp_url2 = csv.reader(cp2_csv)
for row1 in cp_url2:
print(row[0])
(xtype, xpathtext) = row1[0].split(';')
print(xtype, xpathtext)
contents.append(xtype)
contents.append(xpathtext)
elems = driver.find_elements_by_xpath(xpathtext)
for elem in elems:
with open('output1.csv', 'a', encoding='utf-8') as f:
f.write( links + ", "+ xtype + ","
+ str(elem.get_attribute('type')) + ', '
+ str(elem.get_attribute('id')) + ', '
+ str(elem.get_attribute('class')) + ', '
+ str(elem.get_attribute('for')) + ', '
+ str(elem.get_attribute('href')) + ', '
+ str(elem.get_attribute('alt')) + ', '
+ str(elem.get_attribute('type')) + ', '
+ str(elem.get_attribute('src')) + ', '
+ str(elem.get_attribute('name')) + ', '
+ str(elem.get_attribute('width')) + ', '
+ str(elem.get_attribute('height')) + ', '
+ str(elem.get_attribute('data-src')) + ', '
+ str(elem.get_attribute('innerText').strip()) + ', '
+ str(elem.get_attribute('action')) + ', '
+ str(elem.get_attribute('value')) + ', '
+ '\n')
driver.close()
I have a text file from which I need to extract some values, but several times I get more than one value between the beginning and the last line that starts with 0089 (CONVENIO) in the quadrant.
With my code I can only make it write in the txt the title I defined and it keeps repeating the first agreement found, but I need it to scroll through the text and bring me new information from the other quadrants.
I need the loop as it can have multiple covenants, for this reason I can't directly "anchor" the lines in a size forecast.
import re
import os
inicio = (' YM-INFRA-CASH MANAGMENT DEST.: 001-0001-CENTRAL ')
lista = []
contador = 3
banco = ' 0089'
convenio = ''
with open(caminho + '/Downloads/TESTE.txt', 'r') as arquivo:
for line in arquivo:
if line.strip() == inicio.strip():
localizar = arquivo.readlines()
inicio = localizar[contador]
van = inicio[13:17]
nomevan = inicio[20:50].strip()
inicio = localizar[contador + 1]
ag = inicio[13:17]
nomeag = inicio[20:50].strip()
inicio = localizar[contador + 2]
cliente = inicio[13:50].strip()
contadorum = 9
while localizar[contadorum][1:5] == '0033':
convenio = localizar[contadorum][1:22].strip()
narqrem = localizar[contadorum][22:34].strip()
bytesrem = localizar[contadorum][34:51].strip()
narqret = localizar[contadorum][51:63].strip()
bytesret = localizar[contadorum][63:81].strip()
totalbytes = localizar[contadorum][81:99].strip()
percrateio = localizar[contadorum][99:112].strip()
print(van, nomevan)
print(ag, nomeag)
print(cliente)
print(convenio, narqrem, bytesrem, narqret, bytesret, totalbytes, percrateio)
lista.append(convenio + narqrem + bytesrem + narqret + bytesret + totalbytes + percrateio +'\n')
with open(caminho + '/Downloads/testefim.txt', 'w') as consolidado:
consolidado.write('CONVENIO' + ';' + 'N ARQ REMES' + ';' + 'BYTES REMES' + ';' + 'N ARQ.RET.' + ';' + 'BYTES RET.' + ';' + 'TOTAL BYTES' + ';' + '% RATEIO' + '\n')
for linha in lista:
consolidado.write(convenio + ';' + narqrem + ';' + bytesrem + ';' + narqret + ';' + bytesret + ';' + totalbytes + ';' + percrateio + '\n')
consolidado.close()
else:
pass
arquivo.close()
I would like to collect different type of datas into a file. Here is a part of the code.
val = str(float(data[-1]))
val_dB = float(val)
val_dB = math.log(val_dB, 10) * 10
myfile = open('../../../MLI_values/mli_value.txt', 'a')
myfile.write(date_ID + " " + val + val_dB + "\n")
myfile.close()
But it gives back an error:
myfile.write(date_ID + " " + val + val_dB + "\n")
TypeError: cannot concatenate 'str' and 'float' objects
How can I solve it to put them together? (into columns) into a file?
Change:
myfile.write(date_ID + " " + val + val_dB + "\n")
to:
myfile.write(date_ID + " " + val + " " + str(val_dB) + "\n")
I have 30911 html files. I need to do webscraping and then save the info into a txt file named index.txt.
It should look like
filename1, title, t1, date, p1
filename2, title, t1, date, p1
filename3, title, t1, date, p2
and so on...
I only want filename, but output gave me path+filename.
Your problem is that filename is filepath in reality, in order to get the filename you could use os module
os.path.basename('filepath')
so in order to write to the file:
indexFile.write(os.path.basename(filename)+ ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + parti_names + '\n')
You can use:
path = 'C:/Users/.../.../output/'
#read html files
for filename in glob.glob(os.path.join(path, '*.html')):
soup = bs4.BeautifulSoup(open(filename).read(), "lxml")
title = soup.find('h1')
ticker = soup.find('p')
d_date = soup.find_all('div', {"id": "a-body"})[0].find_all("p")[2]
try:
def find_participant(tag):
return tag.name == 'p' and tag.find("strong", text=re.compile(r"Executives|Corporate Participants"))
participants = soup.find(find_participant)
parti_names = ""
for parti in participants.find_next_siblings("p"):
if parti.find("strong", text=re.compile(r"(Operator)")):
break
parti_names += parti.get_text(strip=True) + ","
except:
indexFile = open('C:/Users/.../output1/' + 'index.txt', 'a+')
indexFile.write(filename + ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + 'No participants' + '\n')
else:
participants = soup.find(find_participant)
parti_names = ""
for parti in participants.find_next_siblings("p"):
if parti.find("strong", text=re.compile(r"(Operator)")):
break
parti_names += parti.get_text(strip=True) + ","
indexFile = open('C:/Users/.../output1/' + 'index.txt', 'a+')
indexFile.write(os.path.basename(filename) + ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + parti_names + '\n')
indexFile.close()
ntpath is another module used to get base name from path.
>>> import ntpath
>>> ntpath.basename('C:/Users/.../output1/' + 'index.txt')
'index.txt'
I need to encrypt 3 .bin files which contain 2 keys for Diffie-Hellman. I have no clue how to do that, all I could think of was what I did in the following Python file. I have an example what the output should look like but my code doesn't seem to produce the right keys. The output file server.ini is used by a client to connect to a server.
import base64
fileList = [['game_key.bin', 'Game'], ['gate_key.bin', 'Gate'], ['auth_key.bin', 'Auth']]
iniList = []
for i in fileList:
file = open(i[0], 'rb')
n = list(file.read(64))
x = list(file.read(64))
file.close()
n.reverse()
x.reverse()
iniList.append(['Server.' + i[1] + '.N "' + base64.b64encode("".join(n)) + '"\n', 'Server.' + i[1] + '.X "' + base64.b64encode("".join(x)) + '"\n'])
iniList[0].append('\n')
#time for user Input
ip = '"' + raw_input('Hostname: ') + '"'
dispName = 'Server.DispName ' + '"' + raw_input('DispName: ') + '"' + '\n'
statusUrl = 'Server.Status ' + '"' + raw_input('Status URL: ') + '"' + '\n'
signupUrl = 'Server.Signup ' + '"' + raw_input('Signup URL: ') + '"' + '\n'
for l in range(1, 3):
iniList[l].append('Server.' + fileList[l][1] + '.Host ' + ip + '\n\n')
for l in [[dispName], [statusUrl], [signupUrl]]:
iniList.append(l)
outFile = open('server.ini', 'w')
for l in iniList:
for i in l:
outFile.write(i)
outFile.close()
The following was in my example file:
# Keys are Base64-encoded 512 bit RC4 keys, as generated by DirtSand's keygen
# command. Note that they MUST be quoted in the commands below, or the client
# won't parse them correctly!
I also tried it without inverting n and x