My requests are downloading the images, but not properly: the saved image files are empty. I know I am close to getting it right, but I am not sure what I am missing -- any help would be greatly appreciated!
# Reads testData.csv, keeps columns 0, 3 and 4 of every row, downloads each
# URL found in parentheses in column 7 to images/file_<n>.jpg, and appends the
# kept columns plus the list of saved file names to outfile.csv.
fname = 'testData.csv'
s = "dos2unix {}".format(fname)
inner_quote_grabber = re.compile(r'("[^"]*")', flags=re.M)
parenth_grabber = re.compile(r'\(([^)]*)\)', flags=re.M)
new_rows = []
matter = [0, 3, 4]
file_counter = 0
file_prefix = 'images/'
file_out = 'outfile.csv'

# newline='' is what the csv module expects for both reading and writing.
with open(fname, 'r', newline='') as c, open(file_out, 'a', newline='') as out:
    reader = csv.reader(c, delimiter=',')
    writer = csv.writer(out)
    for row in reader:
        t_row = [row[i] for i in matter]
        last_row = []
        print(row[7])
        for group in parenth_grabber.findall(row[7]):
            print('grabbing ', group)
            # Incremented once per image (the original bumped it twice,
            # leaving gaps in the file numbering).
            file_counter += 1
            click_clack = file_prefix + 'file_' + str(file_counter) + '.jpg'
            print('group:', group)
            # stream=True is the actual fix: without it requests reads the
            # whole body during get(), so req.raw is already exhausted and
            # copyfileobj() writes an empty file.
            with requests.get(group, stream=True) as req:
                print('status_code', req.status_code)
                last_row.append(click_clack)
                with open(click_clack, 'wb') as f:
                    # Have urllib3 undo gzip/deflate transfer encoding.
                    req.raw.decode_content = True
                    shutil.copyfileobj(req.raw, f)
        t_row.append(', '.join(last_row))
        writer.writerow(t_row)
Related
I'd like to ask how to delete all rows with the same email from a CSV file. The idea is to pick random rows and, once a row has been picked and printed, delete every row with its email from the file. Currently the code deletes only the rows that were picked and printed; if there are more rows with the same email, it does not delete them. How can I fix this?
Full code: https://pastebin.com/qzHm4NSA
Data structure: https://ibb.co/wWXfL6X
def generate():
    """Draw five distinct entry numbers, announce them, and rewrite the file.

    Rows whose "#" value was drawn are printed and stored in the global
    ``winningRows``; every other row is written back to the same file.
    Rows are matched by number only.
    """
    global winningRows
    filename = enterFile()
    noOfWinners = 5

    # Keep drawing until enough distinct numbers have been collected.
    winningNumbers = []
    while len(winningNumbers) < noOfWinners:
        luckyNumber = random.randint(1, totalEntries)
        if luckyNumber in winningNumbers:
            continue
        winningNumbers.append(luckyNumber)

    with open(filename, newline='\n') as entriesCSV:
        allRows = list(csv.DictReader(entriesCSV, dialect="excel"))

    # Single pass that sorts every row into one of the two groups.
    drawn, nonWinningRows = [], []
    for entry in allRows:
        if int(entry["#"]) in winningNumbers:
            drawn.append(entry)
        else:
            nonWinningRows.append(entry)
    winningRows = drawn

    for row in winningRows:
        winnerName, winnerID, winnerEmail = row["Name"], row["ID"], row["Email"]
        print(f"The winner is {winnerName}, ID {winnerID}, email {winnerEmail}")

    with open(filename, "w", newline='\n') as entriesCSV:
        writer = csv.DictWriter(entriesCSV, fieldnames=["#", "Name", "ID", "Email"])
        writer.writeheader()
        writer.writerows(nonWinningRows)
Maintain a list of the emails of the picked winners and then use it to filter the non-winning rows.
To do that, modify the code segment as follows:
def generate():
    """Draw winners, announce them, and purge every row sharing a winner's email.

    Five distinct entry numbers are drawn (fewer when ``totalEntries`` is
    smaller than five). Rows carrying a drawn number are stored in the global
    ``winningRows``; each distinct winner email is announced once. The file is
    then rewritten without any row whose email belongs to a winner.
    """
    global winningRows
    filename = enterFile()
    noOfWinners = 5

    # sample() yields distinct numbers directly; capping the count keeps the
    # draw from looping forever when there are fewer entries than winners.
    winningNumbers = set(
        random.sample(range(1, totalEntries + 1), min(noOfWinners, totalEntries))
    )

    # newline='' is the mode the csv module documents for its file objects.
    with open(filename, newline='') as entriesCSV:
        allRows = list(csv.DictReader(entriesCSV, dialect="excel"))

    winningRows = [row for row in allRows if int(row["#"]) in winningNumbers]

    winnerEmails = set()
    for row in winningRows:
        winnerEmail = row["Email"]
        if winnerEmail in winnerEmails:
            # Same person drawn twice: announce only once.
            continue
        winnerName = row["Name"]
        winnerID = row["ID"]
        print(f"The winner is {winnerName}, ID {winnerID}, email {winnerEmail}")
        winnerEmails.add(winnerEmail)

    # Every drawn row's email is in winnerEmails, so filtering on the email
    # alone also removes the drawn rows themselves.
    nonWinningRows = [row for row in allRows if row["Email"] not in winnerEmails]

    with open(filename, "w", newline='') as entriesCSV:
        writer = csv.DictWriter(entriesCSV, fieldnames=["#", "Name", "ID", "Email"])
        writer.writeheader()
        writer.writerows(nonWinningRows)
My code generates many CSV files, but I want a single CSV file: the results for all five parties across all years should be written to one CSV file. Basically, I am doing web crawling to build a time series of polls for the CDU, SPD, FDP, GRUNEN, and LEFT parties.
import scrapy
import re
import csv
class VoteSpider(scrapy.Spider):
    """Scrape Forsa poll values from wahlrecht.de for five parties.

    The start page and every per-year page linked from it are parsed. For each
    page, the values of each party are written to ``<party><label>.csv`` and
    stored on the spider as ``self.<party>[label] = [values]``, where
    ``label`` is ``'Current'`` for the start page or the year number taken
    from the link.
    """

    name = 'VoteSpider'
    start_urls = ['https://www.wahlrecht.de/umfragen/forsa.htm']

    # (party, index of its value in a row's ``td//text()`` result).
    _PARTY_COLUMNS = (('CDU', 2), ('SPD', 3), ('Green', 4), ('FDP', 5), ('left', 6))

    # Leading <tr> elements dropped on a year page (presumably header rows;
    # the counts are carried over unchanged). Years not listed drop none.
    _SKIPPED_ROWS_BY_YEAR = {
        '1998': 3, '1999': 3, '2000': 3, '2001': 3, '2002': 3, '2003': 3,
        '2004': 3, '2005': 5, '2006': 3, '2007': 4, '2008': 4, '2013': 4,
    }

    def __init__(self, *args, **kwargs):
        # Let scrapy.Spider run its own initialisation as well.
        super().__init__(*args, **kwargs)
        self.CDU = {}
        self.SPD = {}
        self.FDP = {}
        self.Green = {}
        self.left = {}

    def parse(self, response):
        """Queue every per-year page, then process the table on this page."""
        regex = r"[forsa]+[\/]+[0-9]+.htm"
        # XPath attribute tests use '@'; the former '#class' / '#href' are
        # not valid XPath.
        tableBody = response.xpath('//*[@class="wilko"]//tbody')
        for link in response.xpath('//a/@href').extract():
            if re.search(regex, link, re.MULTILINE):
                newlink = "https://www.wahlrecht.de/umfragen/" + link
                yield scrapy.Request(url=newlink, callback=self.parseLink,
                                     meta={'name': link})
        self.parseTable(tableBody)

    def parseTable(self, tableBody):
        """Extract and store the values of the start page under 'Current'."""
        # NOTE(review): '//tr' is an absolute path, so it selects every <tr>
        # in the document rather than only those below tableBody. Kept as-is
        # because the skip counts were chosen against this behaviour.
        rows = tableBody.xpath('//tr')[5:]
        self._store('Current', self._extract(rows))

    def parseLink(self, response):
        """Extract and store the values of one per-year page."""
        name = response.meta.get('name')
        yearNumber = re.findall(r'\d+', name)[0]
        skipped = self._SKIPPED_ROWS_BY_YEAR.get(yearNumber, 0)
        tableBody = response.xpath('//*[@class="wilko"]//tbody')
        rows = tableBody.xpath('//tr')[skipped:]
        self._store(yearNumber, self._extract(rows))

    def _extract(self, rows):
        """Return ``{party: [value, ...]}`` collected from the given rows."""
        values = {party: [] for party, _ in self._PARTY_COLUMNS}
        needed = max(index for _, index in self._PARTY_COLUMNS) + 1
        for row in rows:
            cells = row.xpath('td//text()').extract()
            if len(cells) < needed:
                # Too few cells to hold all five values; skip instead of
                # failing with IndexError.
                continue
            for party, index in self._PARTY_COLUMNS:
                values[party].append(cells[index])
        return values

    def _store(self, label, values):
        """Write one CSV per party for ``label`` and record the values."""
        for party, _ in self._PARTY_COLUMNS:
            with open(party + label + '.csv', 'w', newline='') as csvFile:
                writer = csv.writer(csvFile)
                # Each party gets its own data, one value per row. Passing the
                # bare strings to writerows() would split them into characters.
                writer.writerows([value] for value in values[party])
            getattr(self, party)[label] = [values[party]]
The expected output is all CDU, SPD, GRUNEN, FDP, and LEFT results for all years in one CSV file.
Instead of opening multiple files, you can append to a single file, like so:
...
# One shared file for every year and party: the year moves out of the file
# name and into a column, so repeated calls keep appending to the same CSV.
with open('ALL.csv', 'a+', newline='') as csvFile:
    writer = csv.writer(csvFile)
    for party, values in (('CDU', CDU), ('SPD', SPD), ('left', left),
                          ('Green', Green), ('FDP', FDP)):
        # Wrap each value in a row; writerows() on bare strings would split
        # every string into single characters.
        writer.writerows([yearNumber, party, value] for value in values)
...
I am trying to make a .csv splitter.
It should take x number of lines from the original CSV file and write new CSV files, each containing that number of lines.
import os
import csv
# Ask for the source file name and chunk size, then build the source path.
fileDir = 'C:\\somedir\\'
fName = input('Write the file name here, without extention: ')
lineNo = int(input('Number of lines pr csv file: '))
fNameF = '{}.csv'.format(fName)
filehandler = ''.join((fileDir, fNameF))
def split(filehandler, delimiter=',', row_limit=lineNo,
          output_name_template='fName_%s.csv', output_path=fileDir, keep_headers=True):
    """Split a CSV into numbered files of at most ``row_limit`` data rows.

    Args:
        filehandler: path of the source CSV, or an already opened text file.
        delimiter: field delimiter used for reading and writing.
        row_limit: maximum number of data rows per output file.
        output_name_template: ``%``-template given the 1-based piece number.
        output_path: directory the pieces are written to.
        keep_headers: when true, the first source row is repeated at the top
            of every piece.
    """
    # A path string handed to csv.reader would be iterated character by
    # character, so open it here and recurse with the file object.
    if isinstance(filehandler, (str, os.PathLike)):
        with open(filehandler, newline='') as source:
            return split(source, delimiter, row_limit,
                         output_name_template, output_path, keep_headers)

    reader = csv.reader(filehandler, delimiter=delimiter)
    # next(reader, None) works on Python 3 and tolerates an empty source.
    headers = next(reader, None) if keep_headers else None

    def open_piece(piece):
        # Start a new output file inside output_path, header first.
        path = os.path.join(output_path, output_name_template % piece)
        handle = open(path, 'w', newline='')
        writer = csv.writer(handle, delimiter=delimiter)
        if headers is not None:
            writer.writerow(headers)
        return handle, writer

    current_piece = 1
    current_limit = row_limit
    current_out, current_out_writer = open_piece(current_piece)
    try:
        for i, row in enumerate(reader):
            if i + 1 > current_limit:
                current_out.close()
                current_piece += 1
                current_limit = row_limit * current_piece
                current_out, current_out_writer = open_piece(current_piece)
            current_out_writer.writerow(row)
    finally:
        # Close the last piece so its buffered rows reach the disk.
        current_out.close()
It runs the script, but nothing happens; could someone help me?
I think you have not called the function split() at all. Also, the version below passes every value to the function explicitly as an argument instead of relying on default parameters. Check whether you get output with the following code.
import os
import csv
# Collect the base name and rows-per-file, then derive the source path.
fileDir = 'C:\\somedir\\'
fName = input('Write the file name here, without extention: ')
lineNo = int(input('Number of lines pr csv file: '))
fNameF = fName + '.csv'
filehandler = fileDir + fNameF
def split(filehandler, delimiter, row_limit,
          output_name_template, output_path, keep_headers):
    """Split a CSV into numbered files of at most ``row_limit`` data rows.

    Args:
        filehandler: path of the source CSV, or an already opened text file.
        delimiter: field delimiter used for reading and writing.
        row_limit: maximum number of data rows per output file.
        output_name_template: ``%``-template given the 1-based piece number.
        output_path: directory the pieces are written to.
        keep_headers: when true, the first source row is repeated at the top
            of every piece.
    """
    # A path string handed to csv.reader would be iterated character by
    # character, so open it here and recurse with the file object.
    if isinstance(filehandler, (str, os.PathLike)):
        with open(filehandler, newline='') as source:
            return split(source, delimiter, row_limit,
                         output_name_template, output_path, keep_headers)

    reader = csv.reader(filehandler, delimiter=delimiter)
    # next(reader, None) works on Python 3 and tolerates an empty source.
    headers = next(reader, None) if keep_headers else None

    def open_piece(piece):
        # Start a new output file inside output_path, header first.
        path = os.path.join(output_path, output_name_template % piece)
        handle = open(path, 'w', newline='')
        writer = csv.writer(handle, delimiter=delimiter)
        if headers is not None:
            writer.writerow(headers)
        return handle, writer

    current_piece = 1
    current_limit = row_limit
    current_out, current_out_writer = open_piece(current_piece)
    try:
        for i, row in enumerate(reader):
            if i + 1 > current_limit:
                current_out.close()
                current_piece += 1
                current_limit = row_limit * current_piece
                current_out, current_out_writer = open_piece(current_piece)
            current_out_writer.writerow(row)
    finally:
        # Close the last piece so its buffered rows reach the disk.
        current_out.close()
# Name the pieces after the entered file name (the literal 'fName_%s.csv'
# never used it); '%' is doubled so it survives the %-formatting in split().
split(filehandler, ',', lineNo, fName.replace('%', '%%') + '_%s.csv', fileDir, True)
There is a csv file, say A.csv, having content:
Place,Hotel,Food,Fare
Norway,Regal,NonVeg,5000
Poland,Jenny,Italiano,6000
Norway,Suzane,Vegeterian,4000
Norway,Regal,NonVeg,5000
I have to parse this csv and obtain an output by passing arguments in command prompt.
Example 1:
mycode.py Place
Desired output is:
Place,Fare
Norway,14000
Poland,6000
Example 2:
mycode.py Place Hotel
Desired output is:
Place,Hotel,Fare
Norway,Regal,10000
Poland,Jenny,6000
Norway,Suzane,4000
So it is clear from the above example that no matter what you pass as argument it gives you the sum of the Fare header for the common ones.
Below is my code. I am able to pass arguments and get an output, but I am stuck on summing the Fare. Can anyone help me with this?
import sys
import csv
import collections
# Sums the 'Fare' column of A.csv and writes the totals to output.csv.
# The columns named on the command line are selected, but the totals are
# keyed on the FIRST selected column only.
d = collections.defaultdict(list)
Data, Result, Final, Argvs = [], [], [], []
argv_len = len(sys.argv)
index = 0
input = ''
file = open('A.csv', 'rb')
try:
    Data.extend(csv.reader(file))

    # Requested columns, with 'Fare' always last.
    Argvs.extend(sys.argv[1:argv_len])
    Argvs.append('Fare')

    # One list of values per requested column (empty when the header has no
    # such column).
    header = Data[0]
    for input in Argvs:
        if input in header:
            position = header.index(input)
            Final.append([record[position] for record in Data[1:]])
        else:
            Final.append([])

    # Transpose the per-column lists back into per-row lists.
    NewFinal = [[column[x] for column in Final] for x in range(len(Final[0]))]

    out = {}
    for a in NewFinal:
        out.setdefault(a[0], []).append(int(a[-1]))

    with open("output.csv", "wb") as csv_file:
        writer = csv.writer(csv_file, dialect='excel', delimiter=',')
        writer.writerow(Argvs)
        for k, v in out.items():
            writer.writerow((k, sum(v)))
except Exception as e:
    print(str(e))
finally:
    file.close()
I edited the code and tried to group the rows. Now I am able to get the aggregate of the Fare, but not the desired output.
So when I am passing:
mycode.py Place Hotel
Instead of:
Place,Hotel,Fare
Norway,Regal,10000
Poland,Jenny,6000
Norway,Suzane,4000
I am getting:
Place,Hotel,Fare
Norway,14000
Poland,6000
Finally, I managed to get my desired output.
Below I am sharing the final code.
import sys
import csv
# Groups the rows of A.csv by the columns named on the command line and
# writes one line per group, with the summed 'Fare', to output.csv.
# Written for Python 3 (text-mode files opened with newline='').
Argvs = sys.argv[1:] + ['Fare']
out = {}
try:
    with open('A.csv', newline='') as source:
        reader = csv.DictReader(source)
        missing = [name for name in Argvs if name not in (reader.fieldnames or [])]
        if missing:
            # Report a bad argument by name instead of failing later with an
            # IndexError.
            raise ValueError('Unknown column(s): ' + ', '.join(missing))
        for record in reader:
            # Tuple key: one element per grouping column, so values that
            # contain commas cannot merge two different groups.
            key = tuple(record[name] for name in Argvs[:-1])
            out[key] = out.get(key, 0) + int(record['Fare'])

    with open('output.csv', 'w', newline='') as csv_file:
        # The key parts are written as separate fields, so the writer can
        # quote them properly and the quotechar=' ' hack is not needed.
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow(Argvs)
        for key, total in out.items():
            writer.writerow(key + (total,))
except Exception as e:
    # Top-level script boundary: report the problem instead of a traceback.
    print(str(e))
I'm trying to compare two CSV files, CSVFileOne and CSVFileTwo.
My desired output is to print CSVFileOne, but only the rows that do not exist in CSVFileTwo.
My code:
# Load the first four columns of both files (header skipped), then print the
# rows of CSVFileOne that do not occur in CSVFileTwo.
input_file = CSVFileOne
ABGSOne = []
with open(input_file, encoding='UTF-8', newline='') as fone:
    rowsOne = csv.reader(fone, delimiter=",", lineterminator="\n")
    next(rowsOne, None)  # skip the header line
    for rowOne in rowsOne:
        abbgone = {}
        abbgone['astringOne'] = rowOne[0]
        abbgone['bstringOne'] = rowOne[1]
        abbgone['cstringOne'] = rowOne[2]
        abbgone['dstringOne'] = rowOne[3]
        ABGSOne.append(abbgone)

input_fileTwo = CSVFileTwo
ABGSTwo = []
with open(input_fileTwo, encoding='UTF-8', newline='') as ftwo:
    rowsTwo = csv.reader(ftwo, delimiter=",", lineterminator="\n")
    next(rowsTwo, None)  # skip the header line
    for rowTwo in rowsTwo:
        abbgtwo = {}
        abbgtwo['astringTwo'] = rowTwo[0]
        abbgtwo['bstringTwo'] = rowTwo[1]
        abbgtwo['cstringTwo'] = rowTwo[2]
        abbgtwo['dstringTwo'] = rowTwo[3]
        ABGSTwo.append(abbgtwo)

# NOTE(review): rows are compared on all four loaded fields; the original
# draft compared only the 'b' column -- confirm which match is wanted.
rowsInTwo = {
    (r['astringTwo'], r['bstringTwo'], r['cstringTwo'], r['dstringTwo'])
    for r in ABGSTwo
}
for abbgone in ABGSOne:
    fields = (abbgone['astringOne'], abbgone['bstringOne'],
              abbgone['cstringOne'], abbgone['dstringOne'])
    if fields not in rowsInTwo:
        print(','.join(fields))
Try this out.
# Writes to Desired.csv every line of CSVFileOne.csv that does not occur in
# CSVFileTwo.csv.
with open('CSVFileOne.csv', 'r') as t1, open('CSVFileTwo.csv', 'r') as t2:
    fileone = t1.readlines()
    # A set gives constant-time lookups. Line endings are stripped so that a
    # final line without a trailing newline still matches.
    filetwo = {line.rstrip('\r\n') for line in t2}

with open('Desired.csv', 'w') as outFile:
    # Iterate file ONE: the wanted rows are those of file one that are
    # missing from file two (the loop previously ran the other way round).
    for line in fileone:
        content = line.rstrip('\r\n')
        if content not in filetwo:
            outFile.write(content + '\n')