Download txt with ';' delimiter and convert to .CSV with python - python

I have to download this .txt file: Link
Then I have to parse it to .csv and remove all header.
I tried to do this, but it doesn't work for me, this is my code:
import urllib
import csv
# Local path for the raw download and for the converted CSV.
outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"

# Save the remote ';'-delimited text file to disk.
urllib.request.urlretrieve(url_of_file, outfilename)

# Copy every row (header rows included) into the CSV file.
# Both files are opened in binary mode here, exactly as in the question.
with open(outfilename, "rb") as infile:
    with open(csvfile, 'wb') as outfile:
        out_csv = csv.writer(outfile)
        for row in csv.reader(infile, delimiter=';'):
            out_csv.writerow(row)
In this code I didn't do anything with header because even converting is not working yet.

You might use this code for Python 2:
import urllib
import csv
import urllib2
# Python 2 version: download the ';'-delimited text file, then rewrite it as
# a comma-separated CSV without its leading header rows.
outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
HEADER_ROWS = 3  # number of leading rows dropped from the output

# Fetch the data; the response and the output file are released even if the
# read or the write fails.
response = urllib2.urlopen(url_of_file)
try:
    with open(outfilename, 'wb') as output:
        output.write(response.read())
finally:
    response.close()

# Binary modes are what the Python 2 csv module expects.
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    for row_number, row in enumerate(in_txt, 1):
        if row_number > HEADER_ROWS:
            out_csv.writerow(row)
Python 3:
import urllib.request
import csv
# Python 3 version: download the ';'-delimited text file and convert it to a
# comma-separated CSV, dropping the first three rows.
outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"

urllib.request.urlretrieve(url_of_file, outfilename)

with open(outfilename, encoding='utf-8') as infile, open(csvfile, 'w', newline='') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    for row_number, row in enumerate(in_txt, start=1):
        # Rows 1-3 are skipped; everything after them is copied.
        if row_number <= 3:
            continue
        out_csv.writerow(row)

There is no request namespace in urllib.
Replace this line
urllib.request.urlretrieve(url_of_file, outfilename)
With this
urllib.urlretrieve(url_of_file, outfilename)
UPDATED:
You need to import urllib.request, not just urllib.
Also, you need to open the files in text mode, not binary mode (which is 'rb', or 'wb').
import urllib.request
import csv
# Download the ';'-delimited text file and rewrite it as a comma-separated
# CSV (all rows are copied, header rows included).
outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
urllib.request.urlretrieve(url_of_file, outfilename)

# Text mode is required on Python 3. newline='' lets the csv module do its
# own line-ending handling; without it the writer's '\r\n' terminator is
# translated again on Windows, producing a blank line after every row.
with open(outfilename, "r", newline='') as infile, open(csvfile, 'w', newline='') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    out_csv.writerows(in_txt)

Related

How save file with utf-16-le bom encoding?

I can save the file as UTF-16-LE, but I don't understand how to save it with a BOM.
import csv
# Append one ';'-delimited row to filename.csv, encoded as UTF-16-LE.
row = ['Job1', 'M']
with open('filename.csv', mode='a', newline='', encoding='utf-16-le') as employee_file:
    csv.writer(employee_file, delimiter=";").writerow(row)
I can add '\ufeff' to the start of the file, but I am looking for an alternative approach:
def addUTF8Bom(filename):
    """Prepend the UTF-16-LE byte order mark to *filename* in place.

    Despite its name, this function deals with UTF-16-LE files: the mark it
    writes is U+FEFF encoded as UTF-16-LE (the bytes FF FE). The file is
    left untouched when it already starts with that mark, so calling the
    function repeatedly does not stack several BOMs.

    The file is handled as raw bytes, so its existing content (including
    line endings) is written back unchanged.
    """
    bom = b'\xff\xfe'  # u'\ufeff' encoded as UTF-16-LE
    with open(filename, 'rb') as source:
        payload = source.read()
    if payload.startswith(bom):
        return
    with open(filename, 'wb') as target:
        target.write(bom + payload)

Expected String or Buffer Error

Can anyone please tell me what's wrong with this block of code? It is for reading lines starting with 'READ' from a csv file. The error I am getting is:
return _compile(pattern, flags).match(string)
TypeError: expected string or buffer
import csv
import re
# Copy the header of 1.csv to out.csv, then copy the rows that pass the
# regular-expression test (Python 2 code, as posted in the question).
f1 = open("1.csv", "rb")
reader = csv.reader(f1)
header = reader.next()

f2 = open("out.csv", "wb")
writer = csv.writer(f2)
writer.writerow(header)

for row in reader:
    # `row` is the list of fields from csv.reader and is handed to
    # re.match unchanged.
    matched = re.match(r'^.*READ $', row)
    if matched:
        writer.writerow(row)

f1.close()
f2.close()
First of all, indent your code properly.
This may work...
import csv
import re
# Copy the header of 1.csv to out.csv, followed by every row whose first
# field starts with 'READ'.
#
# csv.reader yields each row as a list of strings, so the test is made on
# the first field rather than on str(row): the string form of a list ends
# with ']' and therefore never matches a pattern anchored on 'READ $'.
#
# Binary file modes follow the Python 2 csv convention used by this snippet.
with open("1.csv", "rb") as f1, open("out.csv", "wb") as f2:
    reader = csv.reader(f1)
    writer = csv.writer(f2)

    # An empty input file has no header; produce an empty output instead
    # of raising StopIteration.
    header = next(reader, None)
    if header is not None:
        writer.writerow(header)

    for row in reader:
        # Blank lines come back as empty lists and are skipped.
        if row and row[0].startswith('READ'):
            writer.writerow(row)

Python 3 Encoding Issues when converting to CSV from JSON

Python3
I have looked at the other solutions, but they don't seem to cover the situation I am in. I have been charged with writing a script that takes JSON and converts it to a CSV file.
I have a good chunk of this done but have encountered an issue when I write the data. The data I received does not match what was written. Below is an example. I am lost on how I can get this to preserve the encoding.
I should mention that the default encoding is UTF-8
Input: necesitará
Output: necesitarÃ¡
import csv
import json
import sys
import sys
# Report the interpreter's default string encoding.
print(sys.getdefaultencoding())

# Load the whole JSON document (the file is opened with the platform's
# default text encoding, as in the question).
stuff = open('data.json')
jsonStuff = json.loads(stuff.read(), encoding="utf-8")

# Write one CSV row per JSON element: its key and its fourth value.
with open('output.csv', 'w', newline='\n', encoding='utf-8') as csvfile:
    writer = csv.writer(
        csvfile,
        delimiter=",",
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    )
    for element in jsonStuff:
        key = element['key']
        values = element['valuesRow']
        writer.writerow([key, values[3]['value']])
Remove encoding='utf-8' in open('output.csv', 'w', newline='\n', encoding='utf-8') should fix it.
data.json (utf-8): {"first": "necesitará", "second": "bodø"}
The following ...
import csv
import json
# Variant 1: the output file is opened with an explicit encoding='utf-8',
# while the input file uses the platform default.
with open('data.json') as stuff:
    with open('output.csv', 'w', newline='\n', encoding='utf-8') as csvfile:
        jsonStuff = json.loads(stuff.read(), encoding="utf-8")
        writer = csv.writer(
            csvfile,
            delimiter=",",
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        writer.writerow([jsonStuff['first'], jsonStuff['second']])
... gives output.csv: necesitarÃ¡,bodÃ¸
However ...
import csv
import json
# Variant 2: neither file is given an explicit encoding, so both use the
# platform default.
with open('data.json') as stuff:
    with open('output.csv', 'w', newline='\n') as csvfile:
        jsonStuff = json.loads(stuff.read(), encoding="utf-8")
        writer = csv.writer(
            csvfile,
            delimiter=",",
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        writer.writerow([jsonStuff['first'], jsonStuff['second']])
... produces output.csv: necesitará,bodø
That said. There is no reason to use json.loads() when you have json.load(), and most of what you've defined are the defaults. I'd simply do ...
import csv
import json
# Convert the two fields of data.json into a single CSV row.
#
# Both files are opened with an explicit encoding: data.json is UTF-8, and
# relying on the platform default (for example cp1252 on Windows) would
# decode 'á' as two wrong characters before anything is written.
# newline='' is what the csv module requires for files it writes to.
with open('data.json', encoding='utf-8') as jsonfile, \
        open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:
    json_data = json.load(jsonfile)
    writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
    first = json_data['first']
    second = json_data['second']
    writer.writerow([first, second])

Writing to a file after parsing

I have written a small Python script that reads a sample csv file and copies its first column to a temporary csv file. When I then compare that temporary file with another text file and write the result to a file called result.txt, the file is created but it is empty.
But when i tested it in chunks, It is working fine
import csv
# Step 1: copy the first column of sample.csv into temp1.csv.
f = open("sample.csv", "r")
reader = csv.reader(f)
data = open("temp1.csv", "w")
w = csv.writer(data)
for row in reader:
    w.writerow([row[0]])

# Step 2: collect the lines common to temp1.csv and serialNumber.txt.
# Note that `data` has not been closed at this point, as in the question.
with open('temp1.csv', 'r') as file1, open('serialNumber.txt', 'r') as file2:
    same = set(file1).intersection(file2)

# Step 3: write the common lines to result.txt and echo each one.
with open('result.txt', 'w') as file_out:
    for line in same:
        file_out.write(line)
        print(line)
sample.csv
M11435TDS144,STB#1,Router#1
M11543TH4292,STB#2,Router#1
M11509TD9937,STB#3,Router#1
M11543TH4258,STB#4,Router#1
serialNumber.txt
G1A114042400571
M11543TH4258
M11251TH1230
M11435TDS144
M11543TH4292
M11509TD9937
You should close the output file (temp1.csv) before you can read data from it.
import csv
# Step 1: copy the first column of sample.csv into temp1.csv.
f = open("sample.csv", "r")
reader = csv.reader(f)
data = open("temp1.csv", "w")
w = csv.writer(data)
for row in reader:
    w.writerow([row[0]])

# Closing the writer's file flushes the buffered rows to disk, so the file
# has its full contents when it is reopened for reading below.
data.close()

# Step 2: collect the lines common to temp1.csv and serialNumber.txt.
with open('temp1.csv', 'r') as file1, open('serialNumber.txt', 'r') as file2:
    same = set(file1).intersection(file2)

# Step 3: write the common lines to result.txt and echo each one.
with open('result.txt', 'w') as file_out:
    for line in same:
        file_out.write(line)
        print(line)
Points regarding code:
The data file handle is not closed. Call data.close() after writing to temp1.csv.
In your code, same = set(file1).intersection(file2), you are passing the file handle file2 directly to intersection. It expects a list. This is the exact problem. It should be same = set(file1.readlines()).intersection(file2.readlines())
Working Code:
import csv
# Step 1: copy the first column of sample.csv into temp1.csv.
# Text mode plus lineterminator='\n' keeps temp1.csv's line endings in the
# platform's normal form, so its lines compare cleanly with a plain text
# file. Both files are closed (and flushed) when the `with` block ends.
with open("sample.csv", "r") as f, open("temp1.csv", "w") as data:
    reader = csv.reader(f)
    w = csv.writer(data, lineterminator='\n')
    for row in reader:
        # Blank lines come back as empty rows; they have no first column.
        if len(row) != 0:
            w.writerow([row[0]])

# Step 2: read each file exactly once. A second readlines() call on the
# same handle returns an empty list because the handle is already at end
# of file, so the lists read here are the ones that get compared.
# Stripping removes line-ending differences and lets a final line without
# a trailing newline match as well.
with open('temp1.csv', 'r') as file1:
    with open('serialNumber.txt', 'r') as file2:
        tmp_list = [line.strip() for line in file1.readlines()]
        ser_list = [line.strip() for line in file2.readlines()]
same = set(tmp_list).intersection(ser_list)
same.discard('')  # ignore blank lines present in both files

# Step 3: write the serial numbers found in both files, one per line.
with open('result.txt', 'w') as file_out:
    for line in same:
        file_out.write(line + '\n')
Content of temp1.csv:
M11435TDS144
M11543TH4292
M11509TD9937
M11543TH4258
Content of result.txt :
M11543TH4258
M11543TH4292
M11435TDS144
You can use with for opening files sample.csv and temp1.csv as below.
import csv
# Step 1: copy the first column of sample.csv into temp1.csv. Leaving the
# `with` block closes both files before temp1.csv is read again.
# ('wb' is the Python 2 csv convention used by this snippet.)
with open("sample.csv") as f, open("temp1.csv", 'wb') as data:
    w = csv.writer(data)
    for row in csv.reader(f):
        w.writerow([row[0]])

# Step 2: collect the lines common to temp1.csv and serialNumber.txt.
with open('temp1.csv', 'r') as file1, open('serialNumber.txt', 'r') as file2:
    temp_lines = file1.readlines()
    serial_lines = file2.readlines()
    same = set(temp_lines).intersection(serial_lines)

# Step 3: write the common lines to result.txt.
with open('result.txt', 'w') as file_out:
    for line in same:
        file_out.write(line)

Python Edit CSV headers

I have the following data from a csv file called temp.
Item,Description,Base Price,Available
2000-000-000-300,AC - CF/M Series Green For Black Hood,299.99,3
2000-000-000-380,AC - CF/M Series Green For White Hood,299.99,3
I need to change the headers to read
Item Number,Item Description,List Price,QTY Available
I have been searching similar questions on here and haven't found a solution that I can understand, since I am relatively new to Python programming. So far I have:
import csv
import os
# Derive "<name>_modified.csv" from the input file name.
inputFileName = "temp.csv"
base_name, _extension = os.path.splitext(inputFileName)
outputFileName = base_name + "_modified.csv"

# Open both files and set up a CSV reader and writer; nothing is copied yet.
with open(inputFileName) as inFile:
    with open(outputFileName, "w") as outfile:
        r = csv.reader(inFile)
        w = csv.writer(outfile)
Which I know only reads the original file and then will write to _modified. How do I select the current headers and then change them so that they write to the new file?
The headers are just another row of CSV data. Just write them as a new row to the output followed by the rest of the data from the input file.
import csv
import os
# Derive "<name>_modified.csv" from the input file name.
inputFileName = "temp.csv"
base_name, _extension = os.path.splitext(inputFileName)
outputFileName = base_name + "_modified.csv"

# Binary modes are the Python 2 csv convention used by this snippet.
with open(inputFileName, 'rb') as inFile, open(outputFileName, 'wb') as outfile:
    r = csv.reader(inFile)
    w = csv.writer(outfile)

    # Discard the old header row (no error if the file is empty).
    next(r, None)

    # Emit the replacement header, then every remaining row unchanged.
    w.writerow(['Item Number', 'Item Description', 'List Price', 'QTY Available'])
    w.writerows(r)
For Python 3, use:
with open(inputFileName, newline='') as inFile, open(outputFileName, 'w', newline='') as outfile:
and you may have to specify an encoding for your data.
Another solution is to use the fileinput module to update the file in place:
import fileinput
import sys

# Replacement for the first line of the file.
NEW_HEADER = 'Item Number,Item Description,List Price,QTY Available'

# With inplace=True, fileinput redirects standard output into the file being
# read, so everything written to stdout becomes the file's new content.
# sys.stdout.write is used instead of the print statement so the snippet
# runs unchanged on both Python 2 and Python 3.
# The file name matches the "temp.csv" used in the question's own code.
for line in fileinput.input('temp.csv', inplace=True):
    if fileinput.isfirstline():
        sys.stdout.write(NEW_HEADER + '\n')
    else:
        # `line` already ends with its newline.
        sys.stdout.write(line)
You could use fileinput for this:
import fileinput
import sys
import os
# Write a copy of temp.csv to temp_modified.csv with a new first line.
inputFileName = "temp.csv"
outputFileName = os.path.splitext(inputFileName)[0] + "_modified.csv"

with open(outputFileName, "w") as outfile:
    # inplace=False: the input file is only read, never rewritten.
    source_lines = fileinput.input([inputFileName], inplace=False)
    for line in source_lines:
        if not fileinput.isfirstline():
            outfile.write(line)
            continue
        outfile.write('Item Number,Item Description,List Price,QTY Available\n')

Categories