python create a unexpected blank row below header

python create a unexpected blank row below header - python

My code below create csv file and then adding random data to it.
Somehow, the first part of the code create a blank row below header.
Can anyone please help to fix the code to remove this blank row?
Thank you,
Hary
header_list = ["Firm", "hour", "unit sold", "product code", "dollar value"]
for i in range(3):
# create file with header -----------
with open(f'D:\\2000 transactions\\location_id_' + str(i) + '.csv', 'w', newline='') as file:
dw = csv.DictWriter(file, delimiter=',',fieldnames=header_list)
dw.writeheader() # this line create a blank row below header
# adding data to file ---------------
for j in range(5):
n = random.randint(1, 99)
text = str(str(sp100_list['Name'].loc[n]) + ',' + str(random.randint(5, 20)) + ',' + str(random.randint(200, 1000)) + ',' + str(
random.randint(100, 150)) + ',' + str(int(random.random() * 1000000)))
myfile = open(f'D:\\2000 transactions\\location_id_' + str(i) + '.csv', 'a')
myfile.write('\n' + text)

Actually you add unexpected blank row in the line myfile.write('\n' + text),not dw.writeheader(). You add a \n first, which add a blank row to your file. Move the \n to backwards should solve it.
code:
import csv
header_list = ["Firm", "hour", "unit sold", "product code", "dollar value"]
for i in range(1):
with open(f'test{str(i)}.csv', 'w', newline='') as file:
dw = csv.DictWriter(file, delimiter=',',fieldnames=header_list)
dw.writeheader() # this line create a blank row below header
for j in range(5):
with open(f'test{str(i)}.csv', 'a', newline='') as myfile:
text = "1,1"
myfile.write(text+'\n')
result:
Firm,hour,unit sold,product code,dollar value
1,1
1,1
1,1
1,1
1,1

you can only use csv writer instead of DictWriter.here is example
use csv write to write header and rows
header_list = ["Firm", "hour", "unit sold", "product code", "dollar value"]
for i in range(3):
#create writer here
with open(f'tmp_' + str(i) + '.csv', 'w') as file:
writer = csv.writer(file, delimiter=',')
#write header
writer.writerow(header_list)
# adding data to file ---------------
for j in range(5):
n = random.randint(1, 99)
writer.writerow([sp100_list['Name'].loc[n],random.randint(5, 20),
random.randint(200, 1000),random.randint(100, 150),
int(random.random() * 1000000)])

Related

Is there a way to read and alter the contents of a huge csv file in PyCharm?

I'm attempting to create a program currently that can read a csv, determine if a substring is included in one of the columns of each row, and if it isn't present, rewrites certain columns to a new csv. I have the code down for this much- but the csv I need to use the program for has well over 3 million rows. I use PyCharm and currently I'm not able to process this much data. It can only view the csv in a read-only format which doesn't allow me to use it. I know pandas has a chunk size feature but I don't know how to implement this with the rest of my code.
def reading(csv_input):
originalLength = 0
rowCount = 0
with open(f'Web Report {csv_input}', 'w') as file:
writer = csv.writer(file)
writer.writerow(['Index', 'URL Category', 'User IP', 'URL'])
dropCount = 0
data = pd.read_csv(csv_input, chunksize=100000)
df = pd.DataFrame(data,
columns=['Line', 'Date', 'Hour', 'User Name', 'User IP', 'Site Name',
'URL Category', 'Action', 'Action Description'])
originalLength = len(df.index)
for line in range(originalLength):
dataLine = df.loc[line]
x = dataLine.get(key='Action')
if x == 0:
siteName = dataLine.get(key='Site Name')
if 'dbk' in siteName:
dropCount = dropCount + 1
elif 'ptc' in siteName:
dropCount = dropCount + 1
elif 'wcf' in siteName:
dropCount = dropCount + 1
elif 'google' in siteName:
dropCount = dropCount + 1
else:
writer.writerow([line, # Original Index
df.loc[line].get(key='URL Category'), # Original URL Category
df.loc[line].get(key='User IP'), # Original User IP
df.loc[line].get(key='Site Name')]) # Original Site Name
rowCount = rowCount + 1
else:
dropCount = dropCount + 1
file.close()
print("Input: " + str(csv_input))
print("Output: " + str(file.name))
print("Original Length: " + str(originalLength))
print("Current Length: " + str(rowCount))
print("Drop Count: " + str(dropCount) + "\n")
return df

If you use csv to write file then you could use it also to read row by row.
import csv
with open('input.csv') as infile, open('output.csv', 'w') as outfile:
csv_reader = csv.reader(infile)
csv_writer = csv.writer(outfile)
# copy headers
headers = next(csv_reader)
csv_writer.writerow(headers)
# process rows
for row in csv_reader: # read row by row
# keep only rows with even index
if int(row[0]) % 2 == 0:
print('--- row ---')
print(row)
csv_writer.writerow(row)
If you want to use pandas with chunk then you should use for-loop for this.
And when you write with pandas then you need append mode without headers.
import pandas as pd
first = True
for df in pd.read_csv('input.csv', chunksize=1): # read row by row
# keep only rows with even index
if df.index % 2 == 0:
print('--- row ---')
print(df)
if first:
# create new file with headers
df.to_csv('output.csv', mode='w')
first = False
else:
# append to existing file without headers
df.to_csv('output.csv', mode='a', header=False)
Minimal working code
import pandas as pd
import csv
# --- create some data ---
data = {
'A': range(0,10),
'B': range(10,20),
'C': range(20,30),
} # columns
df = pd.DataFrame(data)
df.to_csv('input.csv', index=False)
# --- read and write with `pandas` ---
first = True
for df in pd.read_csv('input.csv', chunksize=1): # read row by row
# keep only rows with even index
if df.index % 2 == 0:
print('--- row ---')
print(df)
if first:
# create empty with headers
df.to_csv('output_pandas.csv', mode='w')
first = False
else:
# append to existing file without headers
df.to_csv('output_pandas.csv', mode='a', header=False)
# --- read and write with `csv` ---
with open('input.csv') as infile, open('output.csv', 'w') as outfile:
csv_reader = csv.reader(infile)
csv_writer = csv.writer(outfile)
# copy headers
headers = next(csv_reader)
csv_writer.writerow(headers)
# process rows
for row in csv_reader:
# keep only rows with even index
if int(row[0]) % 2 == 0:
print('--- row ---')
print(row)
csv_writer.writerow(row)
Doc: read_csv(), to_csv()

how to parse a txt file to csv and modify formatting

Is there a way I can use python to take my animals.txt file results and convert it to csv and format it differently?
Currently the animals.txt file looks like this:
ID:- 512
NAME:- GOOSE
PROJECT NAME:- Random
REPORT ID:- 30321
REPORT NAME:- ANIMAL
KEYWORDS:- ['"help,goose,Grease,GB"']
ID:- 566
NAME:- MOOSE
PROJECT NAME:- Random
REPORT ID:- 30213
REPORT NAME:- ANIMAL
KEYWORDS:- ['"Moose, boar, hansel"']
I would like the CSV file to present it as:
ID, NAME, PROJECT NAME, REPORT ID, REPORT NAME, KEYWORDS
Followed by the results underneath each header
Here is a script I have wrote:
import re
import csv
with open("animals.txt") as f: text = f.read()
data = {}
keys = ['ID', 'NAME', 'PROJECT NAME', 'REPORT ID', 'REPORT NAME', 'KEYWORDS']
for k in keys:
data[k] = re.findall(r'%s:- (.*)' % k, text)
csv_file = 'out.csv'
with open(csv_file, 'w') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=keys)
writer.writeheader()
for x in data:
writer.writerow(x)

An easy way to do is parsing using regex and store them in a dict, just before you write the final csv:
import re
# `text` is your input text
data = {}
keys = ['ID', 'NAME', 'PROJECT NAME', 'REPORT ID', 'REPORT NAME', 'KEYWORDS']
for k in keys:
data[k] = re.findall(r'%s:- (.*)' % k, text)
And to CSV:
import csv
csv_file = 'out.csv'
with open(csv_file, 'w') as csvfile:
writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE, escapechar='\\')
writer.writerow(data.keys())
for i in range(len(data[keys[0]])):
writer.writerow([data[k][i] for k in keys])
Output in csv:
ID,NAME,PROJECT NAME,REPORT ID,REPORT NAME,KEYWORDS
512,GOOSE,Random,30321,ANIMAL,['\"help\,goose\,Grease\,GB\"']
566,MOOSE,Random,30213,ANIMAL,['\"Moose\, boar\, hansel\"']
Note that I used re.M multiline mode since there's a trick in your text, preventing matching ID twice! Also the default write rows needed to be twisted.
Also uses \ to escape the quote.

This should work:
fname = 'animals.txt'
with open(fname) as f:
content = f.readlines()
content = [x.strip() for x in content]
output = 'ID, NAME, PROJECT NAME, REPORT ID, REPORT NAME, KEYWORDS\n'
line_output = ''
for i in range(0, len(content)):
if content[i]:
line_output += content[i].split(':-')[-1].strip() + ','
elif not content[i] and not content[i - 1]:
output += line_output.rstrip(',') + '\n'
line_output = ''
output += line_output.rstrip(',') + '\n'
print(output)

That's the code in Autoit (www.autoitscript.com)
Global $values_A = StringRegExp(FileRead("json.txt"), '[ID|NAME|KEYWORDS]:-\s(.*)?', 3)
For $i = 0 To UBound($values_A) - 1 Step +6
FileWrite('out.csv', $values_A[$i] & ',' & $values_A[$i + 1] & ',' & $values_A[$i + 2] & ',' & $values_A[$i + 3] & ',' & $values_A[$i + 4] & ',' & $values_A[$i + 5] & #CRLF)
Next

Create Excel Sheets from different files in Linux with Python

There are 2 txt file in a linux server.
first data file:
a;1234
b;12334
c;234234
second data file :
a ; ass ; asfda
b ; sdfq; qwrwffsaa
c ; asda ; qdasasd
What I try to make is to create a excel file with python which has 2 sheets.
First sheet keeps first data file second one should keep second data file.
What I develop so far is:
#!/bin/python
import xlsxwriter
import smtplib
import datetime
now = datetime.datetime.now()
workbookname = 'Excel_'+now.strftime("%Y-%m-%d_%H:%M")+'.xlsx'
workbook = xlsxwriter.Workbook(workbookname)
worksheet = workbook.add_worksheet('Sheet1')
worksheet.write('A1', 'Hostname')
worksheet.write('B1', 'User Name')
worksheet2 = workbook.add_worksheet('User Privilege')
worksheet2.write('A1', 'Hostname')
worksheet2.write('B1', 'User Detail')
worksheet2.write('C1', 'Description')
with open('/tmp/file1.txt') as f:
content = f.read().splitlines()
i = 0
while i < len(content):
content2 = content[i].split(';')
worksheet.write('A'+str(i+2), content2[0])
worksheet.write('B'+str(i+2), content2[1])
workbook.close()
i = 0
while i < len(content):
with open('/tmp/file2.txt') as f:
content = f.read().splitlines()
worksheet2.write('A' + str(i + 2), content2[0])
worksheet2.write('B' + str(i + 2), content2[1])
worksheet2.write('C' + str(i + 2), content2[2])
i=i+1
workbook.close()
This script only works for the first sheet it does not write to second sheet.

With pandas this can be done in a couple of lines
import pandas
df1 = pandas.read_csv('file1.csv', sep = ';', header = None)
df2 = pandas.read_csv('file2.csv', sep = ';', header = None)
writer = pandas.ExcelWriter('output.xlsx')
df1.to_excel(writer, 'sheet 1')
df2.to_excel(writer, 'sheet 2')
writer.save()

Adding DictHeader with corresponding values to the existing table

I'm iterating API requests for each row of the input CSV file. And I want to add API output results to the existing CSV file.
Input
Desired output
As you can see, I added three headers with corresponding results (latitude, longitude, coordinates)
However, I'm finding difficulty with writing the right query for this. Below is the best I could do.
df=pd.read_csv(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv",delimiter=',', na_values="nan")
# Output
with open(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv", 'r') as csvin, open (r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy3.csv", 'w', newline='') as out:
csvreader = csv.DictReader(csvin)
fieldnames = csvreader.fieldnames + ["latitude","longitude","coordinates"]
csvwriter = csv.DictWriter(out, fieldnames)
csvwriter.writeheader()
# Iterating requests for each row
for row in df.itertuples():
output = client.geocode(str(row.addressline1) + ', ' + str(row.city) + ', ' + str(row.state) + ', ' + str(row.postalcode)).coords
cord = '(' + str(output[0]) + ', '+ str(output[1]) + ')'
for node, row in enumerate(csvreader, 3):
csvwriter.writerow(dict(3, {'latitude': output[0], 'longitude': output[1], 'coordinates': cord}))
Update:
Here is my new Python query:
df=pd.read_csv(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv",delimiter=',', na_values="nan")
# Output
with open(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv", 'r') as csvin, open (r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy3.csv", 'w', newline='') as out:
csvreader = csv.DictReader(csvin)
fieldnames = csvreader.fieldnames + ["latitude","longitude","coordinates"]
csvwriter = csv.DictWriter(out, fieldnames)
csvwriter.writeheader()
# Iterating requests for each row
for row in df.itertuples():
output = client.geocode(str(row.addressline1) + ', ' + str(row.city) + ', ' + str(row.state) + ', ' + str(row.postalcode)).coords
cord = '(' + str(output[0]) + ', '+ str(output[1]) + ')'
for node, row1 in enumerate(csvreader, 38):
csvwriter.writerow(dict(row1,latitude= output[0] % node))
for node, row2 in enumerate(csvreader, 39):
csvwriter.writerow(dict(row2,longitude = output[1] % node))
for node, row3 in enumerate(csvreader, 40):
csvwriter.writerow(dict(row3,coordinates= cord % node))
However, I get the following result:

You can more easily accomplish this by using more of pandas features.
Import the data from csv as you have been doing.
import pandas as pd
df = pd.read_csv("input_file.csv")
You can use dataframe.apply(func, axis=1) to apply a function to each row of a dataframe. https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html
def get_coords(row):
return client.geocode(str(row.addressline1) + ', ' + str(row.city) + ', ' \
+ str(row.state) + ', ' + str(row.postalcode)).coords
coords = df.apply(get_coords, axis=1)
df['latitide'] = coords.values[:,0]
df['longitude'] = coords.values[:,1]
df['coords'] = coords
You can then easily save the dataframe to csv using:
df.to_csv('output_filename.csv')
Hope this help.
p.s. code is untested but should be good :)

Row lables to columns in multi column file

I am new to Python and am using version 2.7.1 as part of Hyperion FDMEE.
I have a file which I need to reorder the columns plus, split one column into 3 as part of the same file.
Source file;
ACCOUNT;UD1;UD2;UD3;PERIOD;PERIOD;AMOUNT
QTY;032074;99953;53;2017.07.31;2017.07.31;40.91
COGS;032074;99953;53;2017.07.31;2017.07.31;-7488.36
TURNOVER;032074;99953;53;2017.07.31;2017.07.31;505.73
QTY;032075;99960;60;2017.07.31;2017.07.31;40.91
COGS;032075;99960;60;2017.07.31;2017.07.31;-7488.36
TURNOVER;032075;99960;60;2017.07.31;2017.07.31;505.73
I have managed to reorder the columns per this script;
infilename = fdmContext["OUTBOXDIR"]+"/Targit_1707.dat"
outfilename = fdmContext["OUTBOXDIR"]+"/TargitExport.csv"
import csv
infile = open(infilename, 'r')
outfile = open(outfilename, 'w+')
for line in infile:
column = line.split(';')
outfile.write(column[1] + ";" + column[2] + ";" + column[3] + ";" + column[4] + ";" + column[0] + ";" + str(column[6].strip('\n')) + ";201701" + "\n")
outfile.close()
infile.close()
Producing the result;
UD1;UD2;UD3;PERIOD;ACCOUNT;AMOUNT;201701
032074;99953;53;2017.07.31;QTY;40.91;201701
032074;99953;53;2017.07.31;COGS;-7488.36;201701
032074;99953;53;2017.07.31;TURNOVER;505.73;201701
032075;99960;60;2017.07.31;QTY;40.91;201701
032075;99960;60;2017.07.31;COGS;-7488.36;201701
032075;99960;60;2017.07.31;TURNOVER;505.73;201701
but I am struggling to transpose the Account column (QTY, COGS, TURNOVER) into seperate columns as in the example below;
UD1;UD2;UD3;PERIOD;QTY;COGS;TURNOVER;201701
032074;99953;53;2017.07.31;40.91;-7488.36;505.73;201701
032075;99960;60;2017.07.31;40.91;-7488.36;505.73;201701
Any suggestions would be very much appreciated.

Use a dict, for instance:
import csv
fieldnames = infile.readline()[:-1]
fieldnames = fieldnames.split(';')[1:5] + ['QTY', 'COGS', 'TURNOVER']
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
writer.writeheader()
record_dict = {}
for i, line in enumerate(infile):
if not line: break
line = line[:-1].split(';')
# Assign column data every 1,2,3 lines
mod_row = (i % 3)+1
if mod_row == 1:
record_dict['QTY'] = line[6]
record_dict['UD1'] = line[1]
# ... and so on
if mod_row == 2:
record_dict['COGS'] = line[6]
if mod_row == 3:
record_dict['TURNOVER'] = line[6]
writer.writerow(record_dict)
record_dict = {}
Output:
UD1,UD2,UD3,PERIOD,QTY,COGS,TURNOVER
032074,,,,40.91,-7488.36,505.73
032075,,,,40.91,-7488.36,505.73
Tested with Python: 3.4.2
Read about:
Python » 3.6.1 Documentation csv.DictWriter

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

python create a unexpected blank row below header - python

Related

Is there a way to read and alter the contents of a huge csv file in PyCharm?

how to parse a txt file to csv and modify formatting

Create Excel Sheets from different files in Linux with Python

Adding DictHeader with corresponding values to the existing table

Row lables to columns in multi column file

Categories

Resources