Print first 5 rows of large csv file (not using pandas) - python

Im attempting to simplify a python code that will print the first five rows (plus header) of a large csv file in a more condensed output if possible. I would prefer to use pandas, however in this case I would like to just to just use the import cv and import os (Mac user).
Code as follows:
import csv
filename = "/Users/xx/Desktop/xx.csv"
fields = []
rows = []
with open(filename, 'r') as csvfile:
csvreader = csv.reader(csvfile)
fields = next(csvreader)
for row in csvreader:
rows.append(row)
print("Total no. of rows:%d"%(csvreader.line_num))
print('Field names are:' + ', '.join(field for field in fields))
print('\nFirst 5 rows are:\n')
for row in rows[:5]:
for col in row:
print("%10s"%col,end=" "),
print('\n')

Related

How to check certain value of column and skip certain row

import ctypes
import csv
with open('data.csv') as csv_file:
reader = csv.reader(csv_file)
next(reader)
for row in reader:
if(int(row[3])>=5):
print(row)
mymessage = 'A message'
title = 'Popup window'
ctypes.windll.user32.MessageBoxA(0, mymessage, title, 0)
else:
continue
I currently have the above csv file and code. However, when I do print(row),
['string1','00','00','21','00'..]
['string2','00','00','84','00'..]
['string3','00','00','21','00'..]
['string4','00','00','21','00'..]
.
.
.
['string7','00','00','21','00'..]
['string8','00','00','84','00'..]
['string9','00','00','15','00'..]
['string10','00','00','84','00'..]
[' ','precision','recall','f1-score','support'..]
It prints like above.
with open('data.csv') as csv_file:
reader = csv.reader(csv_file)
next(reader)
for row in reader[1:-8]:
print(row)
if(int(row[3])>=5):
mymessage = 'A message'
title = 'Popup window'
ctypes.windll.user32.MessageBoxA(0, mymessage, title, 0)
else:
continue
So in order to ignore the red part of the picture, I thought I could skip the last eight lines through the above code. But there's a problem.
My two problems with using this are:
I don't know the total number of lines, because
the number of strings1 to n changes every time when I write CSV file.
As I tried, I would like to remove the unnecessary lines below while checkinging whether each value in a particular column is greater than 5.
This gives you the number of lines of your csv before you import it into python.
lines = sum(1 for line in open('data.csv')) # gives you the total number of lines without loading csv
Then you can import it without the last 8 rows (like the example with pandas below)
import pandas as pd
data = pd.read_csv('data.csv', delimiter=';', decimal='.', header=0, skipfooter=8) # loads the csv without last 8 rows
or you load it with your code without the last 8 rows.
Connecting your code it should be:
lines = sum(1 for line in open('data.csv')) # gives you the total number of lines without loading csv
with open('data.csv') as csv_file:
reader = csv.reader(csv_file)
next(reader)
for row in range(lines-8): # using the number of lines here. It doesn't even load the last 8 lines
print(row)
if(int(row[3])>=5):
mymessage = 'A message'
title = 'Popup window'
ctypes.windll.user32.MessageBoxA(0, mymessage, title, 0)
else:
continue
I finally got the answer. We can make the csv reader read the certain range of rows through itertools.islice.
from itertools import islice
with open('data.csv') as csv_file:
reader = csv.reader(csv_file)
#next(reader) We don't need this anymore beacause we read only certain rows.
for row in islice(reader, 0, 10): #reads the row from 1 to 11
print(row)
if(int(row[3])>=5):
mymessage = 'A message'
title = 'Popup window'
ctypes.windll.user32.MessageBoxA(0, mymessage, title, 0)
else:
continue
So I used this code.
lines = sum(1 for line in open('data.csv'))
for row in islice(reader, 0, lines)
In case your total csv rows are not fixed, you can use the above code like this.

Extract two columns sorted from CSV

I have a large csv file, containing multiple values, in the form
Date,Dslam_Name,Card,Port,Ani,DownStream,UpStream,Status
2020-01-03 07:10:01,aart-m1-m1,204,57,302xxxxxxxxx,0,0,down
I want to extract the Dslam_Name and Ani values, sort them by Dslam_name and write them to a new csv in two different columns.
So far my code is as follows:
import csv
import operator
with open('bad_voice_ports.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',')
sortedlist = sorted(readCSV, key=operator.itemgetter(1))
for row in sortedlist:
bad_port = row[1][:4],row[4][2::]
print(bad_port)
f = open("bad_voice_portsnew20200103SORTED.csv","a+")
f.write(row[1][:4] + " " + row[4][2::] + '\n')
f.close()
But my Dslam_Name and Ani values are kept in the same column.
As a next step I would like to count how many times the same value appears in the 1st column.
You are forcing them to be a single column. Joining the two into a single string means Python no longer regards them as separate.
But try this instead:
import csv
import operator
with open('bad_voice_ports.csv') as readfile, open('bad_voice_portsnew20200103SORTED.csv', 'w') as writefile:
readCSV = csv.reader(readfile)
writeCSV = csv.writer(writefile)
for row in sorted(readCSV, key=operator.itemgetter(1)):
bad_port = row[1][:4],row[4][2::]
print(bad_port)
writeCSV.writerow(bad_port)
If you want to include the number of times each key occurred, you can easily include that in the program, too. I would refactor slightly to separate the reading and the writing.
import csv
import operator
from collections import Counter
with open('bad_voice_ports.csv') as readfile:
readCSV = csv.reader(readfile)
rows = []
counts = Counter()
for row in readCSV:
rows.append([row[1][:4], row[4][2::]])
counts[row[1][:4]] += 1
with open('bad_voice_portsnew20200103SORTED.csv', 'w') as writefile:
writeCSV = csv.writer(writefile)
for row in sorted(rows):
print(row)
writeCSV.writerow([counts[row[0]]] + row)
I would recommend to remove the header line from the CSV file entirely; throwing away (or separating out and prepending back) the first line should be an easy change if you want to keep it.
(Also, hard-coding input and output file names is problematic; maybe have the program read them from sys.argv[1:] instead.)
So my suggestion is failry simple. As i stated in a previous comment there is good documentation on CSV read and write in python here: https://realpython.com/python-csv/
As per an example, to read from a csv the columns you need you can simply do this:
>>> file = open('some.csv', mode='r')
>>> csv_reader = csv.DictReader(file)
>>> for line in csv_reader:
... print(line["Dslam_Name"] + " " + line["Ani"])
...
This would return:
aart-m1-m1 302xxxxxxxxx
Now you can just as easilly create a variable and store the column values there and later write them to a file or just open up a new file wile reading lines and writing the column values in there. I hope this helps you.
After the help from #tripleee and #marxmacher my final code is
import csv
import operator
from collections import Counter
with open('bad_voice_ports.csv') as csv_file:
readCSV = csv.reader(csv_file, delimiter=',')
sortedlist = sorted(readCSV, key=operator.itemgetter(1))
line_count = 0
rows = []
counts = Counter()
for row in sortedlist:
Dslam = row[1][:4]
Ani = row[4][2:]
if line_count == 0:
print(row[1], row[4])
line_count += 1
else:
rows.append([row[1][:4], row[4][2::]])
counts[row[1][:4]] += 1
print(Dslam, Ani)
line_count += 1
for row in sorted(rows):
f = open("bad_voice_portsnew202001061917.xls","a+")
f.write(row[0] + '\t' + row[1] + '\t' + str(counts[row[0]]) + '\n')
f.close()
print('Total of Bad ports =', str(line_count-1))
As with this way the desired values/columns are extracted from the initial csv file and a new xls file is generated with the desired values stored in different columns and the total values per key are counted, along with the total of entries.
Thanks for all the help, please feel free for any improvement suggestions!
You can use sorted:
import csv
_h, *data = csv.reader(open('filename.csv'))
with open('new_csv.csv', 'w') as f:
write = csv.writer(f)
csv.writerows([_h, *sorted([(i[1], i[4]) for i in data], key=lambda x:x[0])])

Selecting rows in csv file in with the variable number of columns

I have a csv file that i need to select certain rows. For me is easy remove the AGE and MEAN WEIGHT because these names are the same in any file.
ID,AGE,HEIGHT,MEAN WEIGHT,20-Nov-2002,05-Mar-2003,09-Apr-2003,23-Jul-2003
1,23,1.80,80,78,78,82,82
2,25,1.60,58,56,60,60,56
3,20,1.90,100,98,102,98,102
ID,HEIGHT,20-Nov-2002,05-Mar-2003,09-Apr-2003,23-Jul-2003
1,1.80,78,78,82,82
2,1.60,56,60,60,56
3,1.90,98,102,98,102
i have this code
import csv
out= open("C:/Users/Pedro/data.csv")
rdr= csv.reader(out)
result= open('C:/Users/Pedro/datanew.csv','w')
wtr= csv.writer ( result,delimiter=',',lineterminator='\n')
for row in rdr:
wtr.writerow( (row[0], row[2], row[4],row[5],row[6],row[7]) )
out.close()
result.close()
but my difficulty is select all columns that have dates. The number of columns of the dates may be variable. The solution could be to detect the character - in row[4]
I'm not 100 % sure what's you're asking, but here is a script that may do what you want, which is to reproduce the file with all of an unknown number of date columns, plus your columns 0 and 2 (ID & HEIGHT):
import csv
with open('data.csv') as infile: # Use 'with' to close files automatically
reader = csv.reader(infile)
headers = reader.next() # Read first line
# Figure out which columns have '-' in them (assume these are dates)
date_columns = [col for col, header in enumerate(headers) if '-' in header]
# Add our desired other columns
all_columns = [0, 2] + date_columns
with open('new.csv', 'w') as outfile:
writer = csv.writer(outfile, delimiter=',', lineterminator='\n')
# print headers
writer.writerow([headers[i] for i in all_columns])
# print data
for row in reader: # Read remaining data from our input CSV
writer.writerow([row[i] for i in all_columns])
Does that help?

How to read one single line of csv data in Python?

There is a lot of examples of reading csv data using python, like this one:
import csv
with open('some.csv', newline='') as f:
reader = csv.reader(f)
for row in reader:
print(row)
I only want to read one line of data and enter it into various variables. How do I do that? I've looked everywhere for a working example.
My code only retrieves the value for i, and none of the other values
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
for row in reader:
i = int(row[0])
a1 = int(row[1])
b1 = int(row[2])
c1 = int(row[2])
x1 = int(row[2])
y1 = int(row[2])
z1 = int(row[2])
To read only the first row of the csv file use next() on the reader object.
with open('some.csv', newline='') as f:
reader = csv.reader(f)
row1 = next(reader) # gets the first line
# now do something here
# if first row is the header, then you can do one more next() to get the next row:
# row2 = next(f)
or :
with open('some.csv', newline='') as f:
reader = csv.reader(f)
for row in reader:
# do something here with `row`
break
you could get just the first row like:
with open('some.csv', newline='') as f:
csv_reader = csv.reader(f)
csv_headings = next(csv_reader)
first_line = next(csv_reader)
You can use Pandas library to read the first few lines from the huge dataset.
import pandas as pd
data = pd.read_csv("names.csv", nrows=1)
You can mention the number of lines to be read in the nrows parameter.
Just for reference, a for loop can be used after getting the first row to get the rest of the file:
with open('file.csv', newline='') as f:
reader = csv.reader(f)
row1 = next(reader) # gets the first line
for row in reader:
print(row) # prints rows 2 and onward
From the Python documentation:
And while the module doesn’t directly support parsing strings, it can easily be done:
import csv
for row in csv.reader(['one,two,three']):
print row
Just drop your string data into a singleton list.
The simple way to get any row in csv file
import csv
csvfile = open('some.csv','rb')
csvFileArray = []
for row in csv.reader(csvfile, delimiter = '.'):
csvFileArray.append(row)
print(csvFileArray[0])
To print a range of line, in this case from line 4 to 7
import csv
with open('california_housing_test.csv') as csv_file:
data = csv.reader(csv_file)
for row in list(data)[4:7]:
print(row)
I think the simplest way is the best way, and in this case (and in most others) is one without using external libraries (pandas) or modules (csv). So, here is the simple answer.
""" no need to give any mode, keep it simple """
with open('some.csv') as f:
""" store in a variable to be used later """
my_line = f.nextline()
""" do what you like with 'my_line' now """

How can I get a specific field of a csv file?

I need a way to get a specific item(field) of a CSV. Say I have a CSV with 100 rows and 2 columns (comma seperated). First column emails, second column passwords. For example I want to get the password of the email in row 38. So I need only the item from 2nd column row 38...
Say I have a csv file:
aaaaa#aaa.com,bbbbb
ccccc#ccc.com,ddddd
How can I get only 'ddddd' for example?
I'm new to the language and tried some stuff with the csv module, but I don't get it...
import csv
mycsv = csv.reader(open(myfilepath))
for row in mycsv:
text = row[1]
Following the comments to the SO question here, a best, more robust code would be:
import csv
with open(myfilepath, 'rb') as f:
mycsv = csv.reader(f)
for row in mycsv:
text = row[1]
............
Update: If what the OP actually wants is the last string in the last row of the csv file, there are several aproaches that not necesarily needs csv. For example,
fulltxt = open(mifilepath, 'rb').read()
laststring = fulltxt.split(',')[-1]
This is not good for very big files because you load the complete text in memory but could be ok for small files. Note that laststring could include a newline character so strip it before use.
And finally if what the OP wants is the second string in line n (for n=2):
Update 2: This is now the same code than the one in the answer from J.F.Sebastian. (The credit is for him):
import csv
line_number = 2
with open(myfilepath, 'rb') as f:
mycsv = csv.reader(f)
mycsv = list(mycsv)
text = mycsv[line_number][1]
............
#!/usr/bin/env python
"""Print a field specified by row, column numbers from given csv file.
USAGE:
%prog csv_filename row_number column_number
"""
import csv
import sys
filename = sys.argv[1]
row_number, column_number = [int(arg, 10)-1 for arg in sys.argv[2:])]
with open(filename, 'rb') as f:
rows = list(csv.reader(f))
print rows[row_number][column_number]
Example
$ python print-csv-field.py input.csv 2 2
ddddd
Note: list(csv.reader(f)) loads the whole file in memory. To avoid that you could use itertools:
import itertools
# ...
with open(filename, 'rb') as f:
row = next(itertools.islice(csv.reader(f), row_number, row_number+1))
print row[column_number]
import csv
def read_cell(x, y):
with open('file.csv', 'r') as f:
reader = csv.reader(f)
y_count = 0
for n in reader:
if y_count == y:
cell = n[x]
return cell
y_count += 1
print (read_cell(4, 8))
This example prints cell 4, 8 in Python 3.
There is an interesting point you need to catch about csv.reader() object. The csv.reader object is not list type, and not subscriptable.
This works:
for r in csv.reader(file_obj): # file not closed
print r
This does not:
r = csv.reader(file_obj)
print r[0]
So, you first have to convert to list type in order to make the above code work.
r = list( csv.reader(file_obj) )
print r[0]
Finaly I got it!!!
import csv
def select_index(index):
csv_file = open('oscar_age_female.csv', 'r')
csv_reader = csv.DictReader(csv_file)
for line in csv_reader:
l = line['Index']
if l == index:
print(line[' "Name"'])
select_index('11')
"Bette Davis"
Following may be be what you are looking for:
import pandas as pd
df = pd.read_csv("table.csv")
print(df["Password"][row_number])
#where row_number is 38 maybe
import csv
inf = csv.reader(open('yourfile.csv','r'))
for row in inf:
print row[1]

Categories