how to skip blank line while reading CSV file using python - python

This is my code i am able to print each line but when blank line appears it prints ; because of CSV file format, so i want to skip when blank line appears
import csv
import time
ifile = open ("C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv", "rb")
for line in csv.reader(ifile):
if not line:
empty_lines += 1
continue
print line

If you want to skip all whitespace lines, you should use this test: ' '.isspace().
Since you may want to do something more complicated than just printing the non-blank lines to the console(no need to use CSV module for that), here is an example that involves a DictReader:
#!/usr/bin/env python
# Tested with Python 2.7
# I prefer this style of importing - hides the csv module
# in case you do from this_file.py import * inside of __init__.py
import csv as _csv
# Real comments are more complicated ...
def is_comment(line):
return line.startswith('#')
# Kind of sily wrapper
def is_whitespace(line):
return line.isspace()
def iter_filtered(in_file, *filters):
for line in in_file:
if not any(fltr(line) for fltr in filters):
yield line
# A dis-advantage of this approach is that it requires storing rows in RAM
# However, the largest CSV files I worked with were all under 100 Mb
def read_and_filter_csv(csv_path, *filters):
with open(csv_path, 'rb') as fin:
iter_clean_lines = iter_filtered(fin, *filters)
reader = _csv.DictReader(iter_clean_lines, delimiter=';')
return [row for row in reader]
# Stores all processed lines in RAM
def main_v1(csv_path):
for row in read_and_filter_csv(csv_path, is_comment, is_whitespace):
print(row) # Or do something else with it
# Simpler, less refactored version, does not use with
def main_v2(csv_path):
try:
fin = open(csv_path, 'rb')
reader = _csv.DictReader((line for line in fin if not
line.startswith('#') and not line.isspace()),
delimiter=';')
for row in reader:
print(row) # Or do something else with it
finally:
fin.close()
if __name__ == '__main__':
csv_path = "C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv"
main_v1(csv_path)
print('\n'*3)
main_v2(csv_path)

Instead of
if not line:
This should work:
if not ''.join(line).strip():

my suggestion would be to just use the csv reader who can delimite the file into rows. Like this you can just check whether the row is empty and if so just continue.
import csv
with open('some.csv', 'r') as csvfile:
# the delimiter depends on how your CSV seperates values
csvReader = csv.reader(csvfile, delimiter = '\t')
for row in csvReader:
# check if row is empty
if not (row):
continue

You can always check for the number of comma separated values. It seems to be much more productive and efficient.
When reading the lines iteratively, as these are a list of comma separated values you would be getting a list object. So if there is no element (blank link), then we can make it skip.
with open(filename) as csv_file:
csv_reader = csv.reader(csv_file, delimiter=",")
for row in csv_reader:
if len(row) == 0:
continue

You can strip leading and trailing whitespace, and if the length is zero after that the line is empty.

import csv
with open('userlist.csv') as f:
reader = csv.reader(f)
user_header = next(reader) # Add this line if there the header is
user_list = [] # Create a new user list for input
for row in reader:
if any(row): # Pick up the non-blank row of list
print (row) # Just for verification
user_list.append(row) # Compose all the rest data into the list

This example just prints the data in array form while skipping the empty lines:
import csv
file = open("data.csv", "r")
data = csv.reader(file)
for line in data:
if line: print line
file.close()
I find it much clearer than the other provided examples.

import csv
ifile=csv.reader(open('C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv', 'rb'),delimiter=';')
for line in ifile:
if set(line).pop()=='':
pass
else:
for cell_value in line:
print cell_value

Related

My code is not working properly only on my device

I created a program to create a csv where every number from 0 to 1000000
import csv
nums = list(range(0,1000000))
with open('codes.csv', 'w') as f:
writer = csv.writer(f)
for val in nums:
writer.writerow([val])
then another program to remove a number from the file taken as input
import csv
import os
while True:
members= input("Please enter a number to be deleted: ")
lines = list()
with open('codes.csv', 'r') as readFile:
reader = csv.reader(readFile)
for row in reader:
if all(field != members for field in row):
lines.append(row)
else:
print('Removed')
os.remove('codes.csv')
with open('codes.csv', 'w') as writeFile:
writer = csv.writer(writeFile)
writer.writerows(lines)
The above code is working fine on any other device except my pc, in the first program it creates the csv file with empty rows between every number, in the second program the number of empty rows multiplies and the file size also multiples.
what is wrong with my device then?
Thanks in advance
I think you shouldn't use a csv file for single column data. Use a json file instead.
And the code that you've written for checking which value to not remove, is unnecessary. Instead you could write a list of numbers to the file, and read it back to a variable while removing a number you desire to, using the list.remove() method.
And then write it back to the file.
Here's how I would've done it:
import json
with open("codes.json", "w") as f: # Write the numbers to the file
f.write(json.dumps(list(range(0, 1000000))))
nums = None
with open("codes.json", "r") as f: # Read the list in the file to nums
nums = json.load(f)
to_remove = int(input("Number to remove: "))
nums.remove(to_remove) # Removes the number you want to
with open("codes.json", "w") as f: # Dump the list back to the file
f.write(json.dumps(nums))
Seems like you have different python versions.
There is a difference between built-in python2 open() and python3 open(). Python3 defaults to universal newlines mode, while python2 newlines depends on mode argument open() function.
CSV module docs provides a few examples where open() called with newline argument explicitly set to empty string newline='':
import csv
with open('some.csv', 'w', newline='') as f:
writer = csv.writer(f)
writer.writerows(someiterable)
Try to do the same. Probably without explicit newline='' your writerows calls add one more newline character.
CSV file from English - Comma-Separated Values, you have a record with spaces.
To remove empty lines - when opening a file for writing, add newline="".
Since this format is tabular data, you cannot simply delete the element, the table will go. It is necessary to insert an empty string or "NaN" instead of the deleted element.
I reduced the number of entries and made them in the form of a table for clarity.
import csv
def write_csv(file, seq):
with open(file, 'w', newline='') as f:
writer = csv.writer(f)
for val in seq:
writer.writerow([v for v in val])
nums = ((j*10 + i for i in range(0, 10)) for j in range(0, 10))
write_csv('codes.csv', nums)
nums_new = []
members = input("Please enter a number, from 0 to 100, to be deleted: ")
with open('codes.csv', 'r') as f:
reader = csv.reader(f)
for row in reader:
rows_new = []
for elem in row:
if elem == members:
elem = ""
rows_new.append(elem)
nums_new.append(rows_new)
write_csv('codesdel.csv', nums_new)

Stripping the last line/footer from a csv file in Python 2.7

import csv
import operator
import os
import sys
with open('test.csv', 'r') as cvsfile:
f = csv.reader(csvfile, delimiter=',')
for line in f:
line = [line:-1]
print (line)
=====
Tried the above
I am trying to strip the last line from a csv file before sorting. I am able to strip the header with a next() command but am unable to strip the footer.
To strip first/last line, you can convert the csv.reader to list and then use [1:-1] on it. For example:
import csv
with open('test.csv', 'r') as csvfile:
f = csv.reader(csvfile, delimiter=',')
for line in list(f)[1:-1]: # [1:-1] to strip first/last line from csv
print (line)
If you are dealing a with a big dataset, list(f)[1:-1] will load the entire set into memory at once and then make of copy of that list without the first and last element of the original list. To keep using the generator so that only one line is loaded into memory at a time,
import csv
import operator
import os
import sys
curr = None
next_line = None
with open('test.csv', 'r') as cvsfile:
f = csv.reader(csvfile, delimiter=',')
next(f) # remove first line
try:
curr = next(f)
next_line = next(f)
while True:
print(curr)
curr = next_line
next_line = next(f)
except StopIteration:
pass

Reading CSV and returning array

Taking a video game design course and I've never had to use python before so I am very confused... I am tasked with the following :
read in the CSV file into Python and store its contents as a list of lists
(or 2D list/array). To do so, you will make use of the CSV[1] library.
The reading of the CSV file should be done as its own function - please create a function called readCSV(...)
that takes in the file name as the argument and returns the 2D list.
As mentionned I have no previous coding experience with python. I have managed to do this so far and would greatly appreciate some support.
import csv
# reading each row and printing it
def readCSV(fileName):
TwoDimList = []
with open(fileName, 'r') as f:
r = csv.reader(f, delimiter=',')
for row in r:
entities = readCSV('entities.csv')
print(entities)
Just append each row (which is a list of columns values) to your 2d list and return it in the end:
def readCSV(fileName):
two_dim_list = [] # snake case ftw (PEP8)
with open(fileName, 'r') as f:
r = csv.reader(f, delimiter=',')
# next(r) # skip header line if necessary
for row in r:
two_dim_list.append(row)
return two_dim_list
The short version of that is:
def readCSV(fileName):
with open(fileName, 'r') as f:
r = csv.reader(f, delimiter=',')
# next(r) # skip header line
return list(r)
You can just call list on the reader to get the full 2d list:
def read_csv(file_name):
with open(file_name) as f:
return list(csv.reader(f))
This works because csv.reader is an iterable.
define a function to read csv and return list, and use it later in the program
def readCSVinList(fpath,fname):
with open(fpath+fname) as csv_file:
csv_reader=csv.reader(csv_file)
return list(csv_reader)
f= readCSVinList("A:\\Test\\","test.csv")
for row in f:
print(row)

python 3 csv reader + Ignore empty records [duplicate]

This is my code i am able to print each line but when blank line appears it prints ; because of CSV file format, so i want to skip when blank line appears
import csv
import time
ifile = open ("C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv", "rb")
for line in csv.reader(ifile):
if not line:
empty_lines += 1
continue
print line
If you want to skip all whitespace lines, you should use this test: ' '.isspace().
Since you may want to do something more complicated than just printing the non-blank lines to the console(no need to use CSV module for that), here is an example that involves a DictReader:
#!/usr/bin/env python
# Tested with Python 2.7
# I prefer this style of importing - hides the csv module
# in case you do from this_file.py import * inside of __init__.py
import csv as _csv
# Real comments are more complicated ...
def is_comment(line):
return line.startswith('#')
# Kind of sily wrapper
def is_whitespace(line):
return line.isspace()
def iter_filtered(in_file, *filters):
for line in in_file:
if not any(fltr(line) for fltr in filters):
yield line
# A dis-advantage of this approach is that it requires storing rows in RAM
# However, the largest CSV files I worked with were all under 100 Mb
def read_and_filter_csv(csv_path, *filters):
with open(csv_path, 'rb') as fin:
iter_clean_lines = iter_filtered(fin, *filters)
reader = _csv.DictReader(iter_clean_lines, delimiter=';')
return [row for row in reader]
# Stores all processed lines in RAM
def main_v1(csv_path):
for row in read_and_filter_csv(csv_path, is_comment, is_whitespace):
print(row) # Or do something else with it
# Simpler, less refactored version, does not use with
def main_v2(csv_path):
try:
fin = open(csv_path, 'rb')
reader = _csv.DictReader((line for line in fin if not
line.startswith('#') and not line.isspace()),
delimiter=';')
for row in reader:
print(row) # Or do something else with it
finally:
fin.close()
if __name__ == '__main__':
csv_path = "C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv"
main_v1(csv_path)
print('\n'*3)
main_v2(csv_path)
Instead of
if not line:
This should work:
if not ''.join(line).strip():
my suggestion would be to just use the csv reader who can delimite the file into rows. Like this you can just check whether the row is empty and if so just continue.
import csv
with open('some.csv', 'r') as csvfile:
# the delimiter depends on how your CSV seperates values
csvReader = csv.reader(csvfile, delimiter = '\t')
for row in csvReader:
# check if row is empty
if not (row):
continue
You can always check for the number of comma separated values. It seems to be much more productive and efficient.
When reading the lines iteratively, as these are a list of comma separated values you would be getting a list object. So if there is no element (blank link), then we can make it skip.
with open(filename) as csv_file:
csv_reader = csv.reader(csv_file, delimiter=",")
for row in csv_reader:
if len(row) == 0:
continue
You can strip leading and trailing whitespace, and if the length is zero after that the line is empty.
import csv
with open('userlist.csv') as f:
reader = csv.reader(f)
user_header = next(reader) # Add this line if there the header is
user_list = [] # Create a new user list for input
for row in reader:
if any(row): # Pick up the non-blank row of list
print (row) # Just for verification
user_list.append(row) # Compose all the rest data into the list
This example just prints the data in array form while skipping the empty lines:
import csv
file = open("data.csv", "r")
data = csv.reader(file)
for line in data:
if line: print line
file.close()
I find it much clearer than the other provided examples.
import csv
ifile=csv.reader(open('C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv', 'rb'),delimiter=';')
for line in ifile:
if set(line).pop()=='':
pass
else:
for cell_value in line:
print cell_value

Python csv seek() not working

Hi I am trying to read a csv file using the following code. I want to read from n th line to m th line of the csv file provided. As a example I want to start reading from 10th line to 100 line and after that start from 500th line to 1000th line. I give those parameters using start and end variables.
The problem that it always start from the beginning regardless the start and end variables. i tried and tried for a solution but failed.Can anyone help me to figure out the issue here.? Thanks a lot! (there are some duplicate questions but no one seems to have given a solution)
import csv
import os
with open('file.csv','r') as csvfile:
start=10
end=100
csvfile.seek(start)
r= csv.reader(csvfile)
r.next()
for i in range(start,end):
try:
url=r.next()[2]
print url
except IndexError,e:
print str(e),
except ValueError,b:
print b
csvfile.close()
Use the csv module.
import csv
n = 3
m = 5
read = 0
with open("so.csv") as csvfile:
reader = csv.reader(csvfile)
for record in reader:
read += 1
if read >= n and read <= m:
print(record)
You can iterate through csv file lines and get lines you want like this:
import csv
def read_lines(file_name, start, end):
with open(file_name, 'rb') as csvfile:
csvreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
for row in csvreader:
if csvreader.line_num >= start and csvreader.line_num <= end:
print ', '.join(row)
else:
continue
read_lines('test.csv', 10,12)
Update:
From documentation: csvreader.line_num: The number of lines read from the source iterator. This is not the same as the number of records returned, as records can span multiple lines.

Categories