CSV with a subset of columns - python

Write a function named "filter_columns" that takes a string as a parameter representing the name of a CSV file with 5 columns in the format "string,int,int,int,int" and writes a file named "distant.csv" containing only the first and fifth columns from the input file.
import csv
def filter_columns(csvfile):
    with open(csvfile, 'r') as rf:
        reader = csv.reader(rf)
        with open('distant.csv', 'w') as wf:
            writer = csv.writer(wf)
            for item in reader:
                writer.writerow(item[0] + str(int(item[4])))
With the input file items.csv containing
bed,7,22,137,157
defender,14,58,185,61
I should get
bed,157
defender,61
But I am getting
b,e,d,1,5,7
d,e,f,e,n,d,e,r,6,1
How do I remove the unwanted commas?

According to the docs (https://docs.python.org/3/library/csv.html), csvwriter.writerow takes an iterable as its argument.
When you write item[0] + str(int(item[4])), you are producing a single string, and the writer iterates over it character by character. Hence the output looks like d,e,f,e,n,d,e,r,6,1.
You might want to try:
import csv
def filter_columns(csvfile):
    with open(csvfile, 'r') as rf:
        reader = csv.reader(rf)
        with open('distant.csv', 'w') as wf:
            writer = csv.writer(wf)
            for item in reader:
                print(item)
                writer.writerow([item[0]] + [str(int(item[4]))])

You need to edit your writerow.
writer.writerow([item[0]]+[item[4]])

You are passing the string item[0] + str(int(item[4])) to writer.writerow; for the first row, for example, that string is "bed157". Since writer.writerow expects an iterable (e.g. a list), it treats the string you pass as a sequence of characters: ["b", "e", "d", "1", "5", "7"].
You need to pass a list/tuple:
import csv
def filter_columns(csvfile):
    with open(csvfile, 'r') as rf:
        reader = csv.reader(rf)
        with open('distant.csv', 'w') as wf:
            writer = csv.writer(wf)
            for row in reader:
                writer.writerow([row[0], str(row[4])])

This worked for me
import csv
def filter_columns(x):
    with open(x, 'r') as f:
        reader = csv.reader(f)
        with open('museum.csv', 'w') as g:
            writer = csv.writer(g)
            for line in reader:
                writer.writerow((line[0], str(line[2])))
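
For reference, here is a minimal consolidated sketch that matches the original spec (write distant.csv with only the first and fifth columns). The newline='' arguments are an addition not present in the answers above; the csv docs recommend them to avoid blank lines on Windows.

import csv

def filter_columns(csvfile):
    # Read the 5-column input and keep only columns 0 and 4.
    with open(csvfile, 'r', newline='') as rf, open('distant.csv', 'w', newline='') as wf:
        reader = csv.reader(rf)
        writer = csv.writer(wf)
        for row in reader:
            writer.writerow([row[0], row[4]])

# Example call (assumes the items.csv from the question exists):
# filter_columns('items.csv')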


Data is adjacent to headers in csv

How can I put my first row of data in the CSV under the header and not in the same row as the header?
This is the result I am getting.
And here is my code:
import os
# ...
filename = 'C:/Desktop/GPS_Trial/Trial6/' + str(d1) + '_' + str(file_counter) + '.csv'
# check whether the file exists or not
rows_to_be_written = []
if not os.path.exists(filename):
    rows_to_be_written.append(header1)
    rows_to_be_written.append(header2)
    rows_to_be_written.append(header3)
rows_to_be_written.append(gps)
rows_to_be_written.append(gps2)
rows_to_be_written.append(gps3)
# write the data into csv
with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(rows_to_be_written)
print(gps, gps2, gps3)
You write the header and the values in a single row when the file does not exist. You should write them separately:
rows_to_be_written = []
header = None
if not os.path.exists(filename):
    header = [header1, header2, header3]
rows_to_be_written.append(gps)
rows_to_be_written.append(gps2)
rows_to_be_written.append(gps3)
# write the data into csv
with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f, delimiter=',')
    if header:
        writer.writerow(header)
    writer.writerow(rows_to_be_written)
print(gps, gps2, gps3)
It may also be that you intended to write several rows, but you are writing everything as a single row together with the header. In that case, change the code like this:
rows_to_be_written = []
if not os.path.exists(filename):
    rows_to_be_written.append([header1, header2, header3])
rows_to_be_written.append([gps, gps2, gps3])
# write the data into csv
with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f, delimiter=',')
    for row in rows_to_be_written:
        writer.writerow(row)
print(gps, gps2, gps3)
You need to add the headings separately, and only if they are not there already:
# check whether the file exists or not
if not os.path.exists(filename):
    headings = [header1, header2, header3]
else:
    headings = None
rows_to_be_written = [gps, gps2, gps3]
# write the data into csv
with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    # write the headings only if the file is new
    if headings is not None:
        writer.writerow(headings)
    # write the row of values
    writer.writerow(rows_to_be_written)
print(gps, gps2, gps3)
I suggest you consider this approach
# headings_list is your header row and lists_of_rows your 2D list of data rows
# Open the file to see if it already has headings
with open(filename, "r") as f:
    try:
        has_headings = csv.Sniffer().has_header(f.read(1024))
    except csv.Error:
        # The file seems to be empty
        has_headings = False
# Open to write, in append mode ("a")
with open(filename, "a") as f:
    writer = csv.writer(f)
    if not has_headings:
        # Write the headings first, since the file does not have them yet
        writer.writerow(headings_list)
    # Use writerows if you have a 2D list, else use a for loop with writer.writerow
    writer.writerows(lists_of_rows)

How to read a CSV and adapt + write every row to another CSV?

I tried this but it just writes "lagerungskissen kleinkind,44" several times instead of transferring every row.
keyword = []
rank = []
rank = list(map(int, rank))
data = []
with open("keywords.csv", "r") as file:
    for line in file:
        data = line.strip().replace('"', '').split(",")
        keyword = data[0]
        rank = data[3]
import csv
with open("mynew.csv", "w", newline="") as f:
    thewriter = csv.writer(f)
    thewriter.writerow(["Keyword", "Rank"])
    for row in keyword:
        thewriter.writerow([keyword, rank])
It should look like this
This is writing the same line in your output CSV because the final block is
for row in keyword:
    thewriter.writerow([keyword, rank])
Note that the keyword variable doesn't change in the loop, but the row does. You're writing that same [keyword, rank] line len(keyword) times.
I would use the csv package to do the reading and the writing for this. Something like
import csv

input_file = '../keywords.csv'
output_file = '../mynew.csv'

# open the files
fIn = open(input_file, 'r', newline='')
fOut = open(output_file, 'w', newline='')
csvIn = csv.reader(fIn, quotechar='"')  # check the keyword args in the docs!
csvOut = csv.writer(fOut)

# write a header, then write each row one at a time
csvOut.writerow(['Keyword', 'Rank'])
for row in csvIn:
    keyword = row[0]
    rank = row[3]
    csvOut.writerow([keyword, rank])

# and close the files
fOut.close()
fIn.close()
As a side note, you could write the above using the with context manager (e.g. with open(...) as file:), which can also manage multiple files at once (in this case fIn and fOut), as sketched below.
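
A minimal sketch of that variant, assuming the same keywords.csv layout as above (keyword in column 0, rank in column 3):

import csv

# Both files are closed automatically when the with block exits,
# even if an exception is raised inside it.
with open('../keywords.csv', 'r', newline='') as fIn, \
        open('../mynew.csv', 'w', newline='') as fOut:
    csvIn = csv.reader(fIn, quotechar='"')
    csvOut = csv.writer(fOut)
    csvOut.writerow(['Keyword', 'Rank'])
    for row in csvIn:
        csvOut.writerow([row[0], row[3]])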

Python file matching and appending

This is one file result.csv:
M11251TH1230
M11543TH4292
M11435TDS144
This is another file sample.csv:
M11435TDS144,STB#1,Router#1
M11543TH4292,STB#2,Router#1
M11509TD9937,STB#3,Router#1
M11543TH4258,STB#4,Router#1
Can I write a Python program that compares both files, and if a line in result.csv matches the first field of a line in sample.csv, appends 1 to that line of sample.csv, otherwise appends 0?
import pandas as pd

d1 = pd.read_csv("1.csv", names=["Type"])
d2 = pd.read_csv("2.csv", names=["Type", "Col2", "Col3"])
d2["Index"] = 0
for x in d1["Type"]:
    # use .loc to avoid chained-assignment problems
    d2.loc[d2["Type"] == x, "Index"] = 1
d2.to_csv("3.csv", header=False, index=False)
Considering "1.csv" and "2.csv" are your csv input files and "3.csv" is the result you needed
The solution using csv.reader and csv.writer (csv module):
import csv

newLines = []
# change the file path to the actual one
with open('./data/result.csv', newline='\n') as csvfile:
    data = csv.reader(csvfile)
    items = [''.join(line) for line in data]
with open('./data/sample.csv', newline='\n') as csvfile:
    data = list(csv.reader(csvfile))
    for line in data:
        line.append(1 if line[0] in items else 0)
        newLines.append(line)
with open('./data/sample.csv', 'w', newline='\n') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(newLines)
The sample.csv contents:
M11435TDS144,STB#1,Router#1,1
M11543TH4292,STB#2,Router#1,1
M11509TD9937,STB#3,Router#1,0
M11543TH4258,STB#4,Router#1,0
With only one column, I wonder why you made result.csv a CSV at all. If it is not going to have any more columns, a simple file read would suffice, and converting the data from result.csv into a dictionary makes the lookups fast as well.
import csv

result_file = "result.csv"
sample_file = "sample.csv"

with open(result_file) as fp:
    result_data = fp.read()
    result_dict = dict.fromkeys(result_data.split("\n"))
"""
You can change the above logic if result.csv gains a few more fields, like this:
result_data = fp.readlines()
result_dict = {}
for result in result_data:
    key, other_field = result.split(",", 1)
    result_dict[key] = other_field.strip()
"""
# Since sample.csv is a real csv, use csv reader and writer (Python 2 style file modes)
with open(sample_file, "rb") as fp:
    sample_data = csv.reader(fp)
    output_data = []
    for data in sample_data:
        # append 1 if the first field is present in result_dict, else 0
        output_data.append(data + [int(data[0] in result_dict)])
with open(sample_file, "wb") as fp:
    data_writer = csv.writer(fp)
    data_writer.writerows(output_data)
The following snippet of code will work for you
import csv

with open('result.csv', 'rb') as f:
    reader = csv.reader(f)
    result_list = []
    for row in reader:
        result_list.extend(row)

with open('sample.csv', 'rb') as f:
    reader = csv.reader(f)
    sample_list = []
    for row in reader:
        if row[0] in result_list:
            sample_list.append(row + [1])
        else:
            sample_list.append(row + [0])

with open('sample.csv', 'wb') as f:
    writer = csv.writer(f)
    writer.writerows(sample_list)

How to read list which contains comma from CSV file as a column?

I want to read a CSV file which contains the following data:
Input.csv-
10,[40000,1][50000,5][60000,14]
20,[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
30,[60000,4]
40,[40000,5][50000,14]
I want to parse this CSV file row by row. But these lists contain commas, so I'm not getting the correct result.
Program-Code-
import csv

if __name__ == "__main__":
    with open(inputfile, "r") as f:
        reader = csv.reader(f, skipinitialspace=True)
        next(reader, None)
        for read in reader:
            no = read[0]
            splitted_record = read[1]
            print splitted_record
Output-
[40000
[40000
[60000
[40000
I understand that csv.reader splits each column at the commas. But how can I read each whole list as one column?
Expected Output-
[40000,1][50000,5][60000,14]
[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
[60000,4]
[40000,5][50000,14]
Writing stuff to other file-
name_list = ['no', 'splitted_record']
file_name = 'temp/' + no + '.csv'
if not os.path.exists(file_name):
    f = open(file_name, 'a')
    writer = csv.DictWriter(f, delimiter=',', fieldnames=name_list)
    writer.writeheader()
else:
    f = open(file_name, 'a')
    writer = csv.DictWriter(f, delimiter=',', fieldnames=name_list)
writer.writerow({'no': no, 'splitted_record': splitted_record})
How can I write this splitted_record without the quotes ""?
You can join those items back together, since you know they were split on commas:
if __name__ == "__main__":
    with open(inputfile, "r") as f:
        reader = csv.reader(f, skipinitialspace=True)
        next(reader, None)
        for read in reader:
            no = read[0]
            splitted_record = ','.join(read[1:])
            print splitted_record
output
[40000,1][50000,5][60000,14]
[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
[60000,4]
[40000,5][50000,14]
---update---
To write it back to a file, where data is the output above:
with open(filepath, 'wb') as f:
    w = csv.writer(f)
    for line in data:
        w.writerow([line])
You can also define your own dialect and register it so the reader behaves the way you need; see the sketch below.
https://docs.python.org/2/library/csv.html
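
For illustration only, a minimal sketch of what registering and using a custom dialect looks like; the dialect name and format parameters here are assumptions rather than something from the answer above, and a dialect by itself will not keep the bracketed groups together, since the comma inside and outside the brackets is the same character:

import csv

# register a reusable set of formatting parameters under a name
csv.register_dialect('bracketed', delimiter=',', quotechar='"',
                     skipinitialspace=True)

with open('Input.csv', 'r') as f:
    reader = csv.reader(f, dialect='bracketed')
    for row in reader:
        print(row)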

Attempting to merge three columns in CSV, updating original CSV

Some example data:
title1|title2|title3|title4|merge
test|data|here|and
test|data|343|AND
",3|data|343|and
My attempt at coding this:
import csv
import StringIO

storedoutput = StringIO.StringIO()
fields = ('title1', 'title2', 'title3', 'title4', 'merge')
with open('file.csv', 'rb') as input_csv:
    reader = csv.DictReader(input_csv, fields, delimiter='|')
    for counter, row in enumerate(reader):
        counter += 1
        #print row
        if counter != 1:
            for field in fields:
                if field == "merge":
                    row['merge'] = ("%s%s%s" % (row["title1"], row["title3"], row["title4"]))
        print row
        storedoutput.writelines(','.join(map(str, row)) + '\n')
contents = storedoutput.getvalue()
storedoutput.close()
print "".join(contents)

with open('file.csv', 'rb') as input_csv:
    input_csv = input_csv.read().strip()
output_csv = []
output_csv.append(contents.strip())
if "".join(output_csv) != input_csv:
    with open('file.csv', 'wb') as new_csv:
        new_csv.write("".join(output_csv))
Output should be
title1|title2|title3|title4|merge
test|data|here|and|testhereand
test|data|343|AND|test343AND
",3|data|343|and|",3343and
For your reference, upon running this code the first print prints the rows as I would hope them to appear in the output CSV. However, the second print prints the title row x times, where x is the number of rows.
Any input or corrections or working code would be appreciated.
I think we can make this a lot simpler. Dealing with the rogue " was a bit of a nuisance, I admit, because you have to work hard to tell Python you don't want to worry about it.
import csv

with open('file.csv', 'rb') as input_csv, open("new_file.csv", "wb") as output_csv:
    reader = csv.DictReader(input_csv, delimiter='|', quoting=csv.QUOTE_NONE)
    writer = csv.DictWriter(output_csv, reader.fieldnames, delimiter="|",
                            quoting=csv.QUOTE_NONE, quotechar=None)
    merge_cols = "title1", "title3", "title4"
    writer.writeheader()
    for row in reader:
        row["merge"] = ''.join(row[col] for col in merge_cols)
        writer.writerow(row)
produces
$ cat new_file.csv
title1|title2|title3|title4|merge
test|data|here|and|testhereand
test|data|343|AND|test343AND
",3|data|343|and|",3343and
Note that even though you wanted the original file updated, I refused. Why? It's a bad idea, because then you can destroy your data while working on it.
How can I be so sure? Because that's exactly what I did when I first ran your code, and I know better. ;^)
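
If you really do want file.csv updated in place, one common pattern (not from the answer above, and assuming Python 3, where os.replace overwrites atomically on the same filesystem) is to write to a temporary sibling file first and only swap it over the original once the write has finished:

import csv
import os

tmp_name = 'file.csv.tmp'
with open('file.csv', 'r', newline='') as input_csv, \
        open(tmp_name, 'w', newline='') as output_csv:
    reader = csv.DictReader(input_csv, delimiter='|', quoting=csv.QUOTE_NONE)
    writer = csv.DictWriter(output_csv, reader.fieldnames, delimiter='|',
                            quoting=csv.QUOTE_NONE, quotechar=None)
    writer.writeheader()
    for row in reader:
        row['merge'] = ''.join(row[col] for col in ('title1', 'title3', 'title4'))
        writer.writerow(row)
# a crash before this point leaves the original file untouched
os.replace(tmp_name, 'file.csv')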
That double quote in the last line is definitely messing up the csv.DictReader().
This works:
new_lines = []
with open('file.csv', 'rb') as f:
    # copy the header line through unchanged
    new_lines.append(f.next().strip())
    for line in f:
        # strip the newline and split the fields
        line = line.strip().split('|')
        # extract the field data you want
        title1, title3, title4 = line[0], line[2], line[3]
        # turn the field data into a string and append it to the rest
        line.append(''.join([title1, title3, title4]))
        # save the new line for later
        new_lines.append('|'.join(line))
with open('file.csv', 'w') as f:
    # make one long string and write it to the new file
    f.write('\n'.join(new_lines))
import csv
import StringIO

stored_output = StringIO.StringIO()
with open('file.csv', 'rb') as input_csv:
    reader = csv.DictReader(input_csv, delimiter='|', quoting=csv.QUOTE_NONE)
    writer = csv.DictWriter(stored_output, reader.fieldnames, delimiter="|",
                            quoting=csv.QUOTE_NONE, quotechar=None)
    merge_cols = "title1", "title3", "title4"
    writer.writeheader()
    for row in reader:
        row["merge"] = ''.join(row[col] for col in merge_cols)
        writer.writerow(row)
contents = stored_output.getvalue()
stored_output.close()
print contents

with open('file.csv', 'rb') as input_csv:
    input_csv = input_csv.read().strip()
if input_csv != contents.strip():
    with open('file.csv', 'wb') as new_csv:
        new_csv.write("".join(contents))
