python to list a file - python

This script is actually not working with the desired input
script:
import csv
file1 = csv.reader(open("1.csv"))
file2 = csv.reader(open("2.csv"))
file3 = open("3.csv", "w")
k, l = list(file1),list(file2)
length_file1 = len(k)
length_file2 = len(l)
n = []
# Copy the header row of 2.csv to the output.
file3.write(",".join(str(i) for i in l[0])+'\n')
# Compare column 1 of every data row of 1.csv with column 1 of every data
# row of 2.csv (row 0 of each file is skipped).
for i in xrange(1, length_file1):
    arr = k[i][1]
    for j in xrange(1, length_file2):
        arr2 = l[j][1]
        if arr == arr2:
            # On a match, copy column 0 from 1.csv into the 2.csv row.
            l[j][0] = k[i][0]
            print(l[j])
            n.append(l[j])
            file3.write(",".join(str(i) for i in l[j])+'\n')
So I want the code to be replaced.

You can create a dictionary with the key:value pairs from 1.csv and compare each value in 2.csv with the keys from the dictionary. This is using Python3; there is no need to use range and xrange here, as you can iterate over the lists directly.
import csv

# Keep only the second column of each row of 2.csv.
with open("2.csv", 'r') as f:
    file2 = [j for _, j in csv.reader(f)]  # This is to remove the blank item at the start of each row

# Map the second column of 1.csv to its first column.
with open("1.csv", 'r') as f:
    file1 = {i: j for j, i in csv.reader(f)}

# One "value,key" line for every 2.csv entry that is also a key from 1.csv.
toWrite = ["{},{}".format(file1[i], i) for i in file2 if i in file1]

with open("bdsp_updated.csv", "w") as f:
    f.write('\n'.join(toWrite))
Content of bdsp_updated.csv:
1,99277050
10,92782013
2,71269815
3,99724582
7,92043333
4,92011116
8,99799635

Related

Store each file in a sublist based on subfolder

There is a list_1 which has paths of many subfolders.
list_1
which gives:
['C:\\Users\\user\\Downloads\\problem00001\\ground_truth.json',
'C:\\Users\\user\\Downloads\\problem00002\\ground_truth.json',
'C:\\Users\\user\\Downloads\\problem00003\\ground_truth.json']
Purpose
In gt2 list there should be a sublist for the json file from problem1. Then another sublist for the json from problem2 and so on.
The attempted code below stores all the json files in the gt2 list.
gt2=[]
for k in list_1:
    with open(k, 'r') as f:
        # One mapping of 'unknown-text' -> 'true-author' per file.
        gt = {}
        for i in json.load(f)['ground_truth']:
            gt[i['unknown-text']] = i['true-author']
        gt2.append(gt)
The end result should be: inside the gt2 list to have 3 sublists:
one for the file from problem1,
another from problem2 and
another from problem3
Assuming the list is sorted, use enumerate over list_1 and make gt2 a dict to store the json data.
gt2 = {}
# Number the files from 1 so the keys read 'problem1', 'problem2', ...
for k, v in enumerate(list_1, start=1):
    gt = {}
    with open(v, 'r') as f:
        # NOTE(review): the question's code iterates json.load(f)['ground_truth'];
        # confirm whether the entries are at the top level or under that key.
        for i in json.load(f):
            gt[i['unknown-text']] = i['true-author']
    gt2[f'problem{k}'] = gt
# access values of dict here
print(gt2['problem1'])
Edit
gt2 = []
for fi in list_1:
    with open(fi, 'r') as f:
        # One sublist per file, holding a single-entry dict for each record.
        gt2.append([
            {i['unknown-text']: i['true-author']} for i in json.load(f)
        ])

count instances of duplicates in a column of a .csv file

import re, csv
import os,shutil
import io,json, collections
from collections import Counter, defaultdict,deque
sn=0 #1st column
p_f=1 #2nd column
reader = csv.reader(open("C:/Users/gurbir.sahota/Documents/python_csv_file_program/remove_duplicates.csv", "r"), delimiter='\t')
f= csv.writer(open("C:/Users/gurbir.sahota/Documents/python_csv_file_program/final.csv", "w"))
g=open("C:/Users/gurbir.sahota/Documents/python_csv_file_program/remove_duplicates.csv",'r')
with open("C:/Users/gurbir.sahota/Documents/python_csv_file_program/remove_duplicates.csv", 'r') as infh:
    data = csv.reader(infh)
    next(data) # skip header
    seen = defaultdict(set)
    # set.add() returns None, so `not seen[...].add(...)` is always true and
    # only records the value as a side effect of the filter.
    # NOTE(review): the membership test uses row[p_f] but the value added is
    # row[sn] -- confirm which one is intended.
    counts = Counter(
        row[sn]
        for row in data
        if row[sn] and row[p_f] not in seen[row[sn]] and not seen[row[sn]].add(row[sn])
    )
print(counts.most_common())
#want to count instances of the number 2 in [('VFGRP15040030', 2), ('VFGRP15370118', 2), ('VFGRP15150113', 2)]
x=len(list(csv.reader(open('C:/Users/gurbir.sahota/Documents/python_csv_file_program/remove_duplicates.csv'))))
print('# of rows including header=');print(x)
count_pass = sum(1 for row in csv.reader(open('C:/Users/gurbir.sahota/Documents/python_csv_file_program/remove_duplicates.csv')) if row[1] =='pass')
print('# of passes=');print(count_pass)
count_fail = sum(1 for row in csv.reader(open('C:/Users/gurbir.sahota/Documents/python_csv_file_program/remove_duplicates.csv')) if row[1] =='fail')
print('# of fails=');print(count_fail)
#count_retest = ??
# The parentheses are required: a bare `g.close` only names the method.
g.close()
#f.close
# to get duplicates and their frequency for a column
import csv
from collections import Counter
from operator import itemgetter

with open('data.csv', 'r', newline='') as f:
    r = csv.reader(f)
    # here we take as example column number 1
    cn = Counter(map(itemgetter(1), r))

# print item that appears more than once in the column
for k, v in cn.items():
    if v > 1:
        print(k,v)

Python list write to CSV without the square brackets

I have this main function:
def main():
    """Clear the console, collect host/manufacturer/IP lists, write output.csv."""
    subprocess.call("cls", shell=True)
    ipList,hostList,manfList,masterList,temp = [],[],[],[],[]
    ipList,hostList,manfList, = getIPs(),getHosts(),getManfs()
    entries = len(hostList)
    i = 0
    for i in xrange(i, entries):
        # Each value is wrapped in its own one-element list here, so every
        # CSV cell receives a list rather than a plain value.
        temp = [[hostList[i]],[manfList[i]],[ipList[i]]]
        masterList.append(temp)
    with open("output.csv", "wb") as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerows(masterList)
My current output is that it successfully writes to CSV but my objective is to remove the square brackets.
I tried using .join() method however I understand that it only takes single lists and not nested lists.
How can I achieve this given that I'm using a 3 dimensional list? Note, I intend to add more columns of data in the future.
Edit:
My current output for 1 row is similar to:
['Name1,'] ['Brand,'] ['1.1.1.1,']
I would like it to be:
Name1, Brand, 1.1.1.1,
Try to remove bracket for values in temp while creating masterList, because it will be nested list. So, the code should be:
def main():
    """Clear the console, collect host/manufacturer/IP lists, write output.csv."""
    subprocess.call("cls", shell=True)
    ipList,hostList,manfList,masterList,temp = [],[],[],[],[]
    ipList,hostList,manfList, = getIPs(),getHosts(),getManfs()
    entries = len(hostList)
    i = 0
    for i in xrange(i, entries):
        # A flat row: one plain value per CSV column.
        temp = [hostList[i], manfList[i], ipList[i]]
        masterList.append(temp)
    with open("output.csv", "wb") as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerows(masterList)
What you could do is strip a string of the data maybe?
import string
# Python 2 only: string.maketrans and the two-argument str.translate.
writer.writerows(str(masterList).translate(string.maketrans('', ''), '[]\''))
E.g.
>>> import string
>>> temp = [['1.1.1'], ['Name1'], ['123']]
>>> str(temp).translate(string.maketrans('', ''), '[]\'')
'1.1.1, Name1, 123'
In Python 3.6:
>>> temp = [['1.1.1'], ['Name1'], ['123']]
>>> str(temp).translate({ord('['): '', ord(']'): '', ord('\''): ''})
'1.1.1, Name1, 123'
Try to change this:
temp = [[hostList[i]],[manfList[i]],[ipList[i]]]
to this:
temp = [hostList[i],manfList[i],ipList[i]]
I agree with the answers above, about the brackets removal, however if this is crucial to you for some reason, here is a function that takes a list as an input and returns you a csv row acceptable list.
def output_list(masterList):
    """Return a flat list of the items in *masterList*.

    Nested lists are expanded recursively, in order; any item that is not
    a list is kept as-is.
    """
    output = []
    for item in masterList:
        if isinstance(item, list):
            # Recurse so lists nested at any depth are flattened too.
            output.extend(output_list(item))
        else:
            output.append(item)
    return output
You can use it in the line masterList.append(temp) as masterList.append(output_list(temp)), or even like this:
#in the end
with open("output.csv", "wb") as f:
    writer = csv.writer(f, delimiter=',')
    # Flatten each row before writing it.
    for i in masterList:
        writer.writerow(output_list(i))

changing given text to list of lists

I have the following text in a given file:
1234,A,7.99,10.3,12.8,101,0.11843,0.27276,0.30101
87635,B,19.69,21.25,130,1203,0.1096,0.1599,0.1974
First, I want to get rid of the 1234 and 87635 in the front, and I also want to change A into the integer "1" and B into the integer "0".
This is my code:
def convert(file):
    data = open(file, 'r')
    list1 = []
    for line in data:
        line_data = line.strip().split(',')
        if line_data[0] == "B":
            line_data[0] = 0
        else:
            line_data[0] = 1
        # Appends the raw `line` (not `line_data`) once per character of the
        # line; `datalist` is not defined inside this function.
        for i in range(len(line)):
            datalist.append(line)
    # This builds a 2-tuple: (array of list1, the float type).
    list1 = np.array(list1), float
    data.close()
    return list1
This is the output I want:
[[1234,A,7.99,10.3,12.8,101,0.11843,0.27276,0.30101], [87635,B,19.69,21.25,130,1203,0.1096,0.1599,0.1974]]
The output I'm currently getting is a list of strings, instead of the list of lists.
You are getting a list of strings , because you are appending to the list as -
datalist.append(line)
This appends the original line, not the changed line_data. Also, all the elements in line_data would still be strings (except for the first element), as you are never converting them to numbers.
The easiest approach here would be to use the csv module: parse the file as CSV, pop the first element from each row that is returned, and then change the second element (which becomes the first element after the removal). Example -
def convert(file):
    """Read the CSV at path *file* and return its rows as lists of floats.

    The first column of every row is dropped. The next column becomes 0.0
    when it is 'B' and 1.0 otherwise; every remaining value is converted
    with float(). Blank rows are skipped.
    """
    import csv
    lst = []
    # Open the path that was passed in, not a placeholder file name.
    with open(file, 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                continue
            row.pop(0)
            row[0] = 0 if row[0] == 'B' else 1
            lst.append(list(map(float, row)))
    return lst
Example/Demo -
My a.csv -
1234,A,7.99,10.3,12.8,101,0.11843,0.27276,0.30101
87635,B,19.69,21.25,130,1203,0.1096,0.1599,0.1974
Code and Result -
>>> import csv
>>> with open('a.csv','r') as f:
... reader = csv.reader(f)
... lst = []
... for row in reader:
... row.pop(0)
... if row[0] == 'B':
... row[0] = 0
... else:
... row[0] = 1
... lst.append(list(map(float, row)))
...
'1234'
'87635'
>>> lst
[[1.0, 7.99, 10.3, 12.8, 101.0, 0.11843, 0.27276, 0.30101], [0.0, 19.69, 21.25, 130.0, 1203.0, 0.1096, 0.1599, 0.1974]]

Python dictionary created from CSV file should merge the value (integer) whenever the key repeats

I have a file named report_data.csv that contains the following:
user,score
a,10
b,15
c,10
a,10
a,5
b,10
I am creating a dictionary from this file using this code:
with open('report_data.csv') as f:
    f.readline() # Skip over the column titles
    # dict() keeps only the last value seen for each repeated key.
    mydict = dict(csv.reader(f, delimiter=','))
After running this code mydict is:
mydict = {'a':5,'b':10,'c':10}
But I want it to be:
mydict = {'a':25,'b':25,'c':10}
In other words, whenever a key that already exists in mydict is encountered while reading a line of the file, the new value in mydict associated with that key should be the sum of the old value and the integer that appears on that line of the file. How can I do this?
The most straightforward way is to use defaultdict or Counter from useful collections module.
from collections import Counter

summary = Counter()
with open('report_data.csv') as f:
    f.readline()  # skip the header line
    for line in f:
        if not line.strip():
            continue  # ignore blank lines such as a trailing newline
        lbl, n = line.split(",")
        # A Counter returns 0 for a missing key, so += works on first sight.
        summary[lbl] += int(n)
One of the most useful features in Counter class is the most_common() function, that is absent from the plain dictionaries and from defaultdict
This should work for you:
with open('report_data.csv') as f:
    f.readline()  # skip the header line
    mydict = {}
    for line in csv.reader(f, delimiter=','):
        user, score = line[0], line[1]
        # Start from 0 the first time a user is seen, then accumulate.
        mydict[user] = mydict.get(user, 0) + int(score)
try this.
mydict = {}
with open('report_data.csv') as f:
    f.readline()  # skip the header line
    x = csv.reader(f, delimiter=',')
    for x1 in x:
        # Add to an existing total, or start one for a new key.
        if x1[0] in mydict:
            mydict[x1[0]] += int(x1[1])
        else:
            mydict[x1[0]] = int(x1[1])
print(mydict)

Categories