and thank you in advance for your help. I have this code so far :
with open("clean_result.csv", "r", encoding="utf-8", errors="ignore") as
new_data:
reader = csv.reader(new_data, delimiter=',', quotechar='"')
for row in reader:
if row:
columns = [row[0], row[1]]
init_dict.append(columns)
for ean, price in init_dict:
result[ean].append(price)
And then I get the min value for each price with this line :
maxitems = {ean : min(result[ean]) for ean in result}
Current Output : {'8714789828558': '5,51', '3326100000182': '15,00', '3286010016683': '3,93' (...) }
What I would like to do is add row[2] to get additional info, but only for the row with the minimum price value.
Desired Output : {'8714789828558': ('5,51', 'A'), '3326100000182': ('15,00', 'B'), '3286010016683': ('3,93', 'C') (...) }
I tried this :
for row in reader:
if row:
columns = [row[0], row[1], row[2]]
init_dict.append(columns)
for ean, price, desc in init_dict:
result[ean].append(price)
result[ean].append(desc)
maxitems = {ean : min(result[ean]) for ean in result}
But the output looks like this; half of the data is missing:
{'8714789828558': 'A', '3326100000182': 'B' '3286010016683': 'C' (...) }
I have probably misunderstood something, so any help would be appreciated.
from operator import itemgetter
from collections import defaultdict
def _price_as_number(entry):
    """Return the price of a ``(price, description)`` pair as a float.

    Prices arrive from the CSV as text with a decimal comma (e.g. '5,51').
    Comparing them as strings would rank '15,00' below '5,51', so they are
    converted to numbers before taking the minimum.
    """
    return float(entry[0].replace(',', '.'))


# Group every (price, description) pair under its EAN (first column).
result = defaultdict(list)
for row in reader:
    if row:  # csv.reader yields an empty list for blank lines
        result[row[0]].append((row[1], row[2]))

# For each EAN keep the (price, description) pair with the lowest price.
# dict.items() replaces the Python 2-only dict.iteritems().
minitems = {
    ean: min(prices, key=_price_as_number) for ean, prices in result.items()
}
Related
How to find the total number of duplicate rows in a file, and how to write the Python code for it
import csv
# Report, for every data row, whether its address (column index 6) has
# already appeared earlier in the file.
#
# open() replaces the Python 2-only file() builtin, and the `with` block
# closes the handle.  The raw string keeps the backslashes of the Windows
# path literal ('\D' and '\B' are invalid escape sequences otherwise), and
# newline='' is how the csv module expects its input files to be opened.
with open(r'T:\DataDump\Book1.csv', newline='') as csv_file:
    csv_data = csv.reader(csv_file)
    next(csv_data, None)  # skip the header row; tolerate an empty file
    already_seen = set()
    for row in csv_data:
        Address = row[6]
        if Address in already_seen:
            print('{} is a duplicate Address'.format(Address))
        else:
            print('{} is a unique Address'.format(Address))
            already_seen.add(Address)
Try using pandas instead of the csv module
import pandas as pd
csv_data = pd.read_csv('T:/DataDump/Book1.csv')
shape_original = csv_data.shape
print(f"Number of rows: {shape_original[0]}")
#Below how to drop duplicates
csv_data_no_duplicates = csv_data.drop_duplicates(keep="first")
shape_new = csv_data_no_duplicates.shape
print(f"Number of rows: {shape_new[0]}")
number_duplicates = shape_original[0] - shape_new[0]
I did with this example to try if it works:
thisdict = {
"brand": ["Ford","Renault","Ford"],
"model": ["Mustang","Laguna","Mustang"],
"year": ["1964","1978","1964"]
}
data = pd.DataFrame.from_dict(thisdict)
data_no_duplicates = data.drop_duplicates(keep="first")
print(data_no_duplicates.head())
I have a CSV, OutputA with format:
Position,Category,Name,Team,Points
1,A,James,Team 1,100
2,A,Mark,Team 2,95
3,A,Tom,Team 1,90
I am trying to get an output of a CSV which gets the total points for each team, the average points per team and the number of riders.
So output would be:
Team,Points,AvgPoints,NumOfRiders
Team1,190,95,2
Team2,95,95,1
I have this function to convert each row to a namedtuple:
fields = ("Position", "Category", "Name", "Team", "Points")
Results = namedtuple('CategoryResults', fields)


def csv_to_tuple(path):
    """Lazily yield each CSV row of *path* as a ``CategoryResults`` tuple.

    Every row of the file is converted, the first one included.  Bytes that
    cannot be decoded are dropped while reading (``errors='ignore'``).
    """
    with open(path, 'r', errors='ignore') as handle:
        yield from map(Results._make, csv.reader(handle))
Then this sorts the rows into a list ordered by their club:
moutputA = sorted(list(csv_to_tuple("Male/outputA.csv")), key=lambda k: k[3])
This returns a list like:
[CategoryResults(Position='13', Category='A', Name='Marek', Team='1', Points='48'), CategoryResults(Position='7', Category='A', Name='', Team='1', Points='70')]
I am confident that this so far is right although I could be wrong.
I am trying to create a new list of teams with the points (not yet added up).
For example:
[Team 1(1,2,3,4,5)]
[Team 2 (6,9,10)]
etc.
The idea is that I can find how many unique values of points there are (this equals the number of riders). However, when trying to group the list I have this code:
Clubs = []
Club_Points = []
for Names, Club in groupby(moutputA, lambda x: x[3]):
for Teams in Names:
Clubs.append(list(Teams))
for Club, Points in groupby(moutputA, lambda x: x[4]):
for Point in Clubs:
Club_Points.append(list(Point))
print(Clubs)
but this returns this error:
Teams.append(list(Team))
AttributeError: 'itertools._grouper' object has no attribute 'append'
If data.csv contains:
Position,Category,Name,Team,Points
1,A,James,Team 1,100
2,A,Mark,Team 2,95
3,A,Tom,Team 1,90
Then this script:
import csv
from collections import namedtuple
from itertools import groupby
from statistics import mean
fields = ("Position", "Category", "Name", "Team", "Points")
Results = namedtuple('CategoryResults', fields)


def csv_to_tuple(path):
    """Yield each data row of the CSV at *path* as a ``CategoryResults`` tuple.

    The first line is treated as a header and skipped.  An empty file yields
    nothing.

    Args:
        path: Location of the CSV file to read.

    Yields:
        One ``CategoryResults`` namedtuple per row after the header.

    Raises:
        TypeError: If a row does not have exactly ``len(fields)`` columns
            (raised by ``Results._make``).
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, 'r', errors='ignore', newline='') as handle:
        # The default of None matters: a bare next() on an empty file raises
        # StopIteration, which inside a generator becomes RuntimeError
        # (PEP 479).
        next(handle, None)  # skip header
        yield from map(Results._make, csv.reader(handle))
# Sort by team first: groupby only merges *adjacent* rows with equal keys.
moutputA = sorted(csv_to_tuple("data.csv"), key=lambda record: record.Team)

# One summary dict per team: total points, mean points and rider count.
out = []
for team, members in groupby(moutputA, key=lambda record: record.Team):
    scores = [int(member.Points) for member in members]
    out.append({
        'Team': team,
        'Points': sum(scores),
        'AvgPoints': mean(scores),
        'NumOfRider': len(scores),
    })

# Write the summaries, header first, in the column order given by fieldnames.
fieldnames = ['Team', 'Points', 'AvgPoints', 'NumOfRider']
with open('data_out.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(out)
Produces data_out.csv:
Team,Points,AvgPoints,NumOfRider
Team 1,190,95,2
Team 2,95,95,1
Screenshot from LibreOffice:
Here's a start. You should be able to figure out how to get what you want from this.
import csv, io
from collections import namedtuple
from itertools import groupby
data = '''\
Position,Category,Name,Team,Points
1,A,James,Team 1,100
2,A,Mark,Team 2,95
3,A,Tom,Team 1,90
'''

# In-memory stand-in for the CSV file; the header line is consumed up front.
b = io.StringIO(data)
next(b)

fields = ("Position", "Category", "Name", "Team", "Points")
Results = namedtuple('CategoryResults', fields)


def csv_to_tuple(file):
    """Yield every CSV row read from the open *file* as a ``CategoryResults``."""
    yield from map(Results._make, csv.reader(file))


# Index 3 is the Team column; sorting on it makes each team's rows adjacent,
# which groupby needs.
rows = sorted(csv_to_tuple(b), key=lambda k: k[3])
for TeamName, TeamRows in groupby(rows, lambda x: x[3]):
    TeamPoints = [member.Points for member in TeamRows]
    # Team name, its list of points, then a blank separator line.
    print(TeamName, TeamPoints, '', sep='\n')
All of this would be made easier by just using pandas. Check out the code below.
import pandas as pd
import numpy as np
# Aggregate, per team: total points, number of riders and average points,
# then write the summary table to output_path.
df = pd.read_csv(input_path)
teams = list(set(df['Team']))  # unique list of all the teams
num_teams = len(teams)
points = np.empty(shape=num_teams)
avg_points = np.empty(shape=num_teams)
num_riders = np.empty(shape=num_teams)
for i, team in enumerate(teams):
    # all rows whose entry in the 'Team' column equals this team
    req = df.loc[df['Team'] == team]
    points[i] = np.sum(req['Points'])
    num_riders[i] = len(req)
    # Fixed: the original read the undefined name `point` here (NameError).
    avg_points[i] = points[i] / num_riders[i]
dict_out = {
    'Team': teams,
    'Points': points,
    'AvgPoints': avg_points,
    'NumOfRiders': num_riders,
}
df_out = pd.DataFrame(data=dict_out)
df_out.to_csv(output_path)
Background
I am storing data in dictionaries. The dictionaries can be of different lengths, and in a particular dictionary there could be keys with multiple values. I am trying to write the data out to a CSV file.
Problem/Solution
Image 1 is how my actual output prints out. Image 2 shows how i would want my output to actually printout. Image 2 is the desired output.
CODE
import csv
from itertools import zip_longest  # Python 3 name of Python 2's izip_longest

e = {'Lebron':[25,10],'Ray':[40,15]}
c = {'Nba':5000}


def writeData():
    """Write the players in ``e`` next to the companies in ``c`` to file1.csv.

    The two dicts are paired item by item.  Whichever dict runs out first is
    padded with two empty fields so every row keeps the same column layout.
    Each player's value is written as-is into a single cell.
    """
    # newline='' stops the csv module's own line endings from being
    # translated a second time by the text layer.
    with open('file1.csv', mode='w', newline='') as csv_file:
        fieldnames = ['Player Name','Points','Assist','Company','Total Employes']
        writer = csv.writer(csv_file)
        writer.writerow(fieldnames)
        for employee, company in zip_longest(e.items(), c.items()):
            # zip_longest fills the shorter side with None.
            row = list(employee) if employee is not None else ['', '']
            row += list(company) if company is not None else ['', '']  # Write empty fields if no company
            writer.writerow(row)


writeData()
I am open to all solutions/suggestions that can help me get my desired output format.
For a much simpler answer, you just need to add one line of code to what you have:
row = [row[0]] + row[1]
so:
for employee, company in izip_longest(e.items(), c.items()):
row = list(employee)
row = [row[0]] + row[1]
row += list(company) if company is not None else ['', ''] # Write empty fields if no company
from collections import defaultdict
values = defaultdict(dict)
values[Name1] = {Points: [], Assist: [], Company: blah, Total_Employees: 123}
for generating the output, traverse through each item in the values to give you names, and populate other values using the key_values in the nested dict.
Again, make sure that there are no multiple entries with the same name, or choose the one with unique entries in the defaultdict.
Demo for the example-
>>> from collections import defaultdict
>>> import csv
>>> values = defaultdict(dict)
>>> vals = [["Lebron", 25, 10, "Nba", 5000], ["Ray", 40, 15]]
>>> fields = ["Name", "Points", "Assist", "Company", "Total Employes"]
>>> for item in vals:
... if len(item) == len(fields):
... details = dict()
... for j in range(1, len(fields)):
... details[fields[j]] = item[j]
... values[item[0]] = details
... elif len(item) < len(fields):
... details = dict()
... for j in range(1, len(fields)):
... if j+1 <= len(item):
... details[fields[j]] = item[j]
... else:
... details[fields[j]] = ""
... values[item[0]] = details
...
>>> values
defaultdict(<class 'dict'>, {'Lebron': {'Points': 25, 'Assist': 10, 'Company': 'Nba', 'Total Employes': 5000}, 'Ray': {'Points': 40, 'Assist': 15, 'Company': '', 'Total Employes': ''}})
>>> csv_file = open('file1.csv', 'w')
>>> writer = csv.writer(csv_file)
>>> for i in values:
... row = [i]
... for j in values[i]:
... row.append(values[i][j])
... writer.writerow(row)
...
23
13
>>> csv_file.close()
Contents of 'file1.csv':
Lebron,25,10,Nba,5000
Ray,40,15,,
I use the csv library to create a table of products.
In order to then import it to the site, I need that each characteristic be written in a separate column.
Adding a new row is done using simple loop:
writer = csv.writer(csvfile)
for product in products:
writer.writerow((product['price'],
product['vendor_code'],
product['characteristics']))
Adding a new product:
product = []
product.append({
'price' : price,
'vendor_code' : vendor_code,
'characteristics' : characteristics,
})
characteristics - array that contains each characteristic as a separate element
How do I get the output file in this form:
190$ #0172 characteristic1 characteristic2 characteristic3
characteristics - initialization:
# Collect the text of every characteristics table row.  Runs of two or more
# whitespace characters, and single newlines, are each replaced by one space.
try:
    characteristics = []
    soup_characteristics = soup.find_all('tr', {'class' : 'product_card__product_characters_item clearfix'})
    for ch in soup_characteristics:
        # Raw string: '\s' in a plain literal is an invalid escape sequence
        # (a warning today, an error in future Python versions).
        characteristics.append(re.sub(r'\s\s+|\n', ' ', ch.text))
except AttributeError:
    characteristics = ""
Try unpacking the characteristic array:
for product in products:
writer.writerow((product['price'],
product['vendor_code'],
*product['characteristics']))
Here is the code I tested:
products = [{
'price': 100,
'vendor': 123,
'characters': [7, 8, 9],
}]
with open('test.csv', 'w') as fo:
writer = csv.writer(fo)
for p in products:
writer.writerow((
p['price'],
p['vendor'],
*p['characters'],
))
Here is the content of the test.csv file:
100,123,7,8,9
You should be able to build a list to write as an entire row:
for product in products:
row = [product['price'],product['vendor_code']] # [price,vendor_code]
row.extend(product['characteristics']) # [price,vendor_code,characteristic1,characteristic2,...]
writer.writerow(row) # writes each value in the list as a new column
I am working on a function to pull specific rows out of a CSV. Every CSV row has a unique ID that identifies it to the function. Some IDs are missing. After iterating over the file, I want to somehow detect these invalid IDs.
Example:
(a sample CSV db_short.csv with rows 1-52 and then 99)
import csv
def get_row(csvfile, row_id):
    """Look up *row_id* in the ``id`` column of the CSV file at *csvfile*.

    Args:
        csvfile: Path of a CSV file whose header contains an ``id`` column.
        row_id: The id to search for; it is compared as a string.

    Returns:
        ``'id = <row_id>'`` when a row with that id exists, otherwise the
        fallback string ``'default'``.
    """
    wanted = str(row_id)
    # Text mode with newline='' is what the csv module needs on Python 3; the
    # previous binary mode ('rb') only worked on Python 2.  A separate name
    # for the handle also avoids shadowing the `csvfile` path parameter.
    with open(csvfile, newline='') as handle:
        for row in csv.DictReader(handle, delimiter=',', quotechar='|'):
            if row['id'] == wanted:
                # Every match yields the same message, so stop at the first.
                return 'id = {}'.format(row['id'])
    return 'default'
db = "db_short.csv"
flatlist = [1, 18, 42, 51, 53, 99]
# Look up every requested id; ids absent from the CSV produce get_row's
# fallback value.
new_entries = [get_row(db, i) for i in flatlist]
# print() call instead of the Python 2-only print statement.
print(new_entries)
Note that flatlist includes a deliberately missing ID 53. This code predictably produces output where search for 'id' : 53' returns 'default'.
['id = 1', 'id = 18', 'id = 42', 'id = 51', 'default', 'id = 99']
I would however like to replace somevalue = 'default' with, say, a customized message alerting to a missing ID, that will only appear if DictReader went through the whole CSV and did not find any row that contains 'id' : '53' -- .
somevalue = '{} id missing!'.format(row_id)
So how do I have to change my code?