I have a code to create a CSV with information from another CSV file. In my new CSV file, I would like to save only 20 rows sorted from highest to lowest of row ['impressions']
I read something about pandas but I don't find anything about how to do it!
To be more clear, I shared some images:
before:
enter image description here
after:
enter image description here
Code:
import csv
input_file = 'report_2017_12_11_12_31_19UTC.csv'
output_file= "All_Data_Tags.csv"
with open(input_file) as csvfile, open(output_file, "w") as output:
reader = csv.DictReader(csvfile)
cols = ("domain","ddomain","opportunities", "impressions", "fillRate", "DATA")
writer = csv.DictWriter(output, fieldnames=cols, extrasaction='ignore')
writer.writeheader()
for row in reader:
row['fillRate'] = '{:.2f}'.format(float(row['fillRate']) * 100)
if row['ddomain'] == "":
if row['domain'] == "":
row['ddomain'] = "App"
row['domain'] = " "
if row['domain'] == row['ddomain']:
row['domain'] = "Real Site"
if row['domain'] == "":
row['domain'] = "Detected Only"
if row['ddomain'] == "":
row['ddomain'] = "Vast Media"
if row['ddomain'] != row['domain']:
if row['ddomain'] != "Vast Media":
if row['domain'] != "Real Site":
if row['domain'] != "Detected Only":
if row['ddomain'] != "App":
row['DATA'] = "FAKE"
else:
row['DATA'] = "OK"
else:
row['DATA'] = "OK"
else:
row['DATA'] = "OK"
else:
row['DATA'] = "OK"
writer.writerow(row)
Here is the Answer:
code:
import pandas as pd
movies = pd.read_csv('Top20_Media_Yesterday.csv')
movies = movies.sort_values(['impressions'], ascending=False)
movies = movies.to_csv("Top20_Media_Yesterday.csv")
movies = pd.read_csv('Top20_Media_Yesterday.csv', nrows=21)
movies = movies.to_csv("Top20_Media_Yesterday.csv")
Use the DataFrame.sort_values function of the pandas framework, passing the column name(s),you wish to sort, to the by argument and setting axis to 1.
You can find similar examples here.
Related
I'm trying to end the code, but I have problem how to slice data frame based on user's input. Is there any option to do this without pandas?
def dataSet_read():
enter = input('Enter file path:')
csvreader = csv.reader(open(enter))
head_inp = input('Has the file headers? Select Y or N:\n').upper()
header = []
if head_inp == 'Y':
header = next(csvreader)
print('\nFile headers:\n\n', header)
elif head_inp == 'N':
print("'\nFile doesn't have headers")
else:
print('Incorrect selection!!!')
sys.exit()
with open(str(enter), "r") as csvfile:
reader_variable = csv.reader(csvfile, delimiter = ",")
rows_inp = input("\nPlease provide range which you'd like to see using ',', otherwise all dataframe will open all dataset.\n")
if rows_inp == '':
for row in reader_variable:
print(row)
else:
print("????")
cast it to list then you can slice like what it is in normal list structure.
enter = input('Enter file path:')
rows_inp = input("slice")
with open(enter , 'r') as f:
reader_variable = csv.reader(f)
reader_list= list(reader_variable)
for row in reader_list[:rows_inp]:#if you want slice the whole data
current_date = row[:rows_inp] #if you want slice per row
print(current_date)
I found the way to get what I need, maybe it's not the best approach but works :)
with open(str(enter), "r") as csvfile:
reader_variable = csv.reader(csvfile, delimiter = ",")
rows_inp = input("\nPlease provide range which you'd like to see using ',', otherwise all dataframe will open all dataset.\n")
if rows_inp == '':
for row in reader_variable:
print(row)
else:
i, j = rows_inp.split(',')
reader_list = list(reader_variable)
print(reader_list[int(i):int(j)+1])
I am beginning to learn python and I have this project where I have a menu that either adds, lists, or updates values for books in a csv. The headers are "BookName", "AuthorName", "SharedWith", "IsRead" and I am trying to change the "IsRead" row for a given book added by the user.
My problem is that whenever I am trying to edit the row that contains that book, I end up deleting all other rows instead. All I want to do is to update a value for a certain row in a csv. Here's the function that I wrote.
def updateBook():
book_name = input("Enter book name: ")
import csv
rows_list = []
with open('booksDB.csv', mode='r') as file:
rows = list(csv.DictReader(file, fieldnames=("BookName", "AuthorName", "SharedWith", "IsRead")))
for row in rows:
rows_list.append(row["BookName"]) # we store every book name in a list
if book_name not in rows_list: # we search the book the user typed in our list
add_new_book = input(f' The {book_name} book does not exits. Would you like to add it? (Y/N)? ')
if add_new_book.upper() == "N":
return
else:
addBook()
return
else:
book_read = input("Is the book read? (Y/N)? ")
if book_read.upper() == 'Y':
book_read = True
else:
book_read = False
rows = []
with open('booksDB.csv', mode='r') as file:
rows = list(csv.DictReader(file, fieldnames=("BookName", "AuthorName", "SharedWith", "IsRead")))
for row in rows:
if row["BookName"] == book_name:
row["IsRead"] = book_read
break
with open('booksDB.csv',mode='r+') as file: # WIP, here I can't make the csv keep all books in the list when editing a certain one
csv_writer = csv.DictWriter(file, fieldnames=[
"BookName", "AuthorName", "SharedWith", "IsRead"
])
if row["IsRead"] == book_read:
csv_writer.writerow({"BookName": row.get("BookName"),
"AuthorName": row.get("AuthorName"),
"SharedWith": row.get("SharedWith"),
"IsRead": book_read})
print("Book was updated successfully")
The First Thing You Should Open Your CSV File In a 'w' Mode Not 'r'
After I Looked at your Code You Didn't Respect The Indentation For your Function Try To Fix it First
Did You Try To Give Input First
roww = input("Enter The Name Of The Row Here : ")
and After use it :
for row in rows:
if row["BookName"] == book_name:
row[rf"{roww}"] = book_read
break
Try This it Might Help
Thank you guys for your support. I made a small tweak and managed to get the code working 100% with the following lines:
def updateBook():
book_name = input("Enter book name: ")
import csv
rows = []
rows_list = []
with open('booksDB.csv', mode='r') as file:
rows = list(csv.DictReader(file, fieldnames=("BookName", "AuthorName", "SharedWith", "IsRead")))
for row in rows:
rows_list.append(row["BookName"]) # we store every book name in a list
if book_name not in rows_list: # we search the book the user typed in our list
add_new_book = input(f' The {book_name} book does not exits. Would you like to add it? (Y/N)? ')
if add_new_book.upper() == "N":
return
else:
addBook()
return
else:
book_read = input("Is the book read? (Y/N)? ")
if book_read.upper() == 'Y':
book_read = True
else:
book_read = False
rows = []
with open('booksDB.csv', mode='r') as file:
rows = list(csv.DictReader(file, fieldnames=("BookName", "AuthorName", "SharedWith", "IsRead")))
for row in rows:
if row["BookName"] == book_name:
row["IsRead"] = book_read
break
with open('booksDB.csv',mode='w') as file:
csv_writer = csv.DictWriter(file, fieldnames=["BookName", "AuthorName", "SharedWith", "IsRead"])
csv_writer.writerows(rows)
print("Book was updated successfully!")
I have a CSV file that has several columns. Two of the columns are called Namn (Name) (Product) and Alla bilder (All images).
Some of the products have several images. These images are inserted on their own row under the "All images":
Example:
Name All images
Name1 Image1
Name2 Image2
Image2-1
Image2-2
Name3 Image3
I am trying to get all images belonging to one product to be inserted in the first image-row and seperated by a |. Here's the code I've managed with help from a friend to make, but I just don't get it to work. (I downloaded Python, and ran the .py script) - Nothing happens. Please note I have never touched Python before.
import codecs, csv
def main():
file_input = codecs.open('test.csv', 'r', 'utf-8-sig')
dictreader = csv.DictReader(file_input, delimiter=',')
main_rows = []
fields = []
temp_row = {}
for row_item in dictreader:
if len(fields) == 0:
# fix the fields header...
for field in row_item:
fields.append(field)
if row_item['Name'] == '':
# this row probably only contains picture information...
if 'Alla bilder' in row_item and row_item['Alla bilder'] != '':
if 'Alla bilder' not in temp_row:
temp_row['Alla bilder'] = row_item['Alla bilder']
elif temp_row['Alla bilder'] == '':
temp_row['Alla bilder'] = row_item['Alla bilder']
else:
temp_row['Alla bilder'] = temp_row['Alla bilder'] + '|' + row_item['Alla bilder']
else:
# this seems to be a new product row...
if len(temp_row) != 0:
# there seems to be something to store....
print('\nSTORING: ' + str(temp_row))
main_rows.append(temp_row)
temp_row = {}
# print('\nNEW ROW: ' + str(row_item))
temp_row = row_item
if len(temp_row) != 0:
# there seems to be something to store....
print('\nSTORING: ' + str(temp_row))
main_rows.append(temp_row)
file_output = codecs.open('test2.csv', 'w', 'utf-8-sig')
dictwriter = csv.DictWriter(file_output, delimiter=',', fieldnames=fields)
dictwriter.writerows(main_rows)
if _name_ == "_main_":
main()
else:
print('This file was called from another class...')
What error or message gives you when you start the script from the console?
Try changing
if _name_ == "_main_":
main()
else:
print('This file was called from another class...')
For
if __name__ == "__main__":
main()
else:
print('This file was called from another class...')
It's the only error I see (if I'm not wrong, it's __name__, not _name_). Changing that works for me. Using this input:
Name,Alla bilder
Name1,Image1
Name2,Image2
,Image2-1
,Image2-2
Name3,Image3
I get the next result:
Name1,Image1
Name2,Image2|Image2-1|Image2-2
Name3,Image3
Is that your problem?
I want to optimize the following code:
for myFile in myFiles:
file = open(filename, 'rt')
try:
if CLIENT == "C1":
head = rows[:7]
tail = rows[7:]
for row in rows:
if "".join(row)!= "":
if not u_pass:
header = [ row.strip().replace(" ", "_") for row in row[3:] ]
u_pass = True
else:
self.usecases(row, data, index)
elif CLIENT == 'C2':
reader = csv.reader(file)
firstline = next(reader)
secondline = next(reader)
else:
for row in rows:
if "".join(row)!= "":
if not u_pass:
header = [ row.strip().replace(" ", "_") for row in row[3:] ]
u_pass = True
# Recuperation des donnees
else:
self.usecases(row, data, index)
The code below is repeated twice in the previous code, meaning there are some common instructions between these conditions "
for row in rows:
if "".join(row)!= "":
if not u_pass:
header = [ row.strip().replace(" ", "_") for row in row[3:] ]
u_pass = True
else:
self.usecases(row, data, index)
Below should do the trick:
for myFile in myFiles:
file = open(filename, 'rt')
try:
if CLIENT == "C2":
reader = csv.reader(file)
firstline = next(reader)
secondline = next(reader)
else:
if CLIENT == "C1":
head = rows[:7]
tail = rows[7:]
for row in rows:
if "".join(row)!= "":
if not u_pass:
header = [ row.strip().replace(" ", "_") for row in row[3:] ]
u_pass = True
else:
self.usecases(row, data, index)
I'm reading the data from one file named SPD_file. Matching the data with another file named Custom. And all the records which are matching in both the files will be written into the third file.
But it seems that something is wrong, because the code is matching the records and printing on console. But when I'm writing into another file nothing is coming into the new file, other than the header.
workbook = xlrd.open_workbook(SPD_file)
worksheets = workbook.sheet_names()
mapping_records = {}
for worksheet_name in worksheets:
worksheet = workbook.sheet_by_name(worksheet_name)
mapping_record = MappingRecord()
if worksheet_name == "CD":
for curr_row in range(0,worksheet.nrows):
mapping_record = worksheet.row(curr_row)
print worksheet_name
print mapping_record[0].value
for curr_row in mapping_record:
#print "In Loop...."
spd_record = MappingRecord()
spd_record.id = "00002269"
spd_record.erocode = None
spd_record.scno = None
mapping_records[mapping_record[8]] = spd_record
print "Read SPD File....."
custom_file_name = "Custom_" + today.strftime('%Y-%m-%d') + ".csv"
custom_file = ops_home + path + "\\" + custom_file_name
custom = open(custom_file, 'rb')
reader = csv.reader(custom, delimiter=',', quotechar='"')
for line in reader:
if mapping_records.has_key(mapping_record[8]):
spd_record = mapping_records[mapping_record[8]]
if line[7] == "ERO Code":
spd_record.erocode = line[8]
elif line[7] == "Service Number":
spd_record.scno = line[8]
#create a new file.
New_file = ops_home + '\\Reports\\SPD_new_' + today.strftime('%d%m%Y') + '.xlsx'
workbook = xlsxwriter.Workbook(New_file)
# Add a bold format to use to highlight cells.
bold = workbook.add_format({'bold': 1})
money = workbook.add_format({'num_format': '#,##0.00'})
worksheetCd = workbook.add_worksheet("CD")
cdHeader = ("Merchant ID", "EroCode", "Service Number")
cd_row = 0
cd_col = 0
for columnHeader in cdHeader:
worksheetCd.write(cd_row, cd_col, columnHeader,bold)
cd_col += 1
for ctx in mapping_records:
spd_record = mapping_records[ctx]
if spd_record.payment_mode == "CRD":
cd_row += 1
cd_col = 0
cdRow = (spd_record.id, spd_record.erocode, spd_record.scno)
for columnData in cdRow:
if cd_col == 5 or cd_col == 19 or cd_col ==20 or cd_col ==21:
worksheetCd.write_number(cd_row, cd_col, columnData, money)
else:
worksheetCd.write(cd_row, cd_col, columnData)
cd_col += 1
workbook.close()