Ordering by Time using cmp_to_key - python

I am trying to order my csvfile of food items by time whenever a new item is added. I have found a solution which I really like in which each time is compared against each other. My only issue is that I am unsure as to what variable/data structure I should place into the position marked with times.
What should happen is that the new row is added to the 2D list myRows, and then the contents of myRows is sorted based on time. The order should be earliest at the start, latest at the end.
# Read every row of dataset.txt into the 2D list myRows.
# (The indentation of the with/for bodies was lost when this snippet was pasted.)
myRows = []
with open("dataset.txt") as csvfile:
reader = csv.reader(csvfile)
for row in reader:
myRows.append(row)
# Build the new row and add it to the list; time, myType, desc, serving,
# kcal and sfat are assigned outside this snippet.
newRow = [time, myType, desc, serving, kcal, sfat]
myRows.append(newRow)
# `times` is the placeholder the question asks about; it is not defined here.
# NOTE(review): sorted() passes whole elements of its first argument to
# compareTimes, which parses each argument with time.strptime - confirm that
# the elements handed over are time strings rather than complete rows.
myRows = sorted(times, key=cmp_to_key(compareTimes))
Compare Times Function
def compareTimes(timeStr1, timeStr2):
    """Three-way comparison of two time strings, for use with cmp_to_key.

    Both strings are parsed with time.strptime using timeFormat, which is
    defined outside this snippet.

    Returns -1 if timeStr1 is earlier than timeStr2, 1 if it is later and
    0 if both times are the same.
    """
    # Convert the time strings passed into the function into time objects.
    time1 = time.strptime(timeStr1, timeFormat)
    time2 = time.strptime(timeStr2, timeFormat)
    if time1 < time2:
        return -1
    # Compare time1 here: the original tested the `time` module itself
    # against time2, which is never a meaningful comparison.
    elif time1 > time2:
        return 1
    else:
        # The times are the same.
        return 0
Dataset.txt
22:30, Snack, Cereal, 200, 210,1.6
08:11, Breakfast, Cereal, 200, 210,1.6
08:20, Breakfast, Coffee, 200, 20,0.4
08:20, Breakfast, Pancake, 38, 74,1.4
10:30, Snack, Chocolate, 10, 56,2.5
I have tried myRows[0],myRows etc however this has not worked.

You have to use myRows as argument in sorted()
myRows = sorted(myRows ...)
But to sort strings like 22:30 and 08:11 you don't need a function that converts the strings to datetime objects, because zero-padded HH:MM strings already compare in chronological order as plain text (for example "08:11" < "22:30" is True). So you can use
myRows = sorted(myRows, key=lambda x:x[0])
text = '''22:30, Snack, Cereal, 200, 210,1.6
08:11, Breakfast, Cereal, 200, 210,1.6
08:20, Breakfast, Coffee, 200, 20,0.4
08:20, Breakfast, Pancake, 38, 74,1.4
10:30, Snack, Chocolate, 10, 56,2.5'''

import csv
import io

# io.StringIO stands in for the real file so the example is self-contained;
# with the real data use open("dataset.txt") instead.
with io.StringIO(text) as csvfile:
    myRows = list(csv.reader(csvfile))

# A new row would be appended here, before sorting:
#     myRows.append([time, myType, desc, serving, kcal, sfat])

# Zero-padded "HH:MM" strings sort chronologically as plain text, so the
# first column can be used directly as the sort key. sorted() is stable, so
# rows with the same time keep their original relative order.
myRows = sorted(myRows, key=lambda row: row[0])

for row in myRows:
    print(row)
Result
['08:11', ' Breakfast', ' Cereal', ' 200', ' 210', '1.6']
['08:20', ' Breakfast', ' Coffee', ' 200', ' 20', '0.4']
['08:20', ' Breakfast', ' Pancake', ' 38', ' 74', '1.4']
['10:30', ' Snack', ' Chocolate', ' 10', ' 56', '2.5']
['22:30', ' Snack', ' Cereal', ' 200', ' 210', '1.6']
EDIT: The same with pandas is much shorter
text = '''22:30, Snack, Cereal, 200, 210,1.6
08:11, Breakfast, Cereal, 200, 210,1.6
08:20, Breakfast, Coffee, 200, 20,0.4
08:20, Breakfast, Pancake, 38, 74,1.4
10:30, Snack, Chocolate, 10, 56,2.5'''

import io

import pandas as pd

# With the real file pass "dataset.txt" instead of io.StringIO(text).
#
# The data mixes ", " and "," as separators (see "210,1.6"), so split on the
# bare comma and let skipinitialspace drop the blank that follows it.
# Using sep=', ' would leave "210,1.6" as a single field: kcal would hold
# that text and sfat would be empty.
myRows = pd.read_csv(
    io.StringIO(text),
    sep=',',
    skipinitialspace=True,
    names=['time', 'myType', 'desc', 'serving', 'kcal', 'sfat'],
)

# mergesort is a stable algorithm, so rows that share a time keep the order
# they had in the file.
myRows = myRows.sort_values('time', kind='mergesort')
print(myRows)
EDIT: version with function
text = '''22:30, Snack, Cereal, 200, 210,1.6
08:11, Breakfast, Cereal, 200, 210,1.6
08:20, Breakfast, Coffee, 200, 20,0.4
08:20, Breakfast, Pancake, 38, 74,1.4
10:30, Snack, Chocolate, 10, 56,2.5'''
import csv
from functools import cmp_to_key
import time
import io
def compare_times(row1, row2):
    """Three-way comparison of two rows by their first column (the time).

    The function name is snake_case, the usual convention for Python
    functions (the question used compareTimes).

    The times are compared as plain strings. For zero-padded "HH:MM" values
    that ordering is already chronological, so converting each value with
    time.strptime(row[0], '%H:%M') is not required.

    Returns -1 if row1 sorts first, 1 if row2 sorts first and 0 if the two
    times are equal, which is the contract functools.cmp_to_key expects.
    """
    time1 = row1[0]
    time2 = row2[0]
    # Boolean arithmetic: True - False == 1, False - True == -1, and two
    # False values give 0 when the times are equal.
    return (time1 > time2) - (time1 < time2)
# io.StringIO stands in for the real file so the example is self-contained;
# with the real data use open("dataset.txt") instead.
with io.StringIO(text) as csvfile:
    myRows = list(csv.reader(csvfile))

# A new row would be appended here, before sorting:
#     myRows.append([time, myType, desc, serving, kcal, sfat])

# cmp_to_key adapts the two-argument compare_times to the key= protocol
# that sorted() expects.
myRows = sorted(myRows, key=cmp_to_key(compare_times))

for row in myRows:
    print(row)

Related

How to distribute comma separated element to form a list in python

How to extract/split multi-line comment to make a new list
clientInfo="""James,Jose,664 New Avenue,New Orleans,Orleans,LA,8/27/200,123,jjose#gmail.com,;
Shenna,Laureles, 288 Livinghood Heights,Brighton,Livingston,MI,2/19/75,laureles9219#yahoo.com,;
"""
into this kind of list
f_name = ["james","sheena"]
l_name = ["jose","Laureles"]
strt = ["664 New Avenue","288 Livinghood Heights"]
cty = ["New Orleans","Brighton"]
state = ["New Orleans","Livingston"]
If the order is always same. You could do something like this;
f_name = []
l_name = []
strt = []
cty = []
state = []

# clientData is the multi-line client string from the question.
# Every record ends with ";" followed by a line break. Split on ";" alone and
# strip each piece: the text never contains ";\n " (with a trailing blank),
# so splitting on that would return the whole string as a single record.
for client in clientData.split(";"):
    client = client.strip()
    if not client:
        # The piece after the final ";" is empty - nothing to extract.
        continue
    # Strip each field so values such as " 288 Livinghood Heights" lose
    # their leading blank.
    client_ = [field.strip() for field in client.split(",")]
    f_name.append(client_[0])
    l_name.append(client_[1])
    strt.append(client_[2])
    cty.append(client_[3])
    state.append(client_[4])
I could add some exception handling to handle the ; at the end of your string but, leaving that to you.
You can use split and zip.
def extract(string):
    """Transpose ";"-terminated, comma-separated records into columns.

    Each record is split on "," and every field is stripped of surrounding
    whitespace. The i-th tuple of the result holds the i-th field of every
    record, so any number of records is supported (not only two).

    Blank pieces, such as the one after the final ";", are ignored. As with
    zip, the columns stop at the length of the shortest record. An input
    without any record yields an empty list.
    """
    records = [
        tuple(field.strip() for field in record.split(","))
        for record in string.split(";")
        if record.strip()
    ]
    # zip(*records) pairs up the fields that share a position.
    return list(zip(*records))
This will produce
[('James', 'Shenna'), ('Jose', 'Laureles'), ('664 New Avenue', '288 Livinghood Heights'), ('New Orleans', 'Brighton'), ('Orleans', 'Living
ston'), ('LA', 'MI'), ('8/27/200', '2/19/75'), ('123', 'laureles9219#yahoo.com'), ('jjose#gmail.com', '')]
It has some tuples at the end you didn't ask for, but it's relatively good. The no_space1 and no_space2 lines are a bit repetitive, but cramming them into one line is worse in my opinion.
You can try:
clientData = """James,Jose,664 New Avenue,New Orleans,Orleans,LA,8/27/200,123,jjose#gmail.com,;
Shenna,Laureles, 288 Livinghood Heights,Brighton,Livingston,MI,2/19/75,laureles9219#yahoo.com,;
"""

# One entry per record; the piece after the last ";\n" is an empty string.
data = clientData.split(";\n")

f_name = []
l_name = []
strt = []
cty = []
state = []

for data_line in data:
    data_line = data_line.strip()
    line_info = data_line.split(",")
    # Require the five leading fields themselves. Checking the length of the
    # raw text instead would let a short record such as "abcde" through and
    # fail with an IndexError below.
    if len(line_info) < 5:
        continue
    f_name.append(line_info[0].strip())
    l_name.append(line_info[1].strip())
    strt.append(line_info[2].strip())
    cty.append(line_info[3].strip())
    state.append(line_info[4].strip())

print(f_name)
print(l_name)
print(strt)
print(cty)
print(state)
Output:
['James', 'Shenna']
['Jose', 'Laureles']
['664 New Avenue', '288 Livinghood Heights']
['New Orleans', 'Brighton']
['Orleans', 'Livingston']

Creating lists from the dictionary or just simply sort it

I have the following code:
import os
import pprint

file_path = input("Please, enter the path to the file: ")

if os.path.exists(file_path):
    worker_dict = {}
    k = 1  # 1-based worker number used in the dictionary keys
    # The with block closes the file even if a line fails to parse.
    with open(file_path, 'r') as workers_file:
        for line in workers_file:
            split_line = line.split()
            if not split_line:
                # Skip blank lines instead of storing an empty worker.
                continue
            worker = 'worker{}'.format(k)
            worker_name = '{}_{}'.format(worker, 'name')
            worker_yob = '{}_{}'.format(worker, 'yob')
            worker_job = '{}_{}'.format(worker, 'job')
            worker_salary = '{}_{}'.format(worker, 'salary')
            # Fields by position: first name, last name, year of birth,
            # job, salary.
            worker_dict[worker_name] = ' '.join(split_line[0:2])
            worker_dict[worker_yob] = ' '.join(split_line[2:3])
            worker_dict[worker_job] = ' '.join(split_line[3:4])
            worker_dict[worker_salary] = ' '.join(split_line[4:5])
            k += 1
else:
    print('Error: Invalid file path')
File:
John Snow 1967 CEO 3400$
Adam Brown 1954 engineer 1200$
Output from worker_dict:
{
'worker1_job': 'CEO',
'worker1_name': 'John Snow',
'worker1_salary': '3400$',
'worker1_yob': '1967',
'worker2_job': 'engineer',
'worker2_name': 'Adam Brown',
'worker2_salary': '1200$',
'worker2_yob': '1954',
}
And I want to sort data by worker name and after that by salary. So my idea was to create a separate list with salaries and worker names to sort. But I have problems with filling it, maybe there is a more elegant way to solve my problem?
import os
import pprint

file_path = input("Please, enter the path to the file: ")

if os.path.exists(file_path):
    with open(file_path, 'r') as fh:
        content = fh.read().splitlines()

    res = []
    for line in content:
        val = line.split()
        if not val:
            # Skip blank lines.
            continue
        # Merge first and last name into one field, giving
        # [name, year of birth, job, salary].
        res.append([" ".join(val[:2])] + val[2:])

    # Salaries are strings such as '3400$'. Compare them as numbers, because
    # as text '900$' would sort after '1200$'. A salary that is not a whole
    # number followed by '$' raises ValueError here.
    res.sort(key=lambda x: int(x[3].rstrip('$')))  # sort by salary
    print(res)
    # list.sort is stable, so rows with the same name stay in salary order.
    res.sort(key=lambda x: x[0])  # sort by name
    print(res)
Output:
[['Adam Brown', '1954', 'engineer', '1200$'], ['John Snow', '1967', 'CEO', '3400$']]
[['Adam Brown', '1954', 'engineer', '1200$'], ['John Snow', '1967', 'CEO', '3400$']]
d = {
'worker1_job': 'CEO',
'worker1_name': 'John Snow',
'worker1_salary': '3400$',
'worker1_yob': '1967',
'worker2_job': 'engineer',
'worker2_name': 'Adam Brown',
'worker2_salary': '1200$',
'worker2_yob': '1954',
}
from itertools import zip_longest
#re-group:
def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks or blocks.

    Example: grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx

    Returns an iterator of n-tuples; the last tuple is padded with
    fillvalue when the input runs out.
    """
    # The list holds n references to the *same* iterator. zip_longest takes
    # one item per reference in turn, so each output tuple consumes n
    # consecutive items of the input.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
#re-order:
# Each group holds the four values of one worker in the order the keys
# appear in d: job, name, salary, yob.
# NOTE(review): this relies on d.values() following the order in which the
# keys were written - confirm for the Python version in use.
reorder = [1, 2, 0, 3]  # -> name, salary, job, yob
res = [[group[i] for i in reorder] for group in grouper(d.values(), 4)]
#sort:
# After the re-ordering, column 0 is the name and column 1 the salary, so
# this sorts by name first and by salary second.
res.sort(key=lambda x: (x[0], x[1]))
output:
[['Adam Brown', '1200$', 'engineer', '1954'],
['John Snow', '3400$', 'CEO', '1967']]
grouper is defined and explained in the itertools documentation (in the recipes section). I've grouped the values of your dictionary into one record per worker and returned them as a re-ordered list of lists. As lists, I sort them by name and salary. This solution is modular: it distinctly groups, re-orders and sorts.
I recommend to store the workers in a different format, for example .csv, then you could use csv.DictReader and put it into a list of dictionaries (this would also allow you to use jobs, names, etc. with more words like "tomb raider").
Note that you have to convert the year of birth and salary to ints or floats to sort them correctly, otherwise they would get sorted lexicographically as in a real world dictionary (book) because they are strings, e.g.:
>>> sorted(['100', '11', '1001'])
['100', '1001', '11']
To sort the list of dicts you can use operator.itemgetter as the key argument of sorted, instead of a lambda function, and just pass the desired key to itemgetter.
The k variable is useless, because it's just the len of the list.
The .csv file:
"name","year of birth","job","salary"
John Snow,1967,CEO,3400$
Adam Brown,1954,engineer,1200$
Lara Croft,1984,tomb raider,5600$
The .py file:
import os
import csv
from operator import itemgetter
from pprint import pprint

file_path = input('Please, enter the path to the file: ')

if os.path.exists(file_path):
    # newline='' lets the csv module handle line endings itself.
    with open(file_path, 'r', newline='') as f:
        worker_list = list(csv.DictReader(f))

    # Convert the numeric columns so they sort as numbers, not as text.
    for worker in worker_list:
        worker['salary'] = int(worker['salary'].strip('$'))
        worker['year of birth'] = int(worker['year of birth'])

    pprint(worker_list)
    pprint(sorted(worker_list, key=itemgetter('name')))
    pprint(sorted(worker_list, key=itemgetter('salary')))
    pprint(sorted(worker_list, key=itemgetter('year of birth')))
else:
    # Report the problem instead of silently doing nothing.
    print('Error: Invalid file path')
You still need some error handling in case an int conversion fails, or you can just let the program crash.

sort key in dict based on particular order

My code:
abc = {
'Name':emp_name,
'Id no':emp_idno,
'Leave taken':k,
'Leave reqd':b,
'Total days of leave':total,
'Reason':reason
}
I am getting output as:
id, name, total, reason, leave taken, leave reqd
I want the output to be in this order:
Name, id, leave taken, leave reqd, total, reason
I am stuck up with this and it would be great if anyone can help me out.
my codes for csv
dl = {'Name':emp_name,'Id no':emp_idno, 'Leave taken':k, 'Leave
> reqd':b, 'Reason':reason}
>
>
> key_list = ['Name', 'Id no', 'Leave taken', 'Leave reqd', 'Reason'] abc = { 'Name':emp_name, 'Id no':emp_idno, 'Leave taken':k,
> 'Leave reqd':b, 'Reason':reason }
>
> for k in key_list:
> print abc[k] Lst.append(k)
>
> keys = Lst[0].keys() with open('employee.csv', 'wb') as output_file:
> dict_writer = csv.DictWriter(output_file, keys)
> dict_writer.writeheader()
> dict_writer.writerows(Lst)
Dictionaries are unordered by default.
You need an ordered dictionary.
See collections.OrderedDict
eg:
from collections import OrderedDict

# An OrderedDict returns its keys in the order in which they were inserted.
d = OrderedDict()
d['Name'] = emp_name
d['Id no'] = emp_idno
d['Leave taken'] = k
# Call form of print: works as a function call and, with a single argument,
# also as a Python 2 print statement.
print(d)
one simple hack can be storing the keys in a list.
# The list fixes the order in which the values are printed.
key_list = ['Name', 'Id no', 'Leave taken', 'Leave reqd', 'Total days of leave', 'Reason']
abc = {
    'Name': emp_name,
    'Id no': emp_idno,
    'Leave taken': k,
    'Leave reqd': b,
    'Total days of leave': total,
    'Reason': reason,
}
# The loop variable is called `key` so that it does not overwrite `k`, the
# value stored under 'Leave taken' above.
for key in key_list:
    print(abc[key])

Convert a csv into category-subcategory using array

Above is the input table i have in csv
I am trying to use arrays and while loops in Python. I am new to this language. The loops should run twice to give the Category\sub-category\sub-category_1 order. I am trying to use split(). The output should be like below
import csv
# Open the CSV file in binary mode ('rb').
with open('D:\\test.csv', 'rb') as f:
# NOTE(review): delimiter is an empty string here, but csv.reader needs a
# one-character delimiter - confirm the intended separator.
reader = csv.reader(f, delimiter='',quotechar='|')
data = []
for name in reader:
# NOTE(review): data is a list and name is a row produced by the reader, so
# this index assignment cannot work as written; a dict keyed by a single
# field may have been intended - confirm.
data[name] = []
And if you read the lines of your csv and access the data then you can manipulate the way you want later.
# Maps the integer category number to the remaining fields of its line.
cats = {}
with open('my.csv', "r") as ins:
    # check each line of the file
    for line in ins:
        # remove double quotes: replace('"', '')
        # remove the line break: rstrip()
        a = line.replace('"', '').rstrip().split('|')
        # Skip blank lines and the header line that starts with 'CatNo'.
        if not a[0] or a[0] == 'CatNo':
            continue
        cats[int(a[0])] = a[1:]

for p in cats:
    print('cat_id: %d, value: %s' % (p, cats[p]))

# you can access the value by the int ID
print(cats[1001])
the output:
cat_id: 100, value: ['Best Sellers', 'Best Sellers']
cat_id: 1001, value: ['New this Month', 'New Products\\New this Month']
cat_id: 10, value: ['New Products', 'New Products']
cat_id: 1003, value: ['Previous Months', 'New Products\\Previous Months']
cat_id: 110, value: ['Promotional Material', 'Promotional Material']
cat_id: 120, value: ['Discounted Products & Special Offers', 'Discounted Products & Special Offers']
cat_id: 1002, value: ['Last Month', 'New Products\\Last Month']
['New this Month', 'New Products\\New this Month']
Updated script for your question:
categories = {}
def get_parent_category(cat_id):
    """Return the id of the parent category, or '' when there is none.

    Ids of at most two characters are treated as top-level and have no
    parent. For longer ids the parent id is the id without its last
    character.
    """
    if len(cat_id) <= 2:
        return ''
    return cat_id[:-1]
with open('my.csv', "r") as ins:
    for line in ins:
        # remove double quotes: replace('"', '')
        # remove the line break: rstrip()
        a = line.replace('"', '').rstrip().split('|')
        cat_id = a[0]
        # Skip blank lines and the header line that starts with 'CatNo'.
        if not cat_id or cat_id == 'CatNo':
            continue
        categories[cat_id] = {
            'parent': get_parent_category(cat_id),
            'desc': a[1],
            'long_desc': a[2],
        }

print('Categories relations:')
for p in categories:
    parent = categories[p]['parent']
    output = categories[p]['desc']
    # Walk up the chain of parents, prefixing each description. Parent ids
    # are derived from the child id, so one may be absent from the file;
    # stop there instead of failing with a KeyError.
    while parent != '' and parent in categories:
        output = categories[parent]['desc'] + ' \\ ' + output
        parent = categories[parent]['parent']
    # A tab and a blank before the text, as print '\t', output would emit.
    print('\t ' + output)
output:
Categories relations:
New Products
New Products \ Best Sellers
New Products \ Discounted Products & Special Offers
New Products \ Best Sellers \ Previous Months
New Products \ Best Sellers \ Last Month
New Products \ Best Sellers \ New this Month

Delete index in list if multiple strings are matched

I've scraped a website containing a table and I want to format the headers for my desired final out.
# Collect the first entry of .contents for every <a> element found inside a
# <th> cell of any <tr> row of the table.
headers = []
for row in table.findAll('tr'):
for item in row.findAll('th'):
for link in item.findAll('a', text=True):
headers.append(link.contents[0])
print headers
Which returns:
[u'Rank ', u'University Name ', u'Entry Standards', u'Click here to read more', u'Student Satisfaction', u'Click here to read more', u'Research Quality', u'Click here to read more', u'Graduate Prospects', u'Click here to read more', u'Overall Score', u'Click here to read more', u'\r\n 2016\r\n ']
I don't want the 'Click here to read more' or '2016' headers so I've done the following:
# First pass: delete every header that contains 'Click'.
# NOTE: deleting from a list while enumerating it shifts the remaining items
# one position to the left, so the element right after each deleted one is
# never examined. It gives the expected result here only because no two
# matching headers are next to each other.
for idx, i in enumerate(headers):
if 'Click' in i:
del headers[idx]
# Second pass: delete every header that contains '2016'.
for idx, i in enumerate(headers):
if '2016' in i:
del headers[idx]
Which returns:
[u'Rank ', u'University Name ', u'Entry Standards', u'Student Satisfaction', u'Research Quality', u'Graduate Prospects', u'Overall Score']
Perfect. But is there a better/neater way of removing the unwanted items? Thanks!
# Keep only the headers that mention neither 'Click' nor '2016'. A list
# comprehension always yields a list, whereas filter() returns a lazy
# iterator on Python 3.
headers = [h for h in headers if 'Click' not in h and '2016' not in h]
If you want to be more generic:
# Substrings that disqualify a header.
banned = ['Click', '2016']
# Keep a header only if it contains none of the banned substrings. A list
# comprehension always yields a list, whereas filter() returns a lazy
# iterator on Python 3.
headers = [h for h in headers if not any(b in h for b in banned)]
You can consider using list comprehension to get a new, filtered list, something like:
new_headers = [header for header in headers if '2016' not in header]
If you can be sure that '2016' will always be last:
>>> [x for x in headers[:-1] if 'Click here' not in x]
['Rank ', 'University Name ', 'Entry Standards', 'Student Satisfaction', 'Research Quality', 'Graduate Prospects', 'Overall Score']
import re

# Matches headers whose stripped text starts with 'Click' or '2016'.
pattern = '^Click|^2016'
# Iterate over `headers`, the list built above; the original used the
# undefined name `header`. Stripping first lets the pattern match entries
# such as '\r\n 2016\r\n ' that carry leading whitespace.
new = [x for x in headers if not re.match(pattern, str(x).strip())]

Categories