Sort a specific column from a text file - python

I am trying to sort a column from a .txt file. I am making a point-register system where I am going to save the name of the player and his three laps.
I am saving my values in the text-file like so:
1. name;lap_1;lap_2;lap_3;
2. name;lap_1;lap_2;lap_3;
3. name;lap_1;lap_2;lap_3;
In my code I write them to the file like so:
for result in results:
    my_file.write("{}:{}:{}:{}:{}:{};\n".format(result["name"],
                                                result["lap1"],
                                                result["lap2"],
                                                result["lap3"],
                                                result["total"],
                                                result["average"]))
How do I sort each column, for example "name"? And how do I print it out?

First of all, as @sgrg suggested, use the CSV file format; e.g. we can write simply with
import csv

def write_results(results, fields_names):
    # or use mode="a" if you want to append
    with open("my_file.csv", mode="w", newline="") as my_file:
        csv_writer = csv.DictWriter(my_file, fieldnames=fields_names, delimiter=";")
        # remember: you don't need to add headers in "append" mode
        csv_writer.writeheader()
        for result in results:
            csv_writer.writerow(result)
Then read with
def read_results(fields_names):
    with open("my_file.csv", mode="r") as my_file:
        # ignoring headers
        next(my_file)
        csv_reader = csv.DictReader(my_file, fieldnames=fields_names, delimiter=";")
        return list(csv_reader)
Sorting of results by name can be done with
sorted_results = sorted(results, key=lambda result: result["name"])
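Note that csv reads every value back as a string, so to sort by a numeric column you need to convert inside the key; a minimal sketch using the "total" field from the example below:

# convert the string value to a number before comparing
sorted_by_total = sorted(results, key=lambda result: float(result["total"]))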
Usage:
fields_names = ["name", "lap1", "lap2", "lap3", "total", "average"]
results_tuples = [("Luke", "lap1_value", "lap2_value", "lap3_value", 100, 96.3),
                  ("Stephen", "lap1_value", "lap2_value", "lap3_value", 100, 96.3),
                  ("Adrian", "lap1_value", "lap2_value", "lap3_value", 100, 96.3)]
results = [dict(zip(fields_names, result_tuple)) for result_tuple in results_tuples]
write_results(results, fields_names=fields_names)
results = read_results(fields_names)
sorted_results = sorted(results, key=lambda result: result["name"])
In the given example, results is a list object which looks like
[{'average': 96.3,
  'lap1': 'lap1_value',
  'lap2': 'lap2_value',
  'lap3': 'lap3_value',
  'name': 'Luke',
  'total': 100},
 {'average': 96.3,
  'lap1': 'lap1_value',
  'lap2': 'lap2_value',
  'lap3': 'lap3_value',
  'name': 'Stephen',
  'total': 100},
 {'average': 96.3,
  'lap1': 'lap1_value',
  'lap2': 'lap2_value',
  'lap3': 'lap3_value',
  'name': 'Adrian',
  'total': 100}]
and sorted_results is a list object which looks like
[OrderedDict([('name', 'Adrian'),
              ('lap1', 'lap1_value'),
              ('lap2', 'lap2_value'),
              ('lap3', 'lap3_value'),
              ('total', '100'),
              ('average', '96.3')]),
 OrderedDict([('name', 'Luke'),
              ('lap1', 'lap1_value'),
              ('lap2', 'lap2_value'),
              ('lap3', 'lap3_value'),
              ('total', '100'),
              ('average', '96.3')]),
 OrderedDict([('name', 'Stephen'),
              ('lap1', 'lap1_value'),
              ('lap2', 'lap2_value'),
              ('lap3', 'lap3_value'),
              ('total', '100'),
              ('average', '96.3')])]
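To print them out, e.g. one line per player:

for result in sorted_results:
    print(result["name"], result["total"], result["average"])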
More info about the csv module in the docs: https://docs.python.org/3/library/csv.html
More info about OrderedDict in the docs: https://docs.python.org/3/library/collections.html#collections.OrderedDict

Related

how to save data as csv file [duplicate]

I am trying to save IMU and GNSS data to a CSV file from the CARLA simulator. I can read the data in the terminal but cannot save it as a CSV file; it gives the error AttributeError: 'list' object has no attribute 'to_csv'. I created an empty list, then tried to save the data to it and convert it to a CSV file.
actor_list = []
try:
    # --------------
    # Add GNSS sensor to ego vehicle.
    # --------------
    gnss_bp = world.get_blueprint_library().find('sensor.other.gnss')
    gnss_location = carla.Location(0,0,0)
    gnss_rotation = carla.Rotation(0,0,0)
    gnss_transform = carla.Transform(gnss_location,gnss_rotation)
    gnss_bp.set_attribute("sensor_tick",str(3.0))
    ego_gnss = world.spawn_actor(gnss_bp,gnss_transform,attach_to=ego_vehicle, attachment_type=carla.AttachmentType.Rigid)
    def gnss_callback(gnss):
        print("GNSS measure:\n"+str(gnss)+'\n')
    ego_gnss.listen(lambda gnss: gnss_callback(gnss))
    actor_list.append(ego_gnss)
    # --------------
    # Add IMU sensor to ego vehicle.
    # --------------
    imu_bp = world.get_blueprint_library().find('sensor.other.imu')
    imu_location = carla.Location(0,0,0)
    imu_rotation = carla.Rotation(0,0,0)
    imu_transform = carla.Transform(imu_location,imu_rotation)
    imu_bp.set_attribute("sensor_tick",str(3.0))
    ego_imu = world.spawn_actor(imu_bp,imu_transform,attach_to=ego_vehicle, attachment_type=carla.AttachmentType.Rigid)
    def imu_callback(imu):
        print("IMU measure:\n"+str(imu)+'\n')
    ego_imu.listen(lambda imu: imu_callback(imu))
    # add this sensor to our actors.
    actor_list.append(ego_imu)
    # sleep for 10 seconds, then finish:
    time.sleep(10)
    data = []
    low_data = {"accelX": ego_imu.accelerometer[0],
                "accelY": ego_imu.accelerometer[1],
                "accelZ": ego_imu.accelerometer[2],
                "gyroX": ego_imu.gyroscope[0],
                "gyroY": ego_imu.gyroscope[1],
                "gyroZ": ego_imu.gyroscope[2],
                "Lat": ego.gnss[0],
                "Lon": ego.gnss[1]}
    data = data.append(low_data, ignore_index=True)
    print(low_data)
finally:
    data.to_csv("out_{}.csv".format(args.name))
    print("CSV SAVED FOR DRIVER named {}".format(args.name))
Use the csv module, as shown at https://www.geeksforgeeks.org/python-save-list-to-csv/
Once you have the list formed, you want to:
import csv

fields = ['Name', 'Branch', 'Year', 'CGPA']
# data rows of csv file
rows = [['Nikhil', 'COE', '2', '9.0'],
        ['Sanchit', 'COE', '2', '9.1'],
        ['Aditya', 'IT', '2', '9.3'],
        ['Sagar', 'SE', '1', '9.5'],
        ['Prateek', 'MCE', '3', '7.8'],
        ['Sahil', 'EP', '2', '9.1']]
with open('file_name.csv', 'w', newline='') as f:
    # using csv.writer method from CSV package
    write = csv.writer(f)
    write.writerow(fields)
    write.writerows(rows)
Essentially you are creating the file and then manually writing out each item in the list.
Another way to do it, if you have the information saved in a list of dictionaries, is to use:
def to_csv(self):
    '''
    Save data to csv file
    '''
    keys = self.data[0].keys()
    with open("sample.csv", "w", newline='') as csv_output:
        dict_writer = csv.DictWriter(csv_output, keys)
        dict_writer.writeheader()
        dict_writer.writerows(self.data)
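Applied to the question's CARLA code: list.append returns None (so data = data.append(...) sets data to None), and to_csv is a pandas DataFrame method, not a list method. A minimal sketch, under the assumption that the callback receives a carla.IMUMeasurement whose accelerometer/gyroscope are vectors with .x/.y/.z attributes, as in CARLA's sensor docs:

import csv

data = []  # plain list of dicts, filled by the sensor callback

def imu_callback(imu):
    # append mutates the list in place; don't reassign its return value
    data.append({"accelX": imu.accelerometer.x,
                 "accelY": imu.accelerometer.y,
                 "accelZ": imu.accelerometer.z,
                 "gyroX": imu.gyroscope.x,
                 "gyroY": imu.gyroscope.y,
                 "gyroZ": imu.gyroscope.z})

# ... later, after the sensor has been listening for a while:
if data:
    with open("out.csv", "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=data[0].keys())
        writer.writeheader()
        writer.writerows(data)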

Compare list items with a list of pairs, and output matching pairs

I have lists that are formatted like so:
order_ids = ['Order ID', '026-2529662-9119536', '026-4092572-3574764', '026-4267878-0816332', '026-5334006-4073138', '026-5750353-4848328', '026-5945233-4883500', '026-5966822-8160331', '026-8799392-8255522', '202-5076008-9615516', '202-5211901-8584318', '202-5788153-3773918', '202-6208325-9677946', '203-1024454-3409960', '203-1064201-9833131', '203-4104559-7038752', '203-5013053-9959554', '203-5768187-0573905', '203-8639245-4145958', '203-9473169-4807564', '204-1577436-4733125', '204-7025768-1965915', '204-9196762-0226720', '205-6427246-2264368', '205-9028779-8764322', '206-0703454-9777135', '206-0954144-1685131', '206-3381432-7615531', '206-3822931-6939555', '206-4658913-5563533', '206-5213573-9997926', '206-5882801-0583557', '206-7158700-9326744', '206-7668862-3913143', '206-8019246-1474732', '206-8541775-0545153']
one = [['Order ID', 'Amount'], ['026-2529662-9119536', '10.42'], ['026-4092572-3574764', '10.42'], ['026-4267878-0816332', '1.75'], ['026-5334006-4073138', '17.990000000000002'], ['026-5750353-4848328', '16.25'], ['026-5945233-4883500', '1.83'], ['026-5966822-8160331', '11.92'], ['026-8799392-8255522', '8.5'], ['202-5076008-9615516', '1.83'], ['202-5211901-8584318', '1.83'], ['202-5788153-3773918', '8.08'], ['202-6208325-9677946', '11.33'], ['203-1024454-3409960', '8.08'], ['203-1064201-9833131', '1.5'], ['203-4104559-7038752', '8.5'], ['203-5013053-9959554', '9.67'], ['203-5113131-7525963', '-8.5'], ['203-5768187-0573905', '3.66'], ['203-8639245-4145958', '5.08'], ['203-9473169-4807564', '3.66'], ['204-1577436-4733125', '1.83'], ['204-7025768-1965915', '1.83'], ['204-9196762-0226720', '11.33'], ['205-8348990-1889964', '-11.33'], ['205-9028779-8764322', '6.91'], ['206-0703454-9777135', '23.84'], ['206-0954144-1685131', '22.66'], ['206-3381432-7615531', '8.08'], ['206-3822931-6939555', '11.92'], ['206-4658913-5563533', '9.67'], ['206-5213573-9997926', '3.66'], ['206-5882801-0583557', '13.92'], ['206-7158700-9326744', '27.5'], ['206-7668862-3913143', '6.58'], ['206-8541775-0545153', '1.83']]
What I want to do is cycle through every item inside order_ids, and if the order_id is present in one - get the "value"
So far what I have tried is:
with open('test.csv', mode='w', newline='') as outfile:
    writer = csv.writer(outfile)
    i = 0
    while i < len(order_ids):
        for order in order_ids:
            try:
                if order == one[i][0]:
                    value_a = one[i][1]
                    print(order, value_a)
                    writer.writerow([order, value_a])
                    i += 1
                else:
                    i += 1
                    pass
            except IndexError:
                i += 1
This is working somewhat, but there are 36 items inside "order_ids" and 36 lists inside "one", yet only 18 rows are being written to my outfile.
An example of one order_id that isn't being written is "206-7668862-3913143", even though this clearly has a value of "6.58" inside "one".
What is stopping the rest of my rows from being written?
You can do this simply with a dictionary. The dict() constructor will accept a nested list of pairs and create a dictionary mapping order_id to amount. Then we can just loop over the order_ids list, and write out any order_id that appears to test.csv. (The original loop fails because the single index i is shared across both loops and advances on every comparison, so it runs past entries in one before every order ID has been checked against them.)
Code:
import csv
d = dict(one)
with open('test.csv', mode='w', newline='') as outfile:
writer = csv.writer(outfile)
for order_id in order_ids:
if order_id in d:
writer.writerow([order_id, d[order_id]])
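Note that the header comes out for free here: dict(one) maps 'Order ID' to 'Amount', and since 'Order ID' is also the first element of order_ids, the header row "Order ID,Amount" is written first.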
test.csv:
Order ID,Amount
026-2529662-9119536,10.42
026-4092572-3574764,10.42
026-4267878-0816332,1.75
026-5334006-4073138,17.990000000000002
026-5750353-4848328,16.25
026-5945233-4883500,1.83
026-5966822-8160331,11.92
026-8799392-8255522,8.5
202-5076008-9615516,1.83
202-5211901-8584318,1.83
202-5788153-3773918,8.08
202-6208325-9677946,11.33
203-1024454-3409960,8.08
203-1064201-9833131,1.5
203-4104559-7038752,8.5
203-5013053-9959554,9.67
203-5768187-0573905,3.66
203-8639245-4145958,5.08
203-9473169-4807564,3.66
204-1577436-4733125,1.83
204-7025768-1965915,1.83
204-9196762-0226720,11.33
205-9028779-8764322,6.91
206-0703454-9777135,23.84
206-0954144-1685131,22.66
206-3381432-7615531,8.08
206-3822931-6939555,11.92
206-4658913-5563533,9.67
206-5213573-9997926,3.66
206-5882801-0583557,13.92
206-7158700-9326744,27.5
206-7668862-3913143,6.58
206-8541775-0545153,1.83

How to flag if a dictionary key is absent

I have files, some of which contain CommonChar, and my python code works on them to build a dictionary. While building, there are some required keys which users might forget to put in. The code should be able to flag the file and the key which is missing.
The syntax for python code to work on is like this:
CommonChar pins Category General
CommonChar pins Contact Mark
CommonChar pins Description 1st line
CommonChar pins Description 2nd line
CommonChar nails Category specific
CommonChar nails Description 1st line
So for the above example, the "Contact" line is missing for nails, e.g.:
CommonChar nails Contact Robert
I have a list of required keys, e.g.: mustNeededKeys = ["Category", "Description", "Contact"]
mainDict = {}
for dirName, subdirList, fileList in os.walk(sys.argv[1]):
    for eachFile in fileList:
        # excluding file names ending in .swp, .swo, ~ which are created temporarily when editing in vim
        if not eachFile.endswith(('.swp', '.swo', '~')):
            #print eachFile
            filePath = os.path.join(dirName, eachFile)
            #print filePath
            with open(filePath, "r") as fh:
                contents = fh.read()
                items = re.findall("CommonChar.*$", contents, re.MULTILINE)
                for x in items:
                    cc, group, topic, data = x.split(None, 3)
                    data = data.split()
                    group_dict = mainDict.setdefault(group, {'fileLocation': [filePath]})
                    if topic in group_dict:
                        group_dict[topic].extend(['</br>'] + data)
                    else:
                        group_dict[topic] = data
The above code does its job of building a dict like this:
{'pins': {'Category': ['General'], 'Contact': ['Mark'], 'Description': ['1st', 'line', '2nd', 'line']}, 'nails': {'Category': ['specific'], 'Description': ['1st', 'line']}}
So when reading each file with CommonChar and building a group_dict, I need a way to check all the keys against mustNeededKeys, flag any that are missing, and proceed if they are all present.
Something like this should work:
# Setup mainDict (equivalent to code given above)
mainDict = {
    'nails': {
        'Category': ['specific'],
        'Description': ['1st', 'line'],
        'fileLocation': ['/some/path/nails.txt']
    },
    'pins': {
        'Category': ['General'],
        'Contact': ['Mark'],
        'Description': ['1st', 'line', '</br>', '2nd', 'line'],
        'fileLocation': ['/some/path/pins.txt']
    }
}

# check for missing keys
mustNeededKeys = {"Category", "Description", "Contact"}
for group, group_dict in mainDict.items():
    missing_keys = mustNeededKeys - set(group_dict.keys())
    if missing_keys:
        missing_key_list = ','.join(missing_keys)
        print(
            'group "{}" ({}) is missing key(s): {}'
            .format(group, group_dict['fileLocation'][0], missing_key_list)
        )
# group "nails" (/some/path/nails.txt) is missing key(s): Contact
If you must check for missing keys immediately after processing each group, you could use the code below. This assumes that each group is stored as a contiguous collection of rows in a single file (i.e., not mixed with other groups in the same file or spread across different files).
from itertools import groupby

mainDict = {}
mustNeededKeys = {"Category", "Description", "Contact"}
for dirName, subdirList, fileList in os.walk(sys.argv[1]):
    for eachFile in fileList:
        # excluding file names ending in .swp, .swo, ~ which are created
        # temporarily when editing in vim
        if not eachFile.endswith(('.swp', '.swo', '~')):
            #print eachFile
            filePath = os.path.join(dirName, eachFile)
            #print filePath
            with open(filePath, "r") as fh:
                contents = fh.read()
                items = re.findall("CommonChar.*$", contents, re.MULTILINE)
                split_items = [line.split(None, 3) for line in items]
                # group the items by group name (element 1 in each row)
                for g, group_items in groupby(split_items, lambda row: row[1]):
                    group_dict = {'fileLocation': [filePath]}
                    # store all items in the current group
                    for cc, group, topic, data in group_items:
                        data = data.split()
                        if topic in group_dict:
                            group_dict[topic].extend(['</br>'] + data)
                        else:
                            group_dict[topic] = data
                    # check for missing keys
                    missing_keys = mustNeededKeys - set(group_dict.keys())
                    if missing_keys:
                        missing_key_list = ','.join(missing_keys)
                        print(
                            'group "{}" ({}) is missing key(s): {}'
                            .format(group, filePath, missing_key_list)
                        )
                    # add group to mainDict
                    mainDict[group] = group_dict
data = '''CommonChar pins Category General
CommonChar pins Contact Mark
CommonChar pins Description 1st line
CommonChar pins Description 2nd line
CommonChar nails Category specific
CommonChar nails Description 1st line'''
from collections import defaultdict
from pprint import pprint

required_keys = ["Category", "Description", "Contact"]
d = defaultdict(dict)
for line in data.splitlines():
    line = line.split()
    if line[2] == 'Description':
        if line[2] not in d[line[1]]:
            d[line[1]][line[2]] = []
        d[line[1]][line[2]].extend(line[3:])
    else:
        d[line[1]][line[2]] = [line[3]]
pprint(dict(d))
print('*' * 80)
# find missing keys
for k in d.keys():
    for missing_key in set(d[k].keys()) ^ set(required_keys):
        print('Key "{}" is missing "{}"!'.format(k, missing_key))
Prints:
{'nails': {'Category': ['specific'], 'Description': ['1st', 'line']},
'pins': {'Category': ['General'],
'Contact': ['Mark'],
'Description': ['1st', 'line', '2nd', 'line']}}
********************************************************************************
Key "nails" is missing "Contact"!

Nested dictionary keeps overwriting data

I am trying to read in from a data file that has lines like:
2007 ANDREA 30 31.40 -71.90 05/13/18Z 25 1007 LOW
2007 ANDREA 31 31.80 -69.40 05/14/00Z 25 1007 LOW
I am trying to create a nested dictionary that has a key holding the year and then the nested dictionary will hold the name and a tuple containing statistics. I would like the return value to look like this:
{'2007': {'ANDREA': [(31.4, -71.9, '05/13/18Z', 25.0, 1007.0), (31.8, -69.4, '05/14/00Z', 25.0, 1007.0)]}}
However when I run the code it returns only one set of statistics. It seems to be overwriting itself because I am getting that last line of statistics in the txt file returned:
{'2007': {'ANDREA': [(31.8, -69.4, '05/14/00Z', 25.0, 1007.0)]}}
Here is the code:
def create_dictionary(fp):
    '''Remember to put a docstring here'''
    dict1 = {}
    f = []
    for line in fp:
        a = line.split()
        f.append(a)
    for item in f:
        a = (float(item[3]), float(item[4]), item[5], float(item[6]),
             float(item[7]))
        dict1 = update_dictionary(dict1, item[0], item[1], a)
    print(dict1)

def update_dictionary(dictionary, year, hurricane_name, data):
    if year not in dictionary:
        dictionary[year] = {}
        if hurricane_name not in dictionary:
            dictionary[year][hurricane_name] = [data]
        else:
            dictionary[year][hurricane_name].append(data)
    else:
        if hurricane_name not in dictionary:
            dictionary[year][hurricane_name] = [data]
        else:
            dictionary[year][hurricane_name].append(data)
    return dictionary
These lines:
if hurricane_name not in dictionary:
...should be:
if hurricane_name not in dictionary[year]:
Since I was a little late, here's a suggestion instead of an answer to your original question. You can simplify the logic a bit: when the year doesn't exist, the name also can't exist for that year. Everything can be put in a single function, and using a with statement to open the file will ensure it is properly closed even if your program encounters an error.
def build_dict(file_path):
    result = {}
    with open(file_path, 'r') as f:
        for line in f:
            items = line.split()
            year, name, data = items[0], items[1], tuple(items[2:])
            if year in result:
                if name in result[year]:
                    result[year][name].append(data)
                else:
                    result[year][name] = [data]
            else:
                result[year] = {name: [data]}
    return result

print(build_dict(file_path))
Output:
{'2007': {'ANDREA': [('30', '31.40', '-71.90', '05/13/18Z', '25', '1007', 'LOW'), ('31', '31.80', '-69.40', '05/14/00Z', '25', '1007', 'LOW')]}}
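For what it's worth, the same idea can be written even more compactly with collections.defaultdict (a sketch, not part of the original answer), which removes the explicit key checks:

from collections import defaultdict

def build_dict(file_path):
    # result[year][name] is a list of data tuples, created on first access
    result = defaultdict(lambda: defaultdict(list))
    with open(file_path, 'r') as f:
        for line in f:
            items = line.split()
            result[items[0]][items[1]].append(tuple(items[2:]))
    return result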

Creating lists from the dictionary or just simply sort it

I have the following code:
import os
import pprint

file_path = input("Please, enter the path to the file: ")
if os.path.exists(file_path):
    worker_dict = {}
    k = 1
    for line in open(file_path, 'r'):
        split_line = line.split()
        worker = 'worker{}'.format(k)
        worker_name = '{}_{}'.format(worker, 'name')
        worker_yob = '{}_{}'.format(worker, 'yob')
        worker_job = '{}_{}'.format(worker, 'job')
        worker_salary = '{}_{}'.format(worker, 'salary')
        worker_dict[worker_name] = ' '.join(split_line[0:2])
        worker_dict[worker_yob] = ' '.join(split_line[2:3])
        worker_dict[worker_job] = ' '.join(split_line[3:4])
        worker_dict[worker_salary] = ' '.join(split_line[4:5])
        k += 1
else:
    print('Error: Invalid file path')
File:
John Snow 1967 CEO 3400$
Adam Brown 1954 engineer 1200$
Output from worker_dict:
{
'worker1_job': 'CEO',
'worker1_name': 'John Snow',
'worker1_salary': '3400$',
'worker1_yob': '1967',
'worker2_job': 'engineer',
'worker2_name': 'Adam Brown',
'worker2_salary': '1200$',
'worker2_yob': '1954',
}
And I want to sort the data by worker name and after that by salary. My idea was to create a separate list with salaries and worker names to sort, but I have problems filling it. Maybe there is a more elegant way to solve my problem?
import os
import pprint

file_path = input("Please, enter the path to the file: ")
if os.path.exists(file_path):
    worker_dict = {}
    k = 1
    with open(file_path, 'r') as file:
        content = file.read().splitlines()
    res = []
    for i in content:
        val = i.split()
        name = [" ".join([val[0], val[1]])]  # concatenate first name and last name
        i = name + val[2:]  # prepend name
        res.append(i)  # append modified value to new list
    res.sort(key=lambda x: x[3])  # sort by salary
    print(res)
    res.sort(key=lambda x: x[0])  # sort by name
    print(res)
Output:
[['Adam Brown', '1954', 'engineer', '1200$'], ['John Snow', '1967', 'CEO', '3400$']]
[['Adam Brown', '1954', 'engineer', '1200$'], ['John Snow', '1967', 'CEO', '3400$']]
d = {
    'worker1_job': 'CEO',
    'worker1_name': 'John Snow',
    'worker1_salary': '3400$',
    'worker1_yob': '1967',
    'worker2_job': 'engineer',
    'worker2_name': 'Adam Brown',
    'worker2_salary': '1200$',
    'worker2_yob': '1954',
}

from itertools import zip_longest

# re-group:
def grouper(iterable, n, fillvalue=None):
    "Collect data into fixed-length chunks or blocks"
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)

# re-order:
res = []
for group in list(grouper(d.values(), 4)):
    reorder = [1, 2, 0, 3]  # (job, name, salary, yob) -> (name, salary, job, yob)
    res.append([group[i] for i in reorder])

# sort by name, then salary:
res.sort(key=lambda x: (x[0], x[1]))

Output:
[['Adam Brown', '1200$', 'engineer', '1954'],
 ['John Snow', '3400$', 'CEO', '1967']]
grouper is defined and explained in the itertools recipes. I've grouped your dictionary into the records pertaining to each worker and returned them as a reordered list of lists. As lists, I sort them by name and salary. This solution is modular: it distinctly groups, re-orders and sorts. Note that it relies on dictionaries preserving insertion order, which is guaranteed in Python 3.7+.
I recommend storing the workers in a different format, for example .csv; then you could use csv.DictReader and put the rows into a list of dictionaries (this would also allow jobs, names, etc. consisting of more than one word, like "tomb raider").
Note that you have to convert the year of birth and salary to ints or floats to sort them correctly; otherwise they would be sorted lexicographically, as in a real-world dictionary (book), because they are strings, e.g.:
>>> sorted(['100', '11', '1001'])
['100', '1001', '11']
To sort the list of dicts you can use operator.itemgetter as the key argument of sorted, instead of a lambda function, and just pass the desired key to itemgetter.
The k variable is useless, because it's just the length of the list.
The .csv file:
"name","year of birth","job","salary"
John Snow,1967,CEO,3400$
Adam Brown,1954,engineer,1200$
Lara Croft,1984,tomb raider,5600$
The .py file:
import os
import csv
from operator import itemgetter
from pprint import pprint

file_path = input('Please, enter the path to the file: ')
if os.path.exists(file_path):
    with open(file_path, 'r', newline='') as f:
        worker_list = list(csv.DictReader(f))
    for worker in worker_list:
        worker['salary'] = int(worker['salary'].strip('$'))
        worker['year of birth'] = int(worker['year of birth'])
    pprint(worker_list)
    pprint(sorted(worker_list, key=itemgetter('name')))
    pprint(sorted(worker_list, key=itemgetter('salary')))
    pprint(sorted(worker_list, key=itemgetter('year of birth')))
You still need some error handling in case an int conversion fails, or you can just let the program crash.
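For instance, a minimal sketch that skips malformed rows instead of crashing (the skip-and-report policy here is just one possible choice, not part of the original answer):

clean_list = []
for worker in worker_list:
    try:
        worker['salary'] = int(worker['salary'].strip('$'))
        worker['year of birth'] = int(worker['year of birth'])
        clean_list.append(worker)
    except ValueError:
        # report and drop rows whose numeric fields don't parse
        print('Skipping malformed row: {}'.format(worker))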
