Convert a text file into a dictionary list - python

I have a text file in this format (in_file.txt):
banana 4500 9
banana 350 0
banana 550 8
orange 13000 6
How can I convert this into a dictionary list in Python?
Code:
in_filepath = 'in_file.txt'
def data_dict(in_filepath):
with open(in_filepath, 'r') as file:
for line in file.readlines():
title, price, count = line.split()
d = {}
d['title'] = title
d['price'] = int(price)
d['count'] = int(count)
return [d]
The terminal shows the following result:
{'title': 'orange', 'price': 13000, 'count': 6}
Correct output:
{'title': 'banana', 'price': 4500, 'count': 9}, {'title': 'banana', 'price': 350, 'count': 0} , ....
Can anyone help me with my problem? Thank you!

titles = ["title","price","count"]
[dict(zip(titles, [int(word) if word.isdigit() else word for word in line.strip().split()])) for line in open("in_file.txt").readlines()]
or:
titles = ["title","price","count"]
[dict(zip(titles, [(data:=line.strip().split())[0], *map(int, data[1:])])) for line in open("in_file.txt").readlines()]
your approach(corrected):
in_filepath = 'in_file.txt'
def data_dict(in_filepath):
res = []
with open(in_filepath, 'r') as file:
for line in file.readlines():
title, price, count = line.split()
d = {}
d['title'] = title
d['price'] = int(price)
d['count'] = int(count)
res.append(d)
return res
data_dict(in_filepath)
why? because
->
d = {}
d['title'] = title
d['price'] = int(price)
d['count'] = int(count)
is out of for loop and run only once and when ‍‍for be finished and then you have just one element
you return your last element and didn't use others and use must create a list and append every element at the last line of for loop (saving) and at last, return result
#Rockbar approach:
import pandas as pd
list(pd.read_csv("in_file.txt", sep=" ", header=None, names=["title","price","count"]).T.to_dict().values())

You can read the file line-by-line and then create dict base keys that define in the first.
keys = ['title', 'price' , 'count']
res = []
with open('in_file.txt', 'r') as file:
for line in file:
# Or in python >= 3.8
# while (line := file.readline().rstrip()):
tmp = [int(w) if w.isdigit() else w for w in line.rstrip().split() ]
res.append(dict(zip(keys, tmp)))
print(res)
[
{'title': 'banana', 'price': 4500, 'count': 9},
{'title': 'banana', 'price': 350, 'count': 0},
{'title': 'banana', 'price': 550, 'count': 8},
{'title': 'orange', 'price': 13000, 'count': 6}
]

You are trying to create a list of dictionaries (array of objects). So it would be best if you appended dictionary into a list each time you created it from a line of text.
Code
in_filepath = 'in_file.txt'
def data_dict(in_filepath):
dictionary = []
with open(in_filepath, 'r') as file:
for line in file:
title, price, count = line.split()
dictionary.append({'title': title, 'price': int(price), 'count': int(count)})
return dictionary
print(data_dict(in_filepath))
Output
[
{"title": "banana", "price": 4500, "count": 9},
{"title": "banana", "price": 350, "count": 0 },
{"title": "banana", "price": 550, "count": 8},
{"title": "orange", "price": 13000, "count": 6}
]

Related

How to make a one key for all values in dictonary python

I have a list:
List_ = ["Peter", "Peter", "Susan"]
I want to make a dictonary like this:
Dict_ = {"Name": "Peter", "Count": 2, "Name": "Susan", "Count": 1}
Dict_ = {}
Dict_new = {}
for text in List_:
if text not in Dict_:
Dict_[text] = 1
else:
Dict_[text] += 1
for key, values in Dict_.items():
Dict_new["Name"] = key
Dict_new["Count"] = values
print(Dict_new)
It is printing only last ones:
{"Name": "Susan", "Count": 1}
Here is the implementation that you can use according to what you would like :
from collections import Counter
# Your data
my_list = ["Peter", "Peter", "Susan"]
# Count the occurrences
counted = Counter(my_list)
# Your format
counted_list = []
for key, value in counted.items():
counted_list.append({"Name": key, "Count": value})
print(counted_list)
And output will be :
[{'Name': 'Peter', 'Count': 2}, {'Name': 'Susan', 'Count': 1}]
As noted in comments, a dictionary can only have each key once.
You may want a list of dictionaries, built with help from collections.Counter and a list comprehension.
>>> from collections import Counter
>>> List_ = ["Peter", "Peter", "Susan"]
>>> [{'name': k, 'count': v} for k, v in Counter(List_).items()]
[{'name': 'Peter', 'count': 2}, {'name': 'Susan', 'count': 1}]
In addition to using collections.Counter you could use a defaultdict.
>>> from collections import defaultdict
>>> d = defaultdict(int)
>>> for n in List_:
... d[n] += 1
...
>>> d
defaultdict(<class 'int'>, {'Peter': 2, 'Susan': 1})
>>> [{'name': k, 'count': v} for k, v in d.items()]
[{'name': 'Peter', 'count': 2}, {'name': 'Susan', 'count': 1}]
You can use the following code to achieve what you are trying to do.
List_ = ["Peter", "Peter", "Susan"]
dict_ = {}
for name in List_:
if name in dict_:
dict_[name] += 1
else:
dict_[name] = 1
print(dict_)
Generates the following output where key is the name and value is the count.
{'Peter': 2, 'Susan': 1}

Dealing with duplicates in lists of dictionaries

I uploaded a a csv file using DictReader so I essentially have a list of dictionaries. For example I have a called reader with the following:
[{'name': 'Jack', 'hits:' :7, 'misses:': 12, 'year': 10},
{'name': 'Lisa', 'hits': 5, 'misses': 3,' year': 8},
{'name': 'Jack', 'hits': 5, 'misses ':7, 'year': 9}]
I am using a loop to create lists like the following:
name = []
hits = []
for row in reader:
name.append(row["name"])
hits.append(row["hits"])
However I don't want duplicates in my list so where there is a duplicate name I am only interested in the names with the highest year. So basically I want to end up with the following
name = [Jack, Lisa]
hits = [7,5]
What is the best way to go about this
TRY:
reader = sorted(reader, key = lambda i: i['year'], reverse=True)
name = []
hits = []
for row in reader:
if row['name'] in name:
continue
name.append(row["name"])
hits.append(row["hits"])
Idea is to sort the list of dict based on year and then iterate over the list.
import pandas as pd
data = [{'name': 'Jack', 'hits' :7, 'misses': 12, 'year': 10},
{'name': 'Lisa', 'hits': 5, 'misses': 3,'year': 8},
{'name': 'Jack', 'hits': 5, 'misses':7, 'year': 9}]
df = pd.DataFrame(data).sort_values(by=['name','year'],ascending=False).groupby('name').first()
dict(zip(df.index,df['hits']))
In pure Python (no libraries):
people = {} # maps "name" -> "info"
for record in csv_reader:
# do we have someone with that name already?
old_record = people.get(record['name'], {})
# what's their year (defaulting to -1)
old_year = old_record.get('year', -1)
# if this record is more up to date
if record['year'] > old_year:
# replace the old record
people[record['name']] = record
# -- then, you can pull out your name and year lists
name = list(people.keys())
year = list(r['year'] for r in people.values())
If you want to learn Pandas
import pandas as pd
df = pd.read_csv('yourdata.csv')
df.groupby(['name']).max()
Solution without pandas:
lst = [
{"name": "Jack", "hits": 7, "misses:": 12, "year": 10},
{"name": "Lisa", "hits": 5, "misses": 3, " year": 8},
{"name": "Jack", "hits": 5, "misses ": 7, "year": 9},
]
out = {}
for d in lst:
out.setdefault(d["name"], []).append(d)
name = [*out]
hits = [max(i["hits"] for i in v) for v in out.values()]
print(name)
print(hits)
Prints:
['Jack', 'Lisa']
[7, 5]

From the list of dictionaries find the largest value lengths for each key

data = [{"id": "78ab45",
"name": "Jonh"},
{"id": "69cd234457",
"name": "Joe"}]
I want my function to return the largest value lengths for each key from all dictionaries:
expected_output = [
{ "size": 10, "name": "id" }, #because the length of the largest "id" value is 10
{ "size": 4, "name": "name" }, #because the length of the largest "name" value is 4
]
My code so far:
def my_func(data):
headers_and_sizes = []
for item in data:
for key, value in item.items():
headers_and_sizes.append({"size": f'{len(value)}', "name": key})
if int(headers_and_sizes[0]["size"]) < len(value):
headers_and_sizes[0]["size"] = len(value)
return headers_and_sizes
Gives me this:
[{'size': '6', 'name': 'id'}, {'size': '4', 'name': 'name'}, {'size': '10', 'name': 'id'}, {'size': '3', 'name': 'name'}]
How can I fix that so that it will return the values as in expected_output?
You'll want to be updating a dictionary that stores each key mapped to the maximum length seen for that key thus far.
data = [
{
"id": "78ab45",
"name": "Jonh",
},
{
"id": "69cd234457",
"name": "Joe",
},
]
key_to_max_len = {}
for datum in data:
for key, val in datum.items():
if key not in key_to_max_len or len(val) > key_to_max_len[key]:
key_to_max_len[key] = len(val)
key_size_arr = [{"size": val, "name": key} for key, val in key_to_max_len.items()]
you can get the max value for id and name like below code, and structure the output accordingly
>>> data
[{'id': '78ab45', 'name': 'Jonh'}, {'id': '69cd234457', 'name': 'Joe'}]
id = max(map(lambda x:len(x['id']), data))
name = max(map(lambda x:len(x['name']), data))
>>> id
10
>>> name
4
You can use list comprehension to form a tuple with ids and names:
names_ids = [(eachdict['id'],eachdict['name']) for eachdict in data]
Format the output to have the desired shape (dictionaries), find the max length (using the max() function, passing it the lengths of names and ids, using another list comprehension, inside max()):
expected_output = \
[{"size":max([len(each[0]) for each in names_ids]),"name":"id"},
{"size":max([len(each[1]) for each in names_ids]),"name":"name"}]
Output will be:
[{'name': 'id', 'size': 10}, {'name': 'name', 'size': 4}]
Using the following:
keys = list(data[0].keys())
output = {key:-1 for key in keys}
for d in data:
for k in d.keys():
if len(d[k]) > output[k]:
output[k] = len(d[k])
Will output:
{'id': 10, 'name': 4}
I think the easiest method here is pandas...
import pandas as pd
df = pd.DataFrame(data)
out = [{'size': df['id'].str.len().max(), 'name':'id'},
{'size': df['name'].str.len().max(), 'name':'name'}]
output:
[{'size': 10, 'name': 'id'}, {'size': 4, 'name': 'name'}]
or for addt'l names..
[{'size':df[col].str.len().max(), 'name':col} for col in df.columns]
Here is how you can use a nested dictionary comprehension:
data = [{"id": "78ab45",
"name": "Jonh"},
{"id": "69cd234457",
"name": "Joe"}]
expected_output = [{'size': len(max([i[k] for i in data], key=len)),
'name': k} for k in data[0]]
print(expected_output)
Output:
[{'size': 10, 'name': 'id'},
{'size': 4, 'name': 'name'}]

Comparing two dictionaries for specific value within list of dict

I have two list of dictionaries. I am looping through them and looking for matching id. If the id in the src_dict matches to destination I need to call an update method else an insert method. When I am using the below code I am getting unintended result.
This is the outcome I want. While updating i need to preserve rec_id from dest dict with corresponding values from src dict. insert is pretty much just the src dict elements thats not in dest dict. Appreciate any help!
records_update = [{'rec_id': 'abc', 'fields': {'id': 111, 'name': 'sam'}}, {'rec_id': 'xyz', 'fields': {'id': 333, 'name': 'name_changed_to_not_ross'}}]
#. the rec_id is from dest_dict while rest of the field should come from src_dict since these values could change that needs to be update
records_insert = [{"id": 444, "name": "jack"}]
src_dict = [{"id": 111, "name": "sam"}, {"id": 333, "name": "name_changed_to_not_ross"}, {"id": 444, "name": "jack"}]
dest_dict = [{"rec_id":"abc","fields":{"id":111,"name":"sam"}},
{"rec_id":"pqr","fields":{"id":222,"name":"john"}},
{"rec_id":"xyz","fields":{"id":333,"name":"ross"}}]
records_update = []
records_insert = []
for rec_src in src_dict:
for rec_dest in dest_dict:
if rec_src['id'] == rec_dest['fields']['id']:
print('match and add this element to update list')
records_update.append(rec_src)
else:
print('no match add this element to insert list')
records_insert.append(rec_src)```
You can create a dict indexed by IDs from dest_dict for efficient lookups, and then use list comprehensions to filter src_dict for respective records:
dest = {d['fields']['id']: d for d in dest_dict}
records_update = [dest[d['id']] for d in src_dict if d['id'] in dest]
records_insert = [d for d in src_dict if d['id'] not in dest]
You need to do an append to insert list only when no append is done to update list.
records_update = []
records_insert = []
for rec_src in src_dict:
flag = 0
for rec_dest in dest_dict:
if rec_src['id'] == rec_dest['fields']['id']:
print('match and add this element to update list')
records_update.append(rec_dest)
flag = 1
if flag != 1:
print('no match add this element to insert list')
records_insert.append(rec_src)
print(records_update)
# [{'rec_id': 'abc', 'fields': {'id': 111, 'name': 'sam'}}, {'rec_id': 'xyz', 'fields': {'id': 333, 'name': 'ross'}}]
print(records_insert)
# [{'id': 444, 'name': 'jack'}]
I am able to achieve the desired result using this:
records_update = []
records_insert = []
for rec_src in src_dict:
flag = 0
for rec_dest in dest_dict:
if rec_src['id'] == rec_dest['fields']['id']:
new_dict = {}
print('match and add this element to update list')
new_dict['rec_id'] = rec_dest['rec_id']
new_dict['fields'] = rec_src
records_update.append(new_dict)
flag = 1
if flag != 1:
print('no match add this element to insert list')
records_insert.append(rec_src)
records_update
[{'rec_id': 'abc', 'fields': {'id': 111, 'name': 'sam'}},
{'rec_id': 'xyz', 'fields': {'id': 333, 'name': 'name_changed_to_not_ross'}}]
records_insert
[{'id': 444, 'name': 'jack'}]
Just curious how I can achieve the result using #blhsing approach since that seems to be more efficient approach
ids = {d['fields']['id'] for d in dest_dict}
records_update = [dest[d['id']] for d in src_dict if d['id'] in dest]
records_insert = [d for d in src_dict if d['id'] not in dest]

how do I build a dictionary data inside a list?

I have this program:
def file(fname):
lines = open(fname).read().splitlines()
return(lines)
print(file('venue.txt'))
And it came out like this which I change into list:
['room 1, 10, 250']
How do I build a dictionary data with it, so that it can be like this:
[{'name': 'room 1', 'max': 10, 'cost': 250}]
Some clue maybe for me to build it.
Thanks
Edited:
def file(fname):
lines = open(fname).read().splitlines()
new = []
for i in lines:
split = i.split(', ')
new.append({'name':split[0],'max':split[1],'cost':split[2]})
return(new)
print(file('venue.txt'))
It prints:
new.append({'name':split[0],'max':split[1],'cost':split[2]})
IndexError: list index out of range
What does it mean?
You can try this:
import re
def file(fname):
lines = open(fname).read().splitlines()
return(lines)
headers = ["name", "max", "cost"]
data1 = [re.split(",\s+", i) for i in file("venue.txt")]
final_data = [{a:b for a, b in zip(headers, data} for data in data1]
print(final_data)
If they are separated by ', ' you can use the split() on ', '.
Will return an array with the separated items.
For your example:
current_list = ['room 1, 10, 250']
split = current_list[0].split(', ')
new_list = [{'name': split[0], 'max': int(split[1]), 'cost': int(split[2])}]
print(new_list)
output:
[{'name': 'room 1', 'max': 10, 'cost': 250}]
For a larger list:
current_list = ['room 1, 10, 250', 'room 2, 30, 500','room 3, 50, 850']
new_list = []
for i in current_list:
split = i.split(', ')
new_list.append({'name': split[0], 'max': int(split[1]), 'cost': int(split[2])})
print(new_list)
output:
[{'name': 'room 1', 'max': 10, 'cost': 250}, {'name': 'room 2', 'max': 30, 'cost': 500}, {'name': 'room 3', 'max': 50, 'cost': 850}]

Categories