Text File to Dictionary with multiple columns - python

I have a text file with the following content in it
last name, first name, email, some arbitrary id number, and a phone number.
Hill, Jonah, jonahhill#outlook.com, 015666, 123-456-7890
Reynolds, Ryan, rrdp#yahoo.com, 1081254, 789-456-1230
Baccarin,Morena, bmdp#yahoo.com, 1011340, 159-753-4561
...
I want to make a dictionary for each row, with named keys such as last name, first name, etc.
Here's the code I'm trying:
d = {}
with open("oldFile.txt") as f:
    d = dict(x.rstrip().split(None, 1) for x in f)
print d
I get something like this, with all of the file's content in one big dictionary:
{'"hil"': '"jonah" "jonahhill#outlook" "015666" "123-456-7890"'...}
The result I'm looking for is:
First person:
{lastname: "hill" , firstname: "Jonah", email: "jonahhill#outlook.com...}
Second person:
{Reynolds, Ryan, rrdp#yahoo.com, 1081254, 789-456-1230}
Third person:
...
I want to use the keys to print entries out individually, e.g. printing the first person in the file gives:
First person:
{lastname: "hill" , firstname: "Jonah", email: "jonahhill#outlook.com...}

You need zip.
keys = ['lastname', 'firstname', 'email', 'id', 'phone']
dicts = []
with open("oldFile.txt") as f:
    for line in f:
        # Split each line on commas and strip surrounding whitespace.
        values = [part.strip() for part in line.strip().split(',')]
        # Create a dict for the row by pairing keys with values.
        d = dict(zip(keys, values))
        # Print the row dict
        print(d)
        # Store for future use
        dicts.append(d)
The dictionaries for each row are available in the list dicts.
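If the input is strictly comma-separated, the standard library's csv module can do the splitting and key-pairing in one step; a minimal alternative sketch, assuming the same file and field order:
import csv

keys = ['lastname', 'firstname', 'email', 'id', 'phone']
with open("oldFile.txt", newline='') as f:
    # DictReader pairs each row's fields with the given fieldnames.
    reader = csv.DictReader(f, fieldnames=keys, skipinitialspace=True)
    dicts = [dict(row) for row in reader]
print(dicts[0])  # first person
csv.DictReader also copes with quoted fields, which a plain split would mishandle.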

Something like this should get the job done:
keys = ['lastname', 'firstname', 'email', 'id', 'phone']
f = open('oldFile.txt', 'r')
results = []
while True:
    line = f.readline()
    if not line:
        break
    # Split on commas and strip whitespace/newlines around each field.
    content = [value.strip() for value in line.split(',')]
    row = {}
    index = 0
    for value in content:
        if value:
            row[keys[index]] = value
        index += 1
    if row:  # Prevent empty lines from appending to results
        results.append(row)
f.close()
for row in results:
    print(row)


Converting text file to dictionary then to JSON object using Python - But I need to split data from the text file

I am trying to convert txt file data to a Python dictionary before dumping it to JSON and then writing the JSON data into Avro. My text file contains unwanted data that I need to remove; I only need field names and field types:
mappings = ['name', 'type']
with open('xxx.txt', 'r') as fn:
    dict1 = {}
    names = []
    types = []
    for line in file.readlines():
        names_lines = line.split()[0]  # index 0 = names
        names.append(names_lines)
        types_lines = line.split()[1]  # index 1 = types
        types.append(types_lines)
    id = 1
    for d in fn:
        desc = list(d.strip().split(' ', 1))
        name = desc[0]
        i = 0
        dict2 = {}
        while i < len(mappings):
            dict2[mappings[i]] = desc[i]
            i = i + 1
        dict1[name] = dict2
        id = id + 1
print(dict1)
My error is:
name 'file' is not defined. Did you mean: 'filter'?
replace file with fn:
for line in fn.readlines():
From what you described in the comments, try this:
records = []
with open('xxx.txt', 'r') as f:
    for line in f:
        name, type_, *_ = line.split()
        records.append({"name": name, "type": type_})
print(records)
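Since the end goal is JSON, the records list can then be serialized with the standard json module; a minimal sketch (fields.json is just an example output path):
import json

# Assuming `records` was built as in the snippet above.
with open('fields.json', 'w') as out:
    json.dump(records, out, indent=2)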

How to take the content of a file into a nested dictionary in Python?

I am trying to read a txt file and get the content into a nested dictionary. The content of the file is as below:
Name: John Doe
Email: john.doe#exabeam.com
Name: Martina jackson
Email: Martina.jackson#exabeam.com
Name: Steve Bob
Email: steve.bob#exabeam.com
Name: John Doe
Email: john.doe#exabeam.com
Name: Steve Bob
Email: steve.bob#exabeam.com
Name: John Doe
Email: john.doe#exabeam.com
My purpose is to organize the file content so that the name is the key, and for every name, the number of times its email is mentioned is stored as the value in a nested dict. Below is the sample output:
{ "John Doe": {"John.doe#exabeam.com": 3}, "Steve Bob": {"steve.bob#exabeam.com": 2}, "Martina Jackson":{"martina.jackson": 1}}
So I tried something like the below, but it's not working:
with open('contacts', 'r') as file:
    for read_file in file:
        new_dict = sorted(read_file.items(), key=lambda x: x[1]['point'], reverse=True)
        print(new_dict)
When I execute the above program, I get the error below:
new_dict = sorted(read_file.items(), key = lambda x: x[1]['point'],reverse=True)
AttributeError: 'tuple' object has no attribute 'items'
Any thoughts?
Thank you,
Yet another example!
d = {}
with open('contacts', 'r') as file:
    for read_file in file.read().split("\n\n"):  # split the data into chunks
        data = read_file.split("\n")             # split the name/email lines
        name = data[0].split(": ")[1]            # take the part after "Name: "
        email = data[1].split(": ")[1]           # take the part after "Email: "
        if name not in d:
            d[name] = {email: 1}
        else:
            d[name][email] = d[name].get(email, 0) + 1
How about this one?
result = {}
with open(filename, "r") as f:
    lines = f.readlines()
    for name_line, address_line in zip(lines[::3], lines[1::3]):
        name = name_line.replace(": ", ":").split(":")[1].strip()
        address = address_line.replace(": ", ":").split(":")[1].strip()
        if name not in result:
            result[name] = {address: 1}
        else:
            result[name][address] += 1
Something like this?
contacts = {}
with open('contacts.txt', 'r') as f:
    name = None
    for line in f:
        parts = line.rstrip().split(': ')
        if parts[0] == 'Name':
            name = parts[1]
            if name not in contacts:
                contacts[name] = {}  # Initialize empty dict for new name
        elif parts[0] == 'Email':
            email = parts[1]
            if email not in contacts[name]:
                contacts[name][email] = 0  # Initialize count for new email
            contacts[name][email] += 1
print(contacts)  # Outputs: {'John Doe': {'john.doe#exabeam.com': 3}, 'Martina jackson': {'Martina.jackson#exabeam.com': 1}, 'Steve Bob': {'steve.bob#exabeam.com': 2}}
I do not understand why such a data structure is advantageous;
{"name": (email, count)}
would likely be better, as I only see one email per name.
But this parses the data into the format you specified:
result = dict()
with open('contacts', 'r') as file:
    # read all lines from the file
    lines = file.readlines()
    # process them in chunks of three (name line, email line, blank line)
    for i in range(0, len(lines), 3):
        name, email, _ = lines[i:i+3]
        # remove "Name:" and "Email:" as well as newlines
        name = name.split(":")[1].strip()
        email = email.split(":")[1].strip()
        # If we haven't seen this name before, initialize a dict
        if name not in result:
            result[name] = dict()
        # increment the number of times the email has been seen, else set to 1
        result[name][email] = result[name].get(email, 0) + 1
print(result)
prints, as expected
{'John Doe': {'john.doe#exabeam.com': 3}, 'Martina jackson': {'Martina.jackson#exabeam.com': 1}, 'Steve Bob': {'steve.bob#exabeam.com': 2}}
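As a side note, collections.Counter expresses the counting part of these answers directly; a compact sketch, assuming the same Name:/Email: line layout:
from collections import defaultdict, Counter

counts = defaultdict(Counter)
with open('contacts', 'r') as f:
    name = None
    for line in f:
        line = line.strip()
        if line.startswith('Name:'):
            name = line.split(':', 1)[1].strip()
        elif line.startswith('Email:') and name is not None:
            email = line.split(':', 1)[1].strip()
            counts[name][email] += 1
# Convert to plain nested dicts to match the requested output shape.
print({n: dict(c) for n, c in counts.items()})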

How to create a dictionary based on a string from a file

I have the following text in a file:
InfoType 0 :
string1
string2
string3
InfoType 1 :
string1
string2
string3
InfoType 3 :
string1
string2
string3
Is there a way to create a dictionary that would look like this:
{'InfoType 0':'string1,string2,string3', 'InfoType 1':'string1,string2,string3', 'InfoType 3':'string1,string2,string3'}
Something like this should work:
def my_parser(fh, key_pattern):
    d = {}
    for line in fh:
        if line.startswith(key_pattern):
            name = line.strip()
            break
    # This list will hold the lines
    lines = []
    # Now iterate to find the lines
    for line in fh:
        line = line.strip()
        if not line:
            continue
        if line.startswith(key_pattern):
            # When in this block we have reached
            # the next record
            # Add to the dict
            d[name] = ",".join(lines)
            # Reset the lines and save the
            # name of the next record
            lines = []
            name = line
            # skip to next line
            continue
        lines.append(line)
    d[name] = ",".join(lines)
    return d
Use like so:
with open("myfile.txt", "r") as fh:
d = my_parser(fh, "InfoType")
# {'InfoType 0 :': 'string1,string2,string3',
# 'InfoType 1 :': 'string1,string2,string3',
# 'InfoType 3 :': 'string1,string2,string3'}
There are limitations, such as: duplicate keys (a repeated InfoType header would overwrite the earlier entry), and the key still needs processing (it keeps the trailing " :").
You could get around these by making the function a generator that yields (name, string) pairs and processing them as you read the file, as sketched below.
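A minimal sketch of that generator variant (same file and key pattern as above; my_parser_iter is just an illustrative name):
def my_parser_iter(fh, key_pattern):
    # Yield (name, joined_lines) pairs instead of building a dict.
    name, lines = None, []
    for line in fh:
        line = line.strip()
        if not line:
            continue
        if line.startswith(key_pattern):
            if name is not None:
                yield name, ",".join(lines)
            # Strip the trailing " :" so the key needs no further processing.
            name, lines = line.rstrip(' :'), []
        else:
            lines.append(line)
    if name is not None:
        yield name, ",".join(lines)

with open("myfile.txt", "r") as fh:
    d = dict(my_parser_iter(fh, "InfoType"))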
This will do:
dictionary = {}
# Replace ``file.txt`` with the path of your text file.
with open('file.txt', 'r') as file:
    for line in file:
        if not line.strip():
            continue
        if line.startswith('InfoType'):
            key = line.rstrip('\n :')
            dictionary[key] = ''
        else:
            # Add a comma separator between values, but no trailing comma.
            if dictionary[key]:
                dictionary[key] += ','
            dictionary[key] += line.strip()
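Another way to express the same grouping is to collect the strings per key in a list and join them afterwards; a short sketch under the same assumptions (file path and key prefix as above):
from collections import defaultdict

groups = defaultdict(list)
with open('file.txt', 'r') as f:
    key = None
    for line in f:
        line = line.strip()
        if not line:
            continue
        if line.startswith('InfoType'):
            key = line.rstrip(' :')
        elif key is not None:
            groups[key].append(line)

dictionary = {key: ','.join(values) for key, values in groups.items()}
print(dictionary)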

Python extract values from text using keys

I have a text file in the following Key Value format:
--START--
FirstName Kitty
LastName McCat
Color Red
random_data
Meow Meow
--END--
I want to extract specific values from the text into a variable or a dict. For example, if I want to extract the values of LastName and Color, what would be the best way to do this?
The random_data may be anywhere in the file and span multiple lines.
I've considered using regex, but I'm concerned about performance and readability, since in the real code I have many different keys to extract.
I could also loop over each line and check for each key, but it gets quite messy with 10+ keys. For example:
if line.startswith("LastName"):
#split line at space and handle
if line.startswith("Color"):
#split line at space and handle
Hoping for something a little cleaner
tokens = ['LastName', 'Color']
dictResult = {}
with open(fileName, 'r') as fileHandle:
    for line in fileHandle:
        lineParts = line.split()  # split on whitespace; also drops the newline
        if len(lineParts) == 2 and lineParts[0] in tokens:
            dictResult[lineParts[0]] = lineParts[1]
Assuming your file is in something called sampletxt.txt, this would work. It creates a dictionary mapping from key -> list of values.
import re

with open('sampletxt.txt', 'r') as f:
    txt = f.read()
keys = ['FirstName', 'LastName', 'Color']
d = {}
for key in keys:
    d[key] = re.findall(key + r'\s(.*)\s*\n*', txt)
This version allows you to optionally specify the tokens
import re

s = """--START--
FirstName Kitty
LastName McCat
Color Red
random_data
Meow Meow
--END--"""
tokens = ["LastName", "Color"]
if len(tokens) == 0:
    print(re.findall("({0}) ({0})".format(r"\w+"), s))
else:
    print(list((t, re.findall(r"{} (\w+)".format(t), s)[0]) for t in tokens))
Output
[('LastName', 'McCat'), ('Color', 'Red')]
Building off the other answers, this function would use regular expressions to take any text key and return the value if found:
import re

file_name = 'test.txt'

def get_text_value(text_key, file_name):
    match_str = text_key + r"\s(\w+)\n"
    with open(file_name, "r") as f:
        text_to_check = f.readlines()
    text_value = None
    for line in text_to_check:
        matched = re.match(match_str, line)
        if matched:
            text_value = matched.group(1)
    return text_value

if __name__ == "__main__":
    first_key = "FirstName"
    first_value = get_text_value(first_key, file_name)
    print('Check for first key "{}" and value "{}"'.format(first_key, first_value))
    second_key = "Color"
    second_value = get_text_value(second_key, file_name)
    print('Check for second key "{}" and value "{}"'.format(second_key, second_value))
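If regex feels heavy for this, another option is to parse every two-token line into a single dict in one pass and then look up whichever keys you need; a minimal non-regex sketch (the wanted list is just illustrative, file name as in the previous answer):
wanted = ['LastName', 'Color']

pairs = {}
with open('test.txt', 'r') as f:
    for line in f:
        parts = line.split(None, 1)  # split on the first whitespace only
        if len(parts) == 2:
            pairs[parts[0]] = parts[1].strip()

extracted = {key: pairs.get(key) for key in wanted}
print(extracted)  # e.g. {'LastName': 'McCat', 'Color': 'Red'}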

Group and Check-mark using Python

I have several files, each of which has data like this (shown as filename: data, where the items inside each file are separated by newlines):
Mike: Plane\nCar
Paula: Plane\nTrain\nBoat\nCar
Bill: Boat\nTrain
Scott: Car
How can I create a CSV file using Python that groups all the different vehicles and then puts an X for each applicable person, like:
Assuming those line numbers aren't in there (easy enough to fix if they are), and with an input file like the following:
Mike: Plane
Car
Paula: Plane
Train
Boat
Car
Bill: Boat
Train
Scott: Car
The solution can be found here: https://gist.github.com/999481
import sys
from collections import defaultdict
import csv

# see http://stackoverflow.com/questions/6180609/group-and-check-mark-using-python

def main():
    # files = ["group.txt"]
    files = sys.argv[1:]
    if len(files) < 1:
        print("usage: ./python_checkmark.py file1 [file2 ... filen]")
        return
    name_map = defaultdict(set)
    for f in files:
        file_handle = open(f, "r")
        process_file(file_handle, name_map)
        file_handle.close()
    print_csv(sys.stdout, name_map)

def process_file(input_file, name_map):
    cur_name = ""
    for line in input_file:
        if ":" in line:
            cur_name, item = [x.strip() for x in line.split(":")]
        else:
            item = line.strip()
        name_map[cur_name].add(item)

def print_csv(output_file, name_map):
    names = list(name_map.keys())
    items = set([])
    for item_set in name_map.values():
        items = items.union(item_set)
    writer = csv.writer(output_file, quoting=csv.QUOTE_MINIMAL)
    writer.writerow([""] + names)
    for item in sorted(items):
        row_contents = ["X" if item in name_map[name] else "" for name in names]
        row = [item] + row_contents
        writer.writerow(row)

if __name__ == '__main__':
    main()
Output:
,Mike,Bill,Scott,Paula
Boat,,X,,X
Car,X,,X,X
Plane,X,,,X
Train,,X,,X
The only thing this script doesn't do is keep the columns in the order the names appear in the input. You could keep a separate list maintaining that order; note that on older Pythons dicts were unordered, although in Python 3.7+ plain dicts (and defaultdict) preserve insertion order.
Here is an example of how to parse these kinds of files.
Note that the dictionary is unordered here. You can use OrderedDict from the standard library (Python 2.7 / 3.2+), find an available implementation or backport if you have an older Python version, or just save the order in an additional list :)
data = {}
name = None
with open(file_path) as f:
    for line in f:
        line = line.strip()
        if ':' in line:  # we have a name here
            name, first_vehicle = [part.strip() for part in line.split(':')]
            data[name] = set([first_vehicle])  # a set of vehicles per name
        else:
            if name:
                data[name].add(line)

# now a dictionary with names/vehicles is available
# let's convert it to a simple csv-formatted string..

# a set of all available vehicles
vehicles = set(v for vlist in data.values() for v in vlist)

for name in data:
    name_vehicles = data[name]
    csv_vehicles = ''
    for v in vehicles:
        if v in name_vehicles:
            csv_vehicles += v
        csv_vehicles += ','
    csv_line = name + ',' + csv_vehicles
Assuming that the input looks like this:
Mike: Plane
Car
Paula: Plane
Train
Boat
Car
Bill: Boat
Train
Scott: Car
This Python script places the vehicles in a dictionary, indexed by the person:
#!/usr/bin/python
persons = {}
vehicles = set()
with open('input') as fd:
    for line in fd:
        line = line.strip()
        if ':' in line:
            tmp = line.split(':')
            p = tmp[0].strip()
            v = tmp[1].strip()
            persons[p] = [v]
            vehicles.add(v)
        else:
            persons[p].append(line)
            vehicles.add(line)
for k, v in persons.items():
    print(k, v)
print('vehicles', vehicles)
Result:
Mike ['Plane', 'Car']
Bill ['Boat', 'Train']
Scott ['Car']
Paula ['Plane', 'Train', 'Boat', 'Car']
vehicles {'Train', 'Car', 'Plane', 'Boat'}
Now all the data needed is placed in data structures. The CSV part is left as an exercise for the reader :-)
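For what it's worth, a hedged sketch of that CSV part, reusing the persons and vehicles structures built above (column order simply follows the order the names were inserted in):
import csv
import sys

names = list(persons.keys())
writer = csv.writer(sys.stdout, quoting=csv.QUOTE_MINIMAL)
writer.writerow([''] + names)
for vehicle in sorted(vehicles):
    # Put an X in each column whose person owns this vehicle.
    writer.writerow([vehicle] + ['X' if vehicle in persons[name] else '' for name in names])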
The most elegant and simple way would be like so:
import os

vehiclesToPeople = {}
people = []
for root, dirs, files in os.walk('/path/to/folder/with/files'):
    for file in files:
        person = file
        people += [person]
        path = os.path.join(root, file)
        with open(path) as f:
            for vehicle in f:
                vehicle = vehicle.strip()  # drop the trailing newline
                vehiclesToPeople.setdefault(vehicle, set()).add(person)
people.sort()
table = [[''] + people]
for vehicle, owners in vehiclesToPeople.items():
    table.append([vehicle] + [('X' if p in owners else '') for p in people])
csv = '\n'.join(','.join(row) for row in table)
You can do pprint.pprint(table) as well to look at it.
