Turning a python dict. to an excel sheet - python

I am having an issue with the below code.
import urllib2
import csv
from bs4 import BeautifulSoup
soup = BeautifulSoup(urllib2.urlopen('http://www.ny.com/clubs/nightclubs/index.html').read())
clubs = []
trains = ["A","C","E","1","2","3","4","5","6","7","N","Q","R","L","B","D","F"]
for club in soup.find_all("dt"):
clubD = {}
clubD["name"] = club.b.get_text()
clubD["address"] = club.i.get_text()
text = club.dd.get_text()
nIndex = text.find("(")
if(text[nIndex+1]=="2"):
clubD["number"] = text[nIndex:nIndex+15]
sIndex = text.find("Subway")
sIndexEnd = text.find(".",sIndex)
if(text[sIndexEnd-1] == "W" or text[sIndexEnd -1] == "E"):
sIndexEnd2 = text.find(".",sIndexEnd+1)
clubD["Subway"] = text[sIndex:sIndexEnd2]
else:
clubD["Subway"] = text[sIndex:sIndexEnd]
try:
cool = clubD["number"]
except (ValueError,KeyError):
clubD["number"] = "N/A"
clubs.append(clubD)
keys = [u"name", u"address",u"number",u"Subway"]
f = open('club.csv', 'wb')
dict_writer = csv.DictWriter(f, keys)
dict_writer.writerow([unicode(s).encode("utf-8") for s in clubs])
I get the error ValueError: dict contains fields not in fieldnames. I dont understand how this could be. Any assistance would be great. I am trying to turn the dictionary into an excel file.

clubs is a list of dictionaries, whereas each dictionary has four fields: name, address, number, and Subway. You will need to encode each of the fields:
# Instead of:
#dict_writer.writerow([unicode(s).encode("utf-8") for s in clubs])
# Do this:
for c in clubs:
# Encode each field: name, address, ...
for k in c.keys():
c[k] = c[k].encode('utf-8').strip()
# Write to file
dict_writer.writerow(c)
Update
I looked at your data and some of the fields have ending new line \n, so I updated the code to encode and strip white spaces at the same time.

Related

Converting text file to dictionary then to JSON object using Python - But I need to split data from the text file

I am trying to convert txt file data to a Python dictionary before dumping it to JSON and then writing JSON data into avro. My text file contains unwanted data that I need to remove, I only need field names and field types:
mappings = ['name', 'type']
with open('xxx.txt', 'r') as fn:
dict1 = {}
names = []
types = []
for line in file.readlines():
names_lines = line.split()[0] # index 0 = names
names.append(names_lines)
types_lines = line.split()[1] # index 1 = types
types.append(types_lines)
id = 1
for d in fn:
desc = list(d.strip().split(' ', 1))
name = desc[0]
i = 0
dict2 = {}
while i < len(mappings):
dict2[mappings[i]] = desc[i]
i = i + 1
dict1[name] = dict2
id = id + 1
print(dict1)
1- My Error is:
name 'file' is not defined. Did you mean: 'filter'?***
replace file with fn:
for line in fn.readlines():
From what you described you wanted in the comments, try this:
records = []
with open('xxx.txt', 'r') as f:
for line in f:
name, type_, *_ = line.split()
records.append({"name": name, "type": type_})
print(records)

How to get multiple lines from a .txt file that have been split and to save them into different variables

def WorkLoad():
readWork=open("Todays_Work.txt","r")
for line in readWork.readlines():
WorkLine = line.split()
Order_No = WorkLine[0]
Deliver_Address = WorkLine[1]
Bay_Collection = WorkLine[2]
Stock = WorkLine[3]
print(WorkLine[0],"\n",WorkLine[1],"\n",WorkLine[2],"\n",WorkLine[3])
print(WorkLine)
I am currently started of with this but it only prints out the last line in the text file.
Try below code this will give you collective result in the form of dictionary.
from collections import OrderedDict
result = OrderedDict() #creating a Ordered dictionary
#setting dictionary elements
result["Order_No"] = []
result["Deliver_Address"] = []
result["Bay_Collection"] = []
result["Stock"] = []
def WorkLoad(result):
readWork=open("Todays_Work.txt","r")
for line in readWork.readlines():
WorkLine = line.split()
result["Order_No"].append(WorkLine[0])
result["Deliver_Address"],append(WorkLine[1])
result["Bay_Collection"].append(WorkLine[2])
result["Stock"].append(WorkLine[3])
return result
data = Workload(result) #calling the workload function
print(data) #printing the data
If you wish to print each line, you should add a print statement inside the loop. Or save them to a later use. Probably you want something like this:
Order_No = []
Delivery_Address = []
Bay_Collection = []
Stock = []
def WorkLoad():
readWork=open("Todays_Work.txt","r")
for line in readWork.readlines():
WorkLine = line.split()
Order_No.append(WorkLine[0])
Deliver_Address.append(WorkLine[1])
Bay_Collection.append(WorkLine[2])
Stock.append(WorkLine[3])
print(Order_No[-1], Deliver_Address[-1], Bay_Collection[-1], Stock[-1])
This prints each line during the loop and saves all the fields in the relative lists. I define the lists outside WorkLoad so they are available from other functions you may have. Hope this is helpful.

Many emoji characters are not read by python file read

I have a list of 1500 emoji character dictionary in a json file, and I wanted to import those to my python code, I did a file read and convert it to a python dictionary but now I have only 143 records. How can I import all the emoji to my code, this is my code.
import sys
import ast
file = open('emojidescription.json','r').read()
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
emoji_dictionary = ast.literal_eval(file.translate(non_bmp_map))
#word = word.replaceAll(",", " ");
keys = list(emoji_dictionary["emojis"][0].keys())
values = list(emoji_dictionary["emojis"][0].values())
file_write = open('output.txt','a')
print(len(keys))
for i in range(len(keys)):
try:
content = 'word = word.replace("{0}", "{1}")'.format(keys[i],values[i][0])
except Exception as e:
content = 'word = word.replace("{0}", "{1}")'.format(keys[i],'')
#file.write()
#print(keys[i],values[i])
print(content)
file_write.close()
This is my input sample
{
"emojis": [
{
"👨‍🎓": ["Graduate"],
"©": ["Copy right"],
"®": ["Registered"],
"👨‍👩‍👧": ["family"],
"👩‍❤️‍💋‍👩": ["love"],
"™": ["trademark"],
"👨‍❤‍👨": ["love"],
"⌚": ["time"],
"⌛": ["wait"],
"⭐": ["star"],
"🐘": ["Elephant"],
"🐕": ["Cat"],
"🐜": ["ant"],
"🐔": ["cock"],
"🐓": ["cock"],
This is my result, and the 143 denotes number of emoji.
143
word = word.replace("�‍�‍�‍�", "family")
word = word.replace("Ⓜ", "")
word = word.replace("♥", "")
word = word.replace("♠", "")
word = word.replace("⌛", "wait")
I'm not sure why you're seeing only 143 records from an input of 1500 (your sample doesn't seem to display this behavior).
The setup doesn't seem to do anything useful, but what you're doing boils down to (simplified and skipping lots of details):
d = ..read json as python dict.
keys = d.keys()
values = d.values()
for i in range(len(keys)):
key = keys[i]
value = values[i]
and that should be completely correct. There are better ways to do this in Python, however, like using the zip function:
d = ..read json as python dict.
keys = d.keys()
values = d.values()
for key, value in zip(keys, values): # zip picks pair-wise elements
...
or simply asking the dict for its items:
for key, value in d.items():
...
The json module makes reading and writing json much simpler (and safer), and using the idiom from above the problem reduces to this:
import json
emojis = json.load(open('emoji.json', 'rb'))
with open('output.py', 'wb') as fp:
for k,v in emojis['emojis'][0].items():
val = u'word = word.replace("{0}", "{1}")\n'.format(k, v[0] if v else "")
fp.write(val.encode('u8'))
Why do you replace all emojis with 0xfffd in the lines:
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
emoji_dictionary = ast.literal_eval(file.translate(non_bmp_map))
Just don't to this!
Using json:
import json
with open('emojidescription.json', encoding="utf8") as emojis:
emojis = json.load(emojis)
with open('output.txt','a', encoding="utf8") as output:
for emoji, text in emojis["emojis"][0].items():
text = "" if not text else text[0]
output.write('word = word.replace("{0}", "{1}")\n'.format(emoji, text))

Parsing Input File in Python

I have a plain text file with some data in it, that I'm trying to open and read using a Python (ver 3.2) program, and trying to load that data into a data structure within the program.
Here's what my text file looks like (file is called "data.txt")
NAME: Joe Smith
CLASS: Fighter
STR: 14
DEX: 7
Here's what my program looks like:
player_name = None
player_class = None
player_STR = None
player_DEX = None
f = open("data.txt")
data = f.readlines()
for d in data:
# parse input, assign values to variables
print(d)
f.close()
My question is, how do I assign the values to the variables (something like setting player_STR = 14 within the program)?
player = {}
f = open("data.txt")
data = f.readlines()
for line in data:
# parse input, assign values to variables
key, value = line.split(":")
player[key.strip()] = value.strip()
f.close()
now the name of your player will be player['name'], and the same goes for all other properties in your file.
import re
pattern = re.compile(r'([\w]+): ([\w\s]+)')
f = open("data.txt")
v = dict(pattern.findall(f.read()))
player_name = v.get("name")
plater_class = v.get('class')
# ...
f.close()
The most direct way to do it is to assign the variables one at a time:
f = open("data.txt")
for line in f: # loop over the file directly
line = line.rstrip() # remove the trailing newline
if line.startswith('NAME: '):
player_name = line[6:]
elif line.startswith('CLASS: '):
player_class = line[7:]
elif line.startswith('STR: '):
player_strength = int(line[5:])
elif line.startswith('DEX: '):
player_dexterity = int(line[5:])
else:
raise ValueError('Unknown attribute: %r' % line)
f.close()
That said, most Python programmers would stored the values in a dictionary rather than in variables. The fields can be stripped (removing the line endings) and split with: characteristic, value = data.rstrip().split(':'). If the value should be a number instead of a string, convert it with float() or int().

Group and Check-mark using Python

I have several files, each of which has data like this (filename:data inside separated by newline):
Mike: Plane\nCar
Paula: Plane\nTrain\nBoat\nCar
Bill: Boat\nTrain
Scott: Car
How can I create a csv file using python that groups all the different vehicles and then puts a X on the applicable person, like:
Assuming those line numbers aren't in there (easy enough to fix if they are), and with an input file like following:
Mike: Plane
Car
Paula: Plane
Train
Boat
Car
Bill: Boat
Train
Scott: Car
Solution can be found here : https://gist.github.com/999481
import sys
from collections import defaultdict
import csv
# see http://stackoverflow.com/questions/6180609/group-and-check-mark-using-python
def main():
# files = ["group.txt"]
files = sys.argv[1:]
if len(files) < 1:
print "usage: ./python_checkmark.py file1 [file2 ... filen]"
name_map = defaultdict(set)
for f in files:
file_handle = open(f, "r")
process_file(file_handle, name_map)
file_handle.close()
print_csv(sys.stdout, name_map)
def process_file(input_file, name_map):
cur_name = ""
for line in input_file:
if ":" in line:
cur_name, item = [x.strip() for x in line.split(":")]
else:
item = line.strip()
name_map[cur_name].add(item)
def print_csv(output_file, name_map):
names = name_map.keys()
items = set([])
for item_set in name_map.values():
items = items.union(item_set)
writer = csv.writer(output_file, quoting=csv.QUOTE_MINIMAL)
writer.writerow( [""] + names )
for item in sorted(items):
row_contents = map(lambda name:"X" if item in name_map[name] else "", names)
row = [item] + row_contents
writer.writerow( row )
if __name__ == '__main__':
main()
Output:
,Mike,Bill,Scott,Paula
Boat,,X,,X
Car,X,,X,X
Plane,X,,,X
Train,,X,,X
Only thing this script doesn't do is keep the columns in order that the names are in. Could keep a separate list maintaining the order, since maps/dicts are inherently unordered.
Here is an example of how-to parse these kind of files.
Note that the dictionary is unordered here. You can use ordered dict (in case of Python 3.2 / 2.7) from standard library, find any available implmentation / backport in case if you have older Python versions or just save an order in additional list :)
data = {}
name = None
with open(file_path) as f:
for line in f:
if ':' in line: # we have a name here
name, first_vehicle = line.split(':')
data[name] = set([first_vehicle, ]) # a set of vehicles per name
else:
if name:
data[name].add(line)
# now a dictionary with names/vehicles is available
# let's convert it to simple csv-formatted string..
# a set of all available vehicles
vehicles = set(v for vlist in data.values()
for v in vlist)
for name in data:
name_vehicles = data[name]
csv_vehicles = ''
for v in vehicles:
if v in name_vehicles:
csv_vehicles += v
csv_vehicles += ','
csv_line = name + ',' + csv_vehicles
Assuming that the input looks like this:
Mike: Plane
Car
Paula: Plane
Train
Boat
Car
Bill: Boat
Train
Scott: Car
This python script, places the vehicles in a dictionary, indexed by the person:
#!/usr/bin/python
persons={}
vehicles=set()
with open('input') as fd:
for line in fd:
line = line.strip()
if ':' in line:
tmp = line.split(':')
p = tmp[0].strip()
v = tmp[1].strip()
persons[p]=[v]
vehicles.add(v)
else:
persons[p].append(line)
vehicles.add(line)
for k,v in persons.iteritems():
print k,v
print 'vehicles', vehicles
Result:
Mike ['Plane', 'Car']
Bill ['Boat', 'Train']
Scott ['Car']
Paula ['Plane', 'Train', 'Boat', 'Car']
vehicles set(['Train', 'Car', 'Plane', 'Boat'])
Now, all the data needed are placed in data-structures. The csv-part is left as an exercise for the reader :-)
The most elegant and simple way would be like so:
vehiclesToPeople = {}
people = []
for root,dirs,files in os.walk('/path/to/folder/with/files'):
for file in files:
person = file
people += [person]
path = os.path.join(root, file)
with open(path) as f:
for vehicle in f:
vehiclesToPeople.setdefault(vehicle,set()).add(person)
people.sort()
table = [ ['']+people ]
for vehicle,owners in peopleToVehicles.items():
table.append([('X' if p in vehiclesToPeople[vehicle] else '') for p in people])
csv = '\n'.join(','.join(row) for row in table)
You can do pprint.pprint(table) as well to look at it.

Categories