sort key in dict based on particular order - python

My code:
# Employee leave record. The values (emp_name, emp_idno, k, b, total, reason)
# are variables that are not defined in this snippet.
abc = {
'Name':emp_name,
'Id no':emp_idno,
'Leave taken':k,
'Leave reqd':b,
'Total days of leave':total,
'Reason':reason
}
I am getting output as:
id, name, total, reqason, leave taken, leave reqd
I want the output to be in this order:
Name, id, leave taken, leave reqd, total, reason
I am stuck up with this and it would be great if anyone can help me out.
my codes for csv
dl = {'Name': emp_name, 'Id no': emp_idno, 'Leave taken': k,
      'Leave reqd': b, 'Reason': reason}

# Desired column order; it is kept in a list and used explicitly below
# rather than relying on the dict's own key order.
key_list = ['Name', 'Id no', 'Leave taken', 'Leave reqd', 'Reason']
abc = {
    'Name': emp_name,
    'Id no': emp_idno,
    'Leave taken': k,
    'Leave reqd': b,
    'Reason': reason,
}

# Print the values in the desired order. The loop variable is not called
# `k` because `k` already holds the 'Leave taken' value.
for key in key_list:
    print(abc[key])

# DictWriter.writerows() expects a list of dict rows, so the whole record is
# appended (appending the key strings would make Lst[0].keys() fail).
# NOTE(review): Lst is assumed to be a list created earlier in the program.
Lst.append(abc)

# Use key_list as the field names so the CSV columns follow that order.
keys = key_list
# NOTE(review): 'wb' is the Python 2 csv idiom; on Python 3 this would be
# open('employee.csv', 'w', newline='') - confirm the target version.
with open('employee.csv', 'wb') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(Lst)

Dictionaries are unordered by default.
You need an ordered dictionary.
see collections.OrderedDict
eg:
from collections import OrderedDict

# An OrderedDict remembers insertion order, so add the keys in the order
# the output should have.
d = OrderedDict()
d['Name'] = emp_name
d['Id no'] = emp_idno
d['Leave taken'] = k
d['Leave reqd'] = b
d['Total days of leave'] = total
d['Reason'] = reason
# A single parenthesised argument prints the same on Python 2 and 3.
print(d)

one simple hack can be storing the keys in a list.
# The wanted output order lives in a separate list; the dict is only used
# for lookups.
key_list = ['Name', 'Id no', 'Leave taken', 'Leave reqd', 'Total days of leave', 'Reason']
abc = {
    'Name': emp_name,
    'Id no': emp_idno,
    'Leave taken': k,
    'Leave reqd': b,
    'Total days of leave': total,
    'Reason': reason,
}
# Walk the list, not the dict, so the values come out in key_list order.
# The loop variable is `key` so it does not overwrite `k` used above.
for key in key_list:
    print(abc[key])

Related

Create a dictionary where the keys are values of dictionaries inside lists in a dictionary and the values are the number of times they appear

I have this dictionary of lists of dictionaries (I cannot change the structure for the work):
# Sample input: country code -> list of sighting records, each a dict with
# 'datetime' and 'city' keys.
dict_countries = {'gb': [{'datetime': '1955-10-10 17:00:00', 'city': 'chester'},
{'datetime': '1974-10-10 23:00:00', 'city': 'chester'}],
'us': [{'datetime': '1955-10-10 17:00:00', 'city': 'hudson'}]
}
And the function:
def Seen_in_the_city(dict_countries:dict,)-> dict:
    """Tally sightings per city across all countries (asker's original logic).

    dict_countries maps a country code to a list of sighting dicts, each
    with a "city" key. Returns a dict keyed by city name.
    """
    city_dict = {}
    for each_country in dict_countries.values():
        for each_sight in each_country:
            citi = each_sight["city"]
            if citi in city_dict.keys():
                # NOTE: "=+1" parses as "= (+1)", so this re-assigns 1 instead
                # of incrementing. This is the bug the question is about and
                # is kept as posted.
                city_dict[each_sight["city"]] =+1
            else:
                city_dict[citi] =+1
    return city_dict
I get:
{'chester': 1,'hudson': 1}
instead of
{'chester': 2,'hudson': 1}
You can try using Counter (a subclass of dict) from the collections module in the Python Standard Library:
from collections import Counter

# Counter is a dict subclass that tallies hashable items.
c = Counter()
for sightings in dict_countries.values():
    # Feed every city name of this country into the counter; records
    # without a 'city' key are ignored, as in the original filter.
    c.update(
        sighting['city'] for sighting in sightings if 'city' in sighting
    )
print(c)
Output
Counter({'chester': 2, 'hudson': 1})
Try:
# Count sightings per city with a plain dict.
output = dict()
for sightings in dict_countries.values():
    for sighting in sightings:
        city_name = sighting["city"]
        # First time this city is seen: start its tally at zero.
        if city_name not in output:
            output[city_name] = 0
        output[city_name] += 1
The operator you want is += rather than =+. Writing x =+1 is parsed as x = (+1), so it simply assigns 1 again, whereas x += 1 adds 1 to the existing value (1 + 1). In the else branch a plain = 1 is enough; the + sign is not needed for a positive number.
if citi in city_dict.keys():
city_dict[each_sight["city"]] +=1
else:
city_dict[citi] = 1
You can use groupby from itertools
from itertools import chain, groupby

# groupby() only merges *adjacent* equal items, so flatten every country's
# sightings into one list of city names and sort it before grouping.
cities = sorted(
    sighting["city"]
    for sighting in chain.from_iterable(dict_countries.values())
)
# Each group holds the repeated occurrences of one city; its length is the
# number of sightings.
print({city: len(list(group)) for city, group in groupby(cities)})
If you don't want additional imports (not that you shouldn't use Counter) here's another way:
dict_countries = {
    'gb': [{'datetime': '1955-10-10 17:00:00', 'city': 'chester'},
           {'datetime': '1974-10-10 23:00:00', 'city': 'chester'}],
    'us': [{'datetime': '1955-10-10 17:00:00', 'city': 'hudson'}],
}


def Seen_in_the_city(dict_countries:dict,)-> dict:
    """Return a dict mapping each city name to its number of sightings.

    dict_countries maps a country code to a list of sighting dicts, each
    with a "city" key.
    """
    city_dict = {}
    for each_country in dict_countries.values():
        for each_sight in each_country:
            citi = each_sight["city"]
            # .get() supplies 0 for a city that has not been counted yet.
            city_dict[citi] = city_dict.get(citi, 0) + 1
    return city_dict


print(Seen_in_the_city(dict_countries))

How to avoid very long if-elif-elif-else statements in Python function

Is there a smart way to shorten very long if-elif-elif-elif... statements?
Let's say I have a function like this:
def very_long_func():
    """Classify the hard-coded `something` and return {'type': <label>}."""
    something = 'Audi'
    car = ['VW', 'Audi', 'BMW']
    drinks = ['Cola', 'Fanta', 'Pepsi']
    countries = ['France', 'Germany', 'Italy']
    # The if/elif chain is the subject of the question and is kept as posted.
    if something in car:
        return {'type':'car brand'}
    elif something in drinks:
        return {'type':'lemonade brand'}
    elif something in countries:
        return {'type':'country'}
    else:
        return {'type':'nothing found'}


very_long_func()
>>>> {'type': 'car brand'}
The actual function is much longer than the example. What would be the best way to write this function (not in terms of speed but in readability)
I was reading this, but I have trouble to apply it to my problem.
Lists are not hashable, so you can't use them as dictionary keys. Go the other way round instead: create a mapping of type -> list, and initialize your output with the default type. This allows you to keep adding new types to your mapping without changing any other code.
def very_long_func():
    """Classify the hard-coded `something` using a label -> members mapping.

    Returns {'type': <label>} for the first mapping entry whose list
    contains the value, or {'type': 'nothing found'} when none does.
    """
    something = 'Audi'
    car = ['VW', 'Audi', 'BMW']
    drinks = ['Cola', 'Fanta', 'Pepsi']
    countries = ['France', 'Germany', 'Italy']
    out = {'type': 'nothing found'}  # If nothing matches
    # Adding a new category only needs a new entry here.
    mapping = {
        'car brand': car,
        'lemonade brand': drinks,
        'country': countries,
    }
    for k, v in mapping.items():
        if something in v:
            out['type'] = k  # update if match found
            break
    return out  # returns matched or default value
you can create dictionary like this and then use map_dict.
from functools import reduce

car = ['VW', 'Audi', 'BMW']
drinks = ['Cola', 'Fanta', 'Pepsi']
countries = ['France', 'Germany', 'Italy']
li = [car, drinks, countries]
types = ['car brand', 'lemonade brand', 'country', 'nothing found']

# One {member: label} dict per category; the label is looked up by the
# category's position in `li`.
dl = [
    dict.fromkeys(members, types[position])
    for position, members in enumerate(li)
]

# Fold the per-category dicts into a single member -> label lookup table.
map_dict = reduce(lambda merged, nxt: dict(merged, **nxt), dl)
Try this:
def create_dct(lst, flag):
    """Return a dict mapping every item of lst to the same value flag."""
    return {k: flag for k in lst}


car = ['VW', 'Audi', 'BMW']
drinks = ['Cola', 'Fanta', 'Pepsi']
countries = ['France', 'Germany', 'Italy']

# Merge the per-category dicts into one member -> label lookup table.
merge_dcts = {}
merge_dcts.update(create_dct(car, 'car brand'))
merge_dcts.update(create_dct(drinks, 'lemonade brand'))
merge_dcts.update(create_dct(countries, 'country'))

something = 'Audi'
try:
    print("type: ", merge_dcts[something])
except KeyError:
    # Only a failed lookup means "nothing found"; other errors should not
    # be swallowed, hence no bare except.
    print("type: nothing found")
You can simulate a switch statement with a helper function like this:
def switch(v):
    """Yield, exactly once, a predicate that tests v against candidates.

    The yielded callable takes any number of positional arguments and
    returns True when v is one of them.
    """
    def matches(*c):
        return v in c

    yield matches
Then your code could be written like this:
something = 'Audi'
for case in switch(something):
if case('VW', 'Audi', 'BMW'): name = 'car brand' ; break
if case('Cola', 'Fanta', 'Pepsi'): name = 'lemonade brand' ; break
if case('France', 'Germany', 'Italy'): name = 'country' ; break
else: name = 'nothing found'
return {'type':name}
If you don't have specific code to do for each value, then a simple mapping dictionary would probably suffice. For ease of maintenance, you can start with a category-list:type-name mapping and expand it before use:
# Flat member -> label lookup, expanded from (members, label) pairs so each
# category is written only once.
mapping = {
    member: label
    for members, label in (
        (('VW', 'Audi', 'BMW'), 'car brand'),
        (('Cola', 'Fanta', 'Pepsi'), 'lemonade brand'),
        (('France', 'Germany', 'Italy'), 'country'),
    )
    for member in members
}
Then your code will look like this:
something = 'Audi'
return {'type':mapping.get(something,'nothing found')}
using a defaultdict would make this even simpler to use by providing the 'nothing found' value automatically so you could write: return {'type':mapping[something]}

How to flag if a dictionary key is absent

I have files, some of which contain CommonChar lines, and my Python code processes them to build a dictionary. Some keys are required, but users might forget to include them. The code should be able to flag the file and the key that is missing.
The syntax for python code to work on is like this:
CommonChar pins Category General
CommonChar pins Contact Mark
CommonChar pins Description 1st line
CommonChar pins Description 2nd line
CommonChar nails Category specific
CommonChar nails Description 1st line
So for above example "Contact" is missing:
CommonChar nails Contact Robert
I have a list for ex: mustNeededKeys=["Category", "Description", "Contact"]
# group name -> {'fileLocation': [path], topic: [words...], ...}
mainDict={}
for dirName, subdirList, fileList in os.walk(sys.argv[1]):
    for eachFile in fileList:
        # excluding file names ending in .swp, .swo (and ~), which are
        # created temporarily when editing in vim
        if not eachFile.endswith(('.swp','.swo','~')):
            filePath= os.path.join(dirName,eachFile)
            with open(filePath, "r") as fh:
                contents=fh.read()
            # Every "CommonChar ..." occurrence up to the end of its line.
            items=re.findall("CommonChar.*$",contents,re.MULTILINE)
            for x in items:
                # Fields: marker, group, topic, then the free-text remainder.
                cc, group, topic, data = x.split(None, 3)
                data = data.split()
                # The first file in which a group is seen is recorded as
                # its location.
                group_dict = mainDict.setdefault(group, {'fileLocation': [filePath]})
                if topic in group_dict:
                    # A repeated topic is appended, separated by a line-break marker.
                    group_dict[topic].extend(['</br>'] + data)
                else:
                    group_dict[topic] = data
This above code does its job of building a dict like this:
{'pins': {'Category': ['General'], 'Contact': ['Mark'], 'Description': ['1st', 'line', '2nd', 'line'] } , 'nails':{'Category':['specific'], 'Description':['1st line']}
So when reading each file with CommonChar and building a group_dict , a way to check all the keys and compare it with mustNeededKeys and flag if not there and proceed if met.
Something like this should work:
# Setup mainDict (equivalent to code given above)
mainDict = {
    'nails': {
        'Category': ['specific'],
        'Description': ['1st', 'line'],
        'fileLocation': ['/some/path/nails.txt']
    },
    'pins': {
        'Category': ['General'],
        'Contact': ['Mark'],
        'Description': ['1st', 'line', '</br>', '2nd', 'line'],
        'fileLocation': ['/some/path/pins.txt']
    }
}

# check for missing keys
mustNeededKeys = {"Category", "Description", "Contact"}
for group, group_dict in mainDict.items():
    # Set difference: required keys that this group does not have.
    missing_keys = mustNeededKeys - set(group_dict.keys())
    if missing_keys:
        # Sorted so the message is the same on every run (sets have no
        # defined iteration order).
        missing_key_list = ','.join(sorted(missing_keys))
        print(
            'group "{}" ({}) is missing key(s): {}'
            .format(group, group_dict['fileLocation'][0], missing_key_list)
        )
# group "nails" (/some/path/nails.txt) is missing key(s): Contact
If you must check for missing keys immediately after processing each group, you could use the code below. This assumes that each group is stored as a contiguous collection of rows in a single file (i.e., not mixed with other groups in the same file or spread across different files).
from itertools import groupby

mainDict={}
mustNeededKeys = {"Category", "Description", "Contact"}
for dirName, subdirList, fileList in os.walk(sys.argv[1]):
    for eachFile in fileList:
        # excluding file names ending in .swp , swo which are created
        # temporarily when editing in vim
        if not eachFile.endswith(('.swp','.swo','~')):
            filePath = os.path.join(dirName,eachFile)
            with open(filePath, "r") as fh:
                contents = fh.read()
            items = re.findall("CommonChar.*$", contents, re.MULTILINE)
            split_items = [line.split(None, 3) for line in items]
            # group the items by group name (element 1 in each row);
            # groupby only merges adjacent rows, hence the contiguity
            # assumption stated above
            for group, group_items in groupby(split_items, lambda row: row[1]):
                group_dict = {'fileLocation': [filePath]}
                # store all items in the current group
                for _cc, _group, topic, data in group_items:
                    data = data.split()
                    if topic in group_dict:
                        group_dict[topic].extend(['</br>'] + data)
                    else:
                        group_dict[topic] = data
                # check for missing keys
                missing_keys = mustNeededKeys - set(group_dict.keys())
                if missing_keys:
                    # sorted for a deterministic message
                    missing_key_list = ','.join(sorted(missing_keys))
                    print(
                        'group "{}" ({}) is missing key(s): {}'
                        .format(group, filePath, missing_key_list)
                    )
                # add group to mainDict
                mainDict[group] = group_dict
data = '''CommonChar pins Category General
CommonChar pins Contact Mark
CommonChar pins Description 1st line
CommonChar pins Description 2nd line
CommonChar nails Category specific
CommonChar nails Description 1st line'''

from collections import defaultdict
from pprint import pprint

required_keys = ["Category", "Description", "Contact"]


def find_missing_keys(groups, required):
    """Return {group: [missing required keys]} for groups lacking any.

    Only keys listed in `required` are reported; extra keys a group happens
    to have are ignored. Missing keys keep the order of `required`.
    """
    missing = {}
    for name, topics in groups.items():
        absent = [key for key in required if key not in topics]
        if absent:
            missing[name] = absent
    return missing


# group name -> {topic: [words...]}
d = defaultdict(dict)
for line in data.splitlines():
    fields = line.split()
    group, topic = fields[1], fields[2]
    if topic == 'Description':
        # Description may span several lines: accumulate every word.
        if topic not in d[group]:
            d[group][topic] = []
        d[group][topic].extend(fields[3:])
    else:
        # Other topics keep only their first word; a later line overwrites.
        d[group][topic] = [fields[3]]

pprint(dict(d))
print('*' * 80)

# find missing keys (a set difference, not a symmetric difference, so that
# extra keys are never reported as "missing")
for k, missing_keys in find_missing_keys(d, required_keys).items():
    for missing_key in missing_keys:
        print('Key "{}" is missing "{}"!'.format(k, missing_key))
Prints:
{'nails': {'Category': ['specific'], 'Description': ['1st', 'line']},
'pins': {'Category': ['General'],
'Contact': ['Mark'],
'Description': ['1st', 'line', '2nd', 'line']}}
********************************************************************************
Key "nails" is missing "Contact"!

Categorizing sentence using dictionary

I am using below function for getting categorizing sentence in themes
def theme(x):
    """Return one category label per sentence in x.

    Each sentence is tested for the keywords in order (substring match);
    the first hit decides the label, and 'None' is used when none match.
    """
    output = []
    for i in x:
        # The if/elif chain is what the question asks to replace; it is
        # kept as posted.
        if 'AC' in i:
            category = 'AC problem'
        elif 'insects' in i:
            category = 'Cleanliness'
        elif 'clean' in i:
            category = 'Cleanliness'
        elif 'food' in i:
            category = 'Food Problem'
        elif 'delay' in i:
            category = 'Train Delayed'
        else:
            category = 'None'
        output.append(category)
    return output
I don't want to use a separate if statement for every word in a category. Instead, I want to give a list/dictionary, e.g. Cleanliness = ['Clean', 'Cleaned', 'spoilt', 'dirty'], and get the category 'Cleanliness' for a sentence if it contains any of the words in the list. How can I do that?
You can use a dict of sets to structure your words with categories, and then generate a word-to-category lookup dict based on the said structure:
# category label -> set of trigger words
categories = {
    'Cleanliness': {'insects', 'clean'},
    'AC Problem': {'AC'},
    'Food Problem': {'food'},
    'Train Delayed': {'delay'}
}
# Inverted index: trigger word -> category label.
lookup = {word: category for category, words in categories.items() for word in words}


def theme(x):
    """Return the set of categories for the words in x.

    Words are matched exactly (case-sensitive); unknown words contribute
    the string 'None'.
    """
    return {lookup.get(word, 'None') for word in x}
so that theme(['AC', 'clean', 'insects']) would return a set of corresponding categories:
{'Cleanliness', 'AC Problem'}
This should do what you're asking. I set all the keys to lowercase and converted i to lowercase when checking if you get a match, but with different capitalization, it still counts.
def theme(x):
    """Return one category label per word in x, matching case-insensitively.

    Words that are not in the lookup table produce the string "None".
    """
    # I recommend coming up with a more suitable name for your dictionary
    # in your actual program
    myDict = {"ac":"AC problem", "insects":"Cleanliness", "clean":"Cleanliness", "food":"Food Problem", "delay":"Train Delayed"}
    output = []
    for i in x:
        # Keys are lowercase, so lowercase the word before the lookup;
        # .get() supplies "None" and avoids a KeyError for unknown words.
        output.append(myDict.get(i.lower(), "None"))
    return output
Here's some examples:
>>>print(theme(['Clean', 'Cleaned', 'spoilt', 'dirty']))
['Cleanliness', 'None', 'None', 'None']
>>>print(theme(['Delay', 'Ham', 'Cheese', 'Insects']))
['Train Delayed', 'None', 'None', 'Cleanliness']
I have worked out a another way:
def theme(x):
    """Return one label per sentence in x based on whole-word matches.

    Relies on two word collections defined elsewhere: `cleanliness` and
    `ac_problem`. A sentence is lowercased and split on whitespace before
    being compared against them.
    """
    output = []
    for i in x:
        # Split once and reuse the words for both category checks.
        words = i.lower().split()
        if set(cleanliness).intersection(words):
            category = 'clean'
        elif set(ac_problem).intersection(words):
            category = 'ac problem'
        else:
            category = 'none'
        output.append(category)
    return output
Maybe you can do it like this:
def theme(x):
    """Return the category for each word in x (None for unknown words)."""
    name_dic = {"AC": "AC problem",
                "clean": "Cleanliness",
                "food": "Food Problem"
                }
    # dict.get returns None when the word has no category.
    return [name_dic.get(e) for e in x]
Or more exactly like this:
def theme(x):
    """Return the category for each word in x (None for unknown words)."""
    # (word, category) pairs; several words may share one category.
    name_list = [
        ("AC", "AC problem"),
        ("clean", "Cleanliness"),
        ("insects", "Cleanliness"),
        ("food", "Food Problem")
    ]
    name_dic = dict(name_list)
    # dict.get returns None when the word has no category.
    return [name_dic.get(e) for e in x]
Hope it helps.

Convert a csv into category-subcategory using array

Above is the input table i have in csv
I am trying to use arrays and while loops in Python; I am new to this language. The loop should run twice to give the Category\sub-category\sub-category_1 order. I am trying to use split(). The output should be like below:
import csv
# NOTE(review): indentation was lost in this snippet; the lines after `with`
# presumably belong inside its block.
with open('D:\\test.csv', 'rb') as f:
# NOTE(review): delimiter='' is an empty string, but csv expects a
# one-character delimiter (a character may have been lost here) - confirm
# the intended value.
reader = csv.reader(f, delimiter='',quotechar='|')
data = []
for name in reader:
# NOTE(review): data is a list and name is a row produced by the reader, so
# indexing data with it looks like it cannot work as written - confirm
# the intent.
data[name] = []
And if you read the lines of your csv and access the data then you can manipulate the way you want later.
# category id (int) -> remaining fields of its row
cats = {}
with open('my.csv', "r") as ins:
    # check each line of the file
    for line in ins:
        # remove double quotes: replace('"', '')
        # remove break line : rstrip()
        a = str(line).replace('"', '').rstrip().split('|')
        # skip the header row, whose first field is 'CatNo'
        if a[0] != 'CatNo':
            cats[int(a[0])] = a[1:]

# A single parenthesised argument prints the same on Python 2 and 3.
for p in cats:
    print('cat_id: %d, value: %s' % (p, cats[p]))

# you can access the value by the int ID
print(cats[1001])
the output:
cat_id: 100, value: ['Best Sellers', 'Best Sellers']
cat_id: 1001, value: ['New this Month', 'New Products\\New this Month']
cat_id: 10, value: ['New Products', 'New Products']
cat_id: 1003, value: ['Previous Months', 'New Products\\Previous Months']
cat_id: 110, value: ['Promotional Material', 'Promotional Material']
cat_id: 120, value: ['Discounted Products & Special Offers', 'Discounted Products & Special Offers']
cat_id: 1002, value: ['Last Month', 'New Products\\Last Month']
['New this Month', 'New Products\\New this Month']
Updated script for your question:
categories = {}


def get_parent_category(cat_id):
    """Return the parent category id of cat_id, or '' for a top-level one.

    Ids of one or two characters are treated as top-level; otherwise the
    parent id is cat_id without its final character (e.g. '1001' -> '100').
    """
    if len(cat_id) <= 2:
        return ''
    return cat_id[:-1]
with open('my.csv', "r") as ins:
    for line in ins:
        # remove double quotes: replace('"', '')
        # remove break line : rstrip()
        a = str(line).replace('"', '').rstrip().split('|')
        cat_id = a[0]
        # skip the header row, whose first field is 'CatNo'
        if cat_id != 'CatNo':
            categories[cat_id] = {
                'parent': get_parent_category(cat_id),
                'desc': a[1],
                'long_desc': a[2]
            }

# A single parenthesised argument prints the same on Python 2 and 3.
print('Categories relations:')
for p in categories:
    parent = categories[p]['parent']
    output = categories[p]['desc']
    # Walk up the parent chain, prefixing each ancestor's description,
    # until a top-level category (parent '') is reached.
    while parent != '':
        output = categories[parent]['desc'] + ' \\ ' + output
        parent = categories[parent]['parent']
    print('\t' + output)
output:
Categories relations:
New Products
New Products \ Best Sellers
New Products \ Discounted Products & Special Offers
New Products \ Best Sellers \ Previous Months
New Products \ Best Sellers \ Last Month
New Products \ Best Sellers \ New this Month

Categories