How to transpose a single item in a nested list - python

I am trying to input values into my table but the table is not coming out the way I would like it to. The headers ("OrderDate", "Rep", etc.) of my given csv file should be under the "Columns:" cell in the following image: Table of statistical values
I have tried to create multiple functions that could transpose the headers but when trying to print the table, it would give the error:
TypeError: unsupported format string passed to list.__format__.
One code I tired to input just before the "labels" line was:
headers2 = [x.split() for x in headers]
P.S. I have removed the csv file code and manually put in a list assigned to "A".
My Code:
A = [['OrderDate', 'Region', 'Rep', 'Item', 'Units', 'Unit Price'],
['4-Jul-2014', 'East', 'Richard', 'Pen Set', '62', '4.99'],
['12-Jul-2014', 'East', 'Nick', 'Binder', '29', '1.99'],
['21-Jul-2014', 'Central', 'Morgan', 'Pen Set', '55', '12.49'],
['29-Jul-2014', 'East', 'Susan', 'Binder', '81', '19.99'],
['7-Aug-2014', 'Central', 'Matthew', 'Pen Set', '42', '23.95'],
['15-Aug-2014', 'East', 'Richard', 'Pencil', '35', '4.99'],
['24-Aug-2014', 'West', 'James', 'Desk', '3', '275'],
['1-Sep-2014', 'Central', 'Smith', 'Desk', '2', '125']]
minVal = []
maxVal = []
hist = []
average = []
stanDev = []
mode = []
headers = A[0] #this sets the variable "headers" as the first row
rows = A[1:] #sets the variable 'rows' to be a nested list without headers
def rows2cols(A):
if len(A) == 0: #this covers the base case of having an empty csv file
return []
res = [[] for x in headers] #creates a list of empty lists
for line in A:
for col in range(len(line)):
res[col].append(line[col])
return res
def convertstringtofloats(A):
res = []
for x in A:
res.append(float(x))
return res
def isnumericlist(A):
for x in A:
try:
numeric = float(x)
except:
return False
return True
def getMin(A):
B = convertstringtofloats(cols[col]) #Let's Python know what B is for the next line. If this isn't here, there is an error.
res = B[0]
for x in A:
if x < res:
res = x
return res
def getMax(A):
B = convertstringtofloats(cols[col]) #Let's Python know what B is for the next line. If this isn't here, there is an error.
res = B[0]
for x in A:
if x > res:
res = x
return res
def getAvg(A):
return sum(A)/len(A)
def most_common(A):
counts = {}
for x in A:
counts[(x)] = counts.get((x), 0) + 1
max = -1
maxKey = ""
for key,value in counts.items():
if max < value:
max = value
maxKey = key
return maxKey
def getSD(A):
sumsq = 0
for n in A:
sumsq += (getAvg(A))**2
return sumsq
cols = rows2cols(rows) #transposes 'rows' and assigns to variable 'cols'
def stats(A):
B = convertstringtofloats(A)
minVal.append(getMin(B))
maxVal.append(getMax(B))
average.append(getAvg(B))
stanDev.append(getSD(B))
for col in range(len(headers)):
if isnumericlist(cols[col]):
stats(cols[col]) #calls the function to calculate stats of the transposed matrix
else:
minVal.append("n/a")
maxVal.append("n/a")
average.append("n/a")
stanDev.append("n/a")
mode.append(most_common(cols[col]))
#headers2 = [x.split() for x in headers]
labels = ["Columns:", "Min", "Max", "Avg", "Std. Dev.", "Most Common Word"] #labels for the table
table_values = [labels, headers, minVal, maxVal, average, stanDev, mode] #combines all the calculated stats into a single list
print(table_values)
def print_table(table):
longest_cols = [
(max([len(str(row[i])) for row in table]) + 0) for i in range(len(table[0]))
]
row_format = "|".join([" {:>" + str(longest_col) + "} " for longest_col in longest_cols])
first = True
for row in table:
print(row_format.format(*row))
if first:
print((sum(longest_cols) + (len(table[0]) - 0) * 3) * "-")
first = False
print_table(table_values) # this prints the 'labels' at the top, but the statistical values are not in the right place

Related

How can I use my helper functions to get the correct output

So I created a helper function to help my main function in extracting stuff from a dictionary...
and here is my code and function
def rdict(recipes):
recipes_splitted = {}
for r in recipes:
recipe_name, parts = r.split(":")
recipe_parts = {}
for part in parts.split(','):
product, number = part.split('*')
recipe_parts[product] = int(number)
recipes_splitted[recipe_name] = recipe_parts
return recipes_splitted
def extract(recipes, data):
result = []
for r in recipes:
tmp = []
for key in data[r]:
tmp.append(f"{key}:{data[r][key]}")
final_string = ""
for i in range(len(tmp)):
if i < len(tmp) - 1:
final_string += tmp[i] + ", "
else:
final_string += tmp[i]
result.append(final_string)
return result
So what I'm trying to do is make sure data in extract(recipe, data) go through rdict(data) since rdict will convert data into a dictionary, which is what I need.. However, when I tried doing for key in rdict(data[r]): the output returns Error. String is not supscriptable..
what should I do to successfully implement the changes??
Edit
So from my current code, here is a sample input..
print(extract(recipes = ['T-Bone', 'Green Salad1'],data = ["Pork Stew:Cabbage*5,Carrot*1,Fatty Pork*10",
"Green Salad1:Cabbage*10,Carrot*2,Pineapple*5",
"T-Bone:Carrot*2,Steak Meat*1"]
))
and in order for my code to work, it has to be like this
print(extract(recipes = ['T-Bone', 'Green Salad1'], data = {'Pork Stew': {'Cabbage': 5, 'Carrot': 1, 'Fatty Pork': 10}, 'Green Salad1': {'Cabbage': 10, 'Carrot': 2, 'Pineapple': 5},'T-Bone': {'Carrot': 2, 'Steak Meat': 1}}))
So from the input, data should be changed from
data = ["Pork Stew:Cabbage*5,Carrot*1,Fatty Pork*10",
"Green Salad1:Cabbage*10,Carrot*2,Pineapple*5",
"T-Bone:Carrot*2,Steak Meat*1"]
to
data = {'Pork Stew': {'Cabbage': 5, 'Carrot': 1, 'Fatty Pork': 10}, 'Green Salad1': {'Cabbage': 10, 'Carrot': 2, 'Pineapple': 5},'T-Bone': {'Carrot': 2, 'Steak Meat': 1}}
Convert the data to dict in extract().
recipes = ['T-Bone', 'Green Salad1']
data = ["Pork Stew:Cabbage*5,Carrot*1,Fatty Pork*10",
"Green Salad1:Cabbage*10,Carrot*2,Pineapple*5",
"T-Bone:Carrot*2,Steak Meat*1"]
def rdict(recipes):
recipes_splitted = {}
for r in recipes:
recipe_name, parts = r.split(":")
recipe_parts = {}
for part in parts.split(','):
product, number = part.split('*')
recipe_parts[product] = int(number)
recipes_splitted[recipe_name] = recipe_parts
return recipes_splitted
def extract(recipes, data):
data = rdict(data) # convert data to dict first
result = []
for r in recipes:
tmp = []
for key in data[r]:
tmp.append(f"{key}:{data[r][key]}")
final_string = ""
for i in range(len(tmp)):
if i < len(tmp) - 1:
final_string += tmp[i] + ", "
else:
final_string += tmp[i]
result.append(final_string)
return result
print(extract(recipes, data))
Output:
['Carrot:2, Steak Meat:1', 'Cabbage:10, Carrot:2, Pineapple:5']
Renamed rdict to parse_recipe, and modified it to return a tuple that is lighter and easier to process
In extract:
a) Build a dict of recipes: data_recipes
b) Built result by getting the wanted recipes, with a guard against missing recipe (which be an empty dict:{} )
def parse_recipe(s):
recipe, ings_s = s.split(':')
ings_l = ings_s.split(',')
ings_d= {}
for ing in ings_l:
i,q = ing.split('*')
ings_d[i.strip()] = q.strip()
return recipe.strip(), ings_d
def extract(recipes, data):
data_recipes = {}
for s in data:
recipe, ings_d = parse_recipe(s)
data_recipes[recipe] = ings_d
return {r: data_recipes.get(r, dict()) for r in recipes}

For a dictionary compare both key and value with the values in a list

I have the following structures:
prev = { 'alpha:10.2': '145', 'teta:180': '198', 'eltira:140': '222', 'ronta:23.14':220}
now = ['alpha:10.3','teta:180', 'eltira:142']
and the following code:
old = []
for k, v in prev.iteritems():
if k not in now:
old.append(v)
So if the key from the dict prev is not found in the list now, I add the value to the old list.
Result 'old= [ '145', '222']`
But, I want to check also which part doesn't correspond from the key. I'm inclined to use namedtuple.
Package = collections.namedtuple('Package', 'name version')
for k, v in prev.items():
name, version = k.split(':')
Package(name=k, version='v')
if the name corresponds but the version not, do the same as above.
if the name is not found add it to a new list old_names, but also do as above.
The issue is that if k not in now will not work. I can do it with two loops and three ifs but maybe there is an option to do it more clean.
Expecting output:
old=[ '145', '222']
old_names=['ronta']
I have splitted the 'now' list and converted it to a dictionary (name:version) which takes a single iteration. Later, the values from the dictionary can be accessed in constant time for further operations.
prev = { 'alpha:10.2': '145', 'teta:180': '198', 'eltira:140': '222', 'old:1. 43':'150'}
now = ['alpha:10.3','teta:180', 'eltira:142']
now_dict = {}
old = []
old_names = []
for items in now:
temp0,temp1 = items.split(':')
now_dict[temp0] = temp1
for k,v in prev.items():
name,version = k.split(':')
if name not in now_dict.keys():
old_names.append(name)
old.append(v)
continue
if version != now_dict[name]:
old.append(v)
continue
else:
continue
if __name__ == '__main__':
print(old_names)
print(old)
Here is a way to do it:
prev = { 'alpha:10.2': '145', 'teta:180': '198', 'eltira:140': '222', 'zeta:12.1' : '334'}
now = ['alpha:10.3','teta:180', 'eltira:142']
now_splited = [nk.split(":") for nk in now]
old = []
old_name = []
for k, v in prev.items():
if k not in new_key:
old_n, old_v = k.split(":")
name = any(map(lambda n : n[0] == old_n, now_splited))
version = any(map(lambda n : n[1] == old_v, now_splited))
if name and not version:
old.append(v)
elif not name:
old_name.append(old_n
old.append(old_v))
Results :
>>> print(old, old_name)
['145', '222', '334'] ['zeta']
You can make now a dict by maping operator.methodcaller to it and wrapping it in dict() then a comprehension will be easy using the same method on prev:
splitter = operator.methodcaller('split', ':')
now_dict, kv_map = dict(map(splitter, now)), map(splitter, prev)
old = [prev[':'.join((k,v))] for k, v in kv_map if k in now_dict]
old_names = [n for n, v in kv_map if n not in now_dict]
Results:
#old
['145', '198', '222']
#old_names
[]

Recursively parse/convert structured text to dictionary

Is there any good regex/function or packages that allows us to parse indented structured text/data into a dictionary? For example, I have data something like this (can have deeper levels than I mentioned below):
xyz1 : 14
xyz2 : 35
xyz3 : 14
xyz4
sub1_xyz4
sub1_sub1_xyz4 : 45
sub2_sub1_xyz4 : b1fawe
sub2 xyz4 : 455
xyz5 : 2424
And I want to convert it into a dictionary like:
{
'xyz1': '14',
'xyz2': '34',
'xyz3': '14',
'xyz4': {
'sub1_xyz4': {
'sub1_sub1_xyz4': '45',
'sub2_sub1_xyz4': 'b1fawe',
},
'sub2_xyz4': '455'
},
'xyz5': '2424'
}
I tried the following but not able to get it consistently. I feel like there is a very good recursive (so that it can handle unknown depths) function when trying to manage the indented/sub attributes. Any suggestions?
def parse_output(value, indent=0):
parsed_dict = dict()
if indent > 0:
for i in re.split('\n(?!\s{,%d})' % (indent - 1), value):
print("split value is: : ", i)
if '\n' not in i:
iter_val = iter(list(map(lambda x: x.strip(), re.split(' : ', i))))
parsed_dict = {**parsed_dict, **dict(zip(iter_val, iter_val))}
else:
parse_bearer_info(re.split('\n', i, 1)[1])
iter_val = iter(list(map(lambda x: x.strip(), re.split('\n', i, 1))))
parsed_dict = {**parsed_dict, **dict(zip(iter_val, iter_val))}
else:
for i in re.split('\n(?!\s+)', value):
#print("iteration value is: ", i)
if '\n' not in i:
iter_val = iter(list(map(lambda x: x.strip(), re.split(' : ', i))))
parsed_dict = {**parsed_dict, **dict(zip(iter_val, iter_val))}
else:
#print(re.split('\n', i, 1))
#out = parse_bearer_info(re.split('\n', i, 1)[1], 4)
iter_val = iter(list(map(lambda x: x.strip(), re.split('\n', i, 1))))
parsed_dict = {**parsed_dict, **dict(zip(iter_val, iter_val))}
return parsed_dict
You can use itertools.groupby with recursion:
import itertools, re, json
_data = [re.split('\s+:\s+', i) for i in filter(None, content.split('\n'))]
def group_data(d):
_d = [[a, list(b)] for a, b in itertools.groupby(d, key=lambda x:bool(x[-1]) and not x[0].startswith(' '))]
_new_result = {}
for a, b in _d:
if a:
_new_result.update(dict([[c, _d] for c, [_d] in b]))
else:
_new_result[b[0][0]] = group_data([[c[2:], _d] for c, _d in b[1:]])
return _new_result
print(json.dumps(group_data([[a, b] for a, *b in _data]), indent=4))
Output:
{
"xyz1": "14",
"xyz2": "35",
"xyz3": "14",
"xyz4": {
"sub1_xyz4": {
"sub1_sub1_xyz4": "45",
"sub2_sub1_xyz4": "b1fawe"
},
"sub2 xyz4": "455"
},
"xyz5": "2424"
}
Where content is:
xyz1 : 14
xyz2 : 35
xyz3 : 14
xyz4
sub1_xyz4
sub1_sub1_xyz4 : 45
sub2_sub1_xyz4 : b1fawe
sub2 xyz4 : 455
xyz5 : 2424
You could probably do this recursively, but since you only need to track a single indent level, you could just keep a stack with the current object. Add keys to the last item in the stack. When the value is empty, add a new dictionary and push it to the stack. When the indent decreases, pop from the stack.
Something like:
res = {}
stack = [res]
cur_indent = 0
for line in s.split('\n'):
indent = len(line) - len(line.lstrip())
if (indent < cur_indent): # backing out
stack.pop()
cur_indent = indent
else:
cur_indent = indent
vals = line.replace(" ", "").split(':')
current_dict = stack[-1]
if(len(vals) == 2):
current_dict[vals[0]] = vals[1]
else: # no value, must be a new level
current_dict[vals[0]] = {}
stack.append(current_dict[vals[0]])
Result:
{'xyz1': '14',
'xyz2': '35',
'xyz3': '14',
'xyz4': {'sub1_xyz4': {'sub1_sub1_xyz4': '45', 'sub2_sub1_xyz4': 'b1fawe'},
'sub2xyz4': '455'},
'xyz5': '2424'}

Matching element from value list in a dictionary to another element in the same list in python

I'm using the following: http://deron.meranda.us/data/nicknames.txt which has the nicknames for most of the names. I'm using it as follows:
def load_names():
with open('file.txt', 'r') as infile:
outdict = collections.defaultdict(list)
for i, line in enumerate(infile.readlines()):
tmp = line.strip().split('\t')
outdict[tmp[1]].append((tmp[0]))
return dict(outdict)
This returns a dictionary where the actual names as keys and all their nicknames as a list of values for that key.
Now, when I have a
namelist = ['KEN', 'KENNY', 'KENNETH', 'MITCH', 'MITCHELL', 'LIZ', 'LIZZIE', 'ELIZABETH']
then I want to output only the different names i.e., ['KENNETH', 'MITCHELL', 'ELIZABETH'], which I'm able to do by:
l = load_names()
temp =[]
for i in namelist:
v = l.get(i)
if v is not None:
l3 = [x for x in namelist if x not in v]
temp.append(l3)
print(temp)
s = set.intersection(*map(set,temp))
s = list(s)
print(s)
However, I also want it to deal the case like ['KEN', 'KENNY', 'MITCH', 'MITCHELL', 'LIZ', 'LIZZIE', 'ELIZABETH'], this should output ['KEN', 'MITCHELL', 'ELIZABETH'] i.e., the two nicknames 'KEN' and 'KENNY' belong to the same key so, I want to consider them as one and have only one of them in the final list. Also, if I have a namelist as ['KENNETH, 'ZHAO', 'MITCH', 'MITCHELL']then it should output: ['KENNETH', 'ZHAO', 'MITCHELL'] i.e., for such names that do not occur in the dictionary at all (in either key or value), then output list should still have that name.
How do I get that?
UPATE:
outdict = {'KENNETH': ['KEN', 'KENNY'], 'MITCHELL': ['MITCH'], 'ELIZABETH' : ['LIZ', 'LIZZIE'], .....}
namelist1 = ['KEN', 'KENNY', 'KENNETH', 'MITCH', 'MITCHELL', 'LIZ', 'LIZZIE', 'ELIZABETH']
output1 = `['KENNETH', 'MITCHELL', 'ELIZABETH']`
I'm getting the above output from the code I have put.
However, I want to be able to get the following outputs also when the namelists are as follows:
namelist2 = ['KEN', 'KENNY', 'MITCH', 'MITCHELL', 'LIZ', 'LIZZIE', 'ELIZABETH']`
output2 = `['KEN', 'MITCHELL', 'ELIZABETH']`
namelist3 = ['KENNETH, 'ZHAO', 'MITCH', 'MITCHELL']`
output3 = `['KENNETH', 'ZHAO', 'MITCHELL']`
Got the answer myself:
l = load_names()
temp =[]
e = {}
for n in namelist:
if n in l.keys():
temp.append(n)
for ix in namelist:
for key, i in l.items():
if key in namelist:
continue
else:
b=0
if ix in i:
b=1
f=0
for x in temp:
if x in i:
f=1
break
if f == 0:
temp.append(ix)
break
b=0
for k2, loc in l.items():
if ix in loc:
b=1
break
elif ix == k2:
b=1
break
if b == 0:
temp.append(ix)
break
print(temp)
try this,
def getOddout(st):
out = []
for s in st:
if not any([s in r for r in st if s != r]):
out.append(s)
return out
namelist = ['KEN', 'KENNY', 'KENNETH', 'MITCH', 'MITCHELL', 'LIZ', 'LIZZIE', 'ELIZABETH']
print getOddout(namelist)
if its not put your dict object
Adding the lists to a dict and then returning the keys works, but there has to be a better way.
l = load_names()
e = {}
for k, v in l.items():
for n in namelist:
if n in v and k not in e:
e[k] = v
print('dict', e.keys())
Outputs
['ELIZABETH', 'MITCHELL', 'KENNETH']

Replacing names of a file

I have the file and I want to replace names according to the pair of objects in the list and elements inside:
def replace_variables(i, distance = 0, T1 ='0', T2 = '0', Phi = '0' ):
i_[0][0] = '-2'
i_[1][0] = '2'
input_name = input_name.replace('distance',distance).replace('one',T1).replace('two',T2).replace('Phi',Phi)
print input_name
input_name = 'file_distance_one_two_Phi.txt'
a = [['distance','+2','-2'], ['T1','+2','-2'], ['T2','+2','-2'], ['Phi','+2','-2']]
new_list = list(itertools.permutations(a, 2))
for i in new_list:
replace_variables(i, x, y)
But I'm getting back:
file_distance_T1_0_0.txt, file_-2_T2_0_0.txt, file_distance_-2_0_0_0.txt, and so on
I want to get:
file_-2,-2_0_0.txt, file_-2_0_-1_0.txt, file_2_-2_0_0.txt and so forth

Categories