JS to Python lambda/list comp conversion

Is it possible to convert this JS code to Python using the same map/reduce?
var fs = require('fs')
var output = fs.readFileSync('data.txt', 'utf8')
  .trim()
  .split('\n')
  .map(line => line.split('\t'))
  .reduce((orders, line) => {
    orders[line[0]] = orders[line[0]] || []
    orders[line[0]].push({
      name: line[1],
      price: line[2],
      quantity: line[3]
    })
    return orders
  }, {})
console.log(output)
So far I only have the code up to the map part:
txt = open('data.txt').read()
mylist = map(lambda x: x.split('\t'), txt.strip().split('\n'))
Not sure if it's possible to do this with a lambda/list comprehension. Any way will do. Thanks guys!
---- UPDATE 1 ----
Thanks @univerio for the answer.
Additional learning: do you guys have any idea what the problem is with this throwing a NoneType error for the {} accumulator?
with open("data.txt") as txt:
output = reduce(lambda x,y : x.setdefault(y[0], []).append({"name": y[1], "price": y[2], "quantity": y[3]}).items(),\
map(lambda x: x.split('\t'), txt.read().strip().split('\n')),\
{})
print output
---- UPDATE 2 ----
Well, it is ugly, but I got it working with the same map/reduce.
def update_orders(orders, line):
    orders.setdefault(line[0], []).append({"name": line[1], "price": line[2], "quantity": line[3]})
    #orders[line[0]] = orders.get(line[0], []) + [{"name": line[1], "price": line[2], "quantity": line[3]}]
    return orders

with open("data.txt") as txt:
    output = reduce(lambda x, y: update_orders(x, y),
                    map(lambda x: x.split('\t'), txt.read().strip().split('\n')),
                    {})
print output
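The NoneType in UPDATE 1 comes from the fact that list.append mutates the list in place and returns None, so the chained .items() call runs on None (and reduce then carries None forward). A minimal demonstration of that behaviour:

orders = {}
result = orders.setdefault('A', []).append({'name': 'x'})
print(result)   # None -- append updated the list but returned nothing
print(orders)   # {'A': [{'name': 'x'}]}

The helper in UPDATE 2 works because it explicitly returns orders.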

It's more idiomatic in Python (IMO, anyway) to use a loop instead of reduce. You can also take advantage of streaming file access this way:
with open("data.txt") as f:
output = {}
for line in f:
key, name, price, quantity = line.strip().split("\t")
output.setdefault(key, []).append({"name": name, "price": price, "quantity": quantity})
This doesn't do exactly what the JS version does with respect to whitespace handling, but it should be fine for sane inputs.
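If you prefer, collections.defaultdict reads a little cleaner than setdefault here; a sketch of the same loop (my variant, not from the original answer):

from collections import defaultdict

with open("data.txt") as f:
    output = defaultdict(list)
    for line in f:
        key, name, price, quantity = line.strip().split("\t")
        output[key].append({"name": name, "price": price, "quantity": quantity})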

Related

In Python, read CSV data and convert unique values into a dictionary

How do I convert the comma-delimited records below
COL1,COL2,COL3,COL4
A101,P501,U901,US_A
A101,P501,U902,US_B
A101,P502,U901,US_A
A102,P501,U901,US_A
A102,P502,U902,US_B
into a Python dictionary like this:
result = {
    "A101": {
        "P501": {"U901": "US_A", "U902": "US_B"},
        "P502": {"U901": "US_A"}
    },
    "A102": {
        "P501": {"U901": "US_A"},
        "P502": {"U902": "US_B"}
    }
}
Thank you for your help!
Approach
We can process the rows of the CSV file as follows:
Convert each row of the CSV file from a list to a nested dictionary (see "Convert a list to nested dictionary"), i.e. the line reduce(lambda x, y: {y: x}, reversed(row)) in the code below; a one-row illustration follows this list.
Merge the nested dictionaries (see "Merge nested dictionaries in Python") using the merge_dict function below.
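To make the first step concrete, here is what that reduce call produces for one parsed row (my illustration, not part of the original answer):

from functools import reduce

row = ["A101", "P501", "U901", "US_A"]
nested = reduce(lambda x, y: {y: x}, reversed(row))
print(nested)   # {'A101': {'P501': {'U901': 'US_A'}}}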
Code
import csv
from functools import reduce  # reduce lives in functools in Python 3

def csv_to_nested_dict(filenm):
    ' CSV file to nested dictionary '
    with open(filenm, 'r') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter=',')
        next(csv_reader)  # skip header row
        result = {}
        for row in csv_reader:
            # Convert row to nested dictionary and
            # merge into result
            result = merge_dict(result,
                                reduce(lambda x, y: {y: x}, reversed(row)))  # row to nested dictionary
        return result

def merge_dict(dict1, dict2):
    ' Merges nested dictionaries '
    for key, val in dict1.items():
        if type(val) == dict:
            if key in dict2 and type(dict2[key]) == dict:
                merge_dict(dict1[key], dict2[key])
        else:
            if key in dict2:
                dict1[key] = dict2[key]
    for key, val in dict2.items():
        if key not in dict1:
            dict1[key] = val
    return dict1
Test
Usage:
res = csv_to_nested_dict('test.txt') # result
# Use json to pretty print nested dictionary res
import json
print(json.dumps(res, indent = 4))
Input File test.txt
COL1,COL2,COL3,COL4
A101,P501,U901,US_A
A101,P501,U902,US_B
A101,P502,U901,US_A
A102,P501,U901,US_A
A102,P502,U902,US_B
Output
{
"A101": {
"P501": {
"U901": "US_A",
"U902": "US_B"
},
"P502": {
"U901": "US_A"
}
},
"A102": {
"P501": {
"U901": "US_A"
},
"P502": {
"U902": "US_B"
}
}
}
Here is a simpler version of the solution:
import csv
import sys

def dict_reader(file_name):
    with open(file_name, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        try:
            data = dict()
            for row in reader:
                col1, col2, col3, col4 = (row["COL1"], row["COL2"], row["COL3"], row["COL4"])
                if col1 in data:
                    if col2 in data[col1]:
                        data[col1][col2].update({col3: col4})
                    else:
                        data[col1][col2] = {col3: col4}
                else:
                    data[col1] = {col2: {col3: col4}}
        except csv.Error as e:
            sys.exit('file {}, line {}: {}'.format(file_name, reader.line_num, e))
        finally:
            return data
It is not a very elegant solution, but it works.
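A slightly tidier variant of the same idea (my sketch, not from the original answers) lets setdefault build the intermediate levels; csv_to_nested is just a placeholder name, and it assumes the same COL1..COL4 header:

import csv

def csv_to_nested(file_name):
    data = {}
    with open(file_name, 'r') as csvfile:
        for row in csv.DictReader(csvfile):
            # setdefault creates each missing level the first time a key appears
            data.setdefault(row["COL1"], {}).setdefault(row["COL2"], {})[row["COL3"]] = row["COL4"]
    return data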

Question about extracting data from JSON using Python

I am building a bot game for my friends on LINE. I'm a beginning coder. I'm trying to access an object in JSON which includes a string + an integer. I've looked around, but nothing seems to fit what I need. What would be the best/simplest solution?
My code is amateur, please go easy on me. :P
I'm trying to have Python extract "Name" + "Stat" from the JSON.
Right now it only extracts "Name" and randomly selects an item. Is there any way to select the item + the stat, display the item and calculate the stat? Thanks.
Python 3:
if text == 'FIGHT':
    with open('items.json', 'r') as f:
        data = json.load(f)
    armor1 = [v for d in data['armor'] for k, v in d.items() if k == 'name']
    weapon1 = [v for d in data['weapon'] for k, v in d.items() if k == 'name']
    magic1 = [v for d in data['magic'] for k, v in d.items() if k == 'name']
    armor2 = random.choice(armor1)
    weapon2 = random.choice(weapon1)
    magic2 = random.choice(magic1)
    calc = add(int(armor2), int(weapon2), int(magic2))
    line_bot_api.reply_message(
        event.reply_token,
        TextSendMessage('Armor = ' + (armor2)),
        TextSendMessage('Weapon = ' + (weapon2)),
        TextSendMessage('Magic = ' + (magic2)),
        TextSendMessage('You have a score of ' + str(calc) + '.'),
        TextSendMessage('Waiting for next opponent...')
    )
Json:
"armor": [
{
"name":"Cardboard armor 10 DEF" ,
"stats":"10" },
{
"name":"Plastic armor 20 DEF" ,
"stats":"20" },
{
"name":"Rubber armor 30 DEF" ,
"stats":"30" },
{
"name":"Metal armor 40 DEF" ,
"stats":"40" },
{
"name":"Indestructable armor 50 DEF" ,
"stats":"50" }
],
After trying just about everything, the solution was:
if text == 'FIGHT':
    with open('items.json', 'r') as f:
        data = json.load(f)
    armor2 = random.choice(data['armor'])
    weapon2 = random.choice(data['weapon'])
    magic2 = random.choice(data['magic'])
    calc = add(armor2['stats'], weapon2['stats'], magic2['stats'])
    line_bot_api.reply_message(
        event.reply_token, [
            TextSendMessage('Armor = ' + (armor2['name'])),
            TextSendMessage('Weapon = ' + (weapon2['name'])),
            TextSendMessage('Magic = ' + (magic2['name'])),
            TextSendMessage('Total = ' + str(calc) + '.')
        ]
    )
Thanks to everyone and special thanks to my friend Sae who helped me. :)
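One detail worth double-checking (my observation, not part of the original thread): the "stats" values in the JSON are strings, so if add is meant to sum numbers it presumably needs an int conversion, along the lines of:

calc = add(int(armor2['stats']), int(weapon2['stats']), int(magic2['stats']))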

Split dictionary key and list of values from dict

I want to split keys and values and display the dictionary result in the format mentioned below. I'm reading a file, splitting the data into a list, and later moving it into a dictionary.
Please help me to get the result.
INPUT FILE - commands.txt
login url=http://demo.url.net username=test#url.net password=mytester
create-folder foldername=demo
select-folder foldername=test123
logout
Expected result format
print result_dict
"0": {
"login": [
{
"url": "http://demo.url.net",
"username": "test#url.net",
"password": "mytester"
}
]
},
"1": {
"create-folder": {
"foldername": "demo"
}
},
"2": {
"select-folder": {
"foldername": "test-folder"
}
},
"3": {
"logout": {}
}
CODE
file = os.path.abspath('catalog/commands.txt')
list_output = [f.rstrip().split() for f in open(file).readlines()]
print list_output

counter = 0
for data in list_output:
    csvdata[counter] = data[0:]
    counter = counter + 1
print csvdata

for key, val in csvdata.iteritems():
    for item in val:
        if '=' in item:
            key, value = item.split("=")
            result[key] = value
print result
As a function:
from collections import defaultdict
from itertools import count

def read_file(file_path):
    result = defaultdict(dict)
    item = count()
    with open(file_path) as f:
        for line in f:
            if not line.strip():
                continue
            parts = line.split()
            result[next(item)][parts[0]] = dict(p.split('=') for p in parts[1:])
    return dict(result)
Better example and explanation:
s = """
login url=http://demo.url.net username=test#url.net password=mytester
create-folder foldername=demo
select-folder foldername=test123
logout
"""
from collections import defaultdict
from itertools import count
result_dict = defaultdict(dict)
item = count()
# pretend you opened the file and are reading it line by line
for line in s.splitlines():
if not line:
continue # skip empty lines
parts = line.split()
result_dict[next(item)][parts[0]] = dict(p.split('=') for p in parts[1:])
With pretty print:
>>> pprint(dict(result_dict))
{0: {'login': {'password': 'mytester',
'url': 'http://demo.url.net',
'username': 'test#url.net'}},
1: {'create-folder': {'foldername': 'demo'}},
2: {'select-folder': {'foldername': 'test123'}},
3: {'logout': {}}}
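Note that this version produces integer keys (0, 1, ...) while the expected format in the question uses string keys; if that matters, a simple conversion (my addition) is:

result_dict = {str(k): v for k, v in result_dict.items()}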
lines = ["login url=http://demo.url.net username=test#url.net password=mytester",
"create-folder foldername=demo",
"select-folder foldername=test123",
"logout"]
result = {}
for no, line in enumerate(lines):
values = line.split()
pairs = [v.split('=') for v in values[1:]]
result[str(no)] = {values[0]: [dict(pairs)] if len(pairs) > 1 else dict(pairs)}
import pprint
pprint.pprint(result)
Output:
{'0': {'login': [{'password': 'mytester',
                  'url': 'http://demo.url.net',
                  'username': 'test#url.net'}]},
 '1': {'create-folder': {'foldername': 'demo'}},
 '2': {'select-folder': {'foldername': 'test123'}},
 '3': {'logout': {}}}
But are you sure you need the extra list inside the login value? If not, just change [dict(pairs)] if len(pairs) > 1 else dict(pairs) to dict(pairs).
r = dict()
f = open('commands.txt')
for i, line in enumerate(f.readlines()):
    r[str(i)] = dict()
    actions = line.split()
    list_actions = {}
    for action in actions[1:]:
        if "=" in action:
            k, v = action.split('=')
            list_actions[k] = v
    if len(actions[1:]) > 1:
        r[str(i)][actions[0]] = [list_actions]
    else:
        r[str(i)][actions[0]] = list_actions
print r
This should work.

Re-reading a CSV file in Python without loading it again

I made the following code, which works, but I want to improve it. I don't want to re-read the file, but if I delete sales_input.seek(0) it won't iterate through each row in sales. How can I improve this?
def computeCritics(mode, cleaned_sales_input="data/cleaned_sales.csv"):
    if mode == 1:
        print "creating customer.critics.recommendations"
        critics_output = open("data/customer/customer.critics.recommendations",
                              "wb")
        ID = getCustomerSet(cleaned_sales_input)
        sales_dict = pickle.load(open("data/customer/books.dict.recommendations",
                                      "r"))
    else:
        print "creating books.critics.recommendations"
        critics_output = open("data/books/books.critics.recommendations",
                              "wb")
        ID = getBookSet(cleaned_sales_input)
        sales_dict = pickle.load(open("data/books/users.dict.recommendations",
                                      "r"))
    critics = {}
    # make critics dict and pickle it
    for i in ID:
        with open(cleaned_sales_input, 'rb') as sales_input:
            sales = csv.reader(sales_input)  # read new
            for j in sales:
                if mode == 1:
                    if int(i) == int(j[2]):
                        sales_dict[int(j[6])] = 1
                else:
                    if int(i) == int(j[6]):
                        sales_dict[int(j[2])] = 1
        critics[int(i)] = sales_dict
    pickle.dump(critics, critics_output)
    print "done"
cleaned_sales_input looks like
6042772,2723,3546414,9782072488887,1,9.99,314968
6042769,2723,3546414,9782072488887,1,9.99,314968
...
where number 6 is the book ID and number 0 is the customer ID
I want to get a dict which looks like
critics = {
    CustomerID1: {
        BookID1: 1,
        BookID2: 0,
        ........
        BookIDX: 0
    },
    CustomerID2: {
        BookID1: 0,
        BookID2: 1,
        ...
    }
}
or
critics = {
    BookID1: {
        CustomerID1: 1,
        CustomerID2: 0,
        ........
        CustomerIDX: 0
    },
    BookID2: {
        CustomerID1: 0,
        CustomerID2: 1,
        ...
        CustomerIDX: 0
    }
}
I hope this isn't too much information.
Here are some suggestions:
Let's first look at this code pattern:
for i in ID:
    for j in sales:
        if int(i) == int(j[2]):
notice that i is only being compared with j[2]. That's its only purpose in the loop. int(i) == int(j[2]) can only be True at most once for each i.
So, we can completely remove the for i in ID loop by rewriting it as
for j in sales:
    key = j[2]
    if key in ID:
Based on the function names getCustomerSet and getBookSet, it sounds as if
ID is a set (as opposed to a list or tuple). We want ID to be a set since
testing membership in a set is O(1) (as opposed to O(n) for a list or tuple).
Next, consider this line:
critics[int(i)] = sales_dict
There is a potential pitfall here. This line is assigning sales_dict to
critics[int(i)] for each i in ID. Each key int(i) is being mapped to the very same dict. As we loop through sales and ID, we are modifying sales_dict like this, for example:
sales_dict[int(j[6])] = 1
But this will cause all values in critics to be modified simultaneously, since all keys in critics point to the same dict, sales_dict. I doubt that is what you want.
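A tiny demonstration of that aliasing pitfall (my illustration, not from the original answer):

shared = {}
critics = {1: shared, 2: shared}   # both keys reference the same dict object
critics[1]['x'] = 1
print(critics[2])                  # {'x': 1} -- "both" values changed at once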
To avoid this pitfall, we need to make copies of the sales_dict:
critics = {i:sales_dict.copy() for i in ID}
def computeCritics(mode, cleaned_sales_input="data/cleaned_sales.csv"):
    if mode == 1:
        filename = 'customer.critics.recommendations'
        path = os.path.join("data/customer", filename)
        ID = getCustomerSet(cleaned_sales_input)
        sales_dict = pickle.load(
            open("data/customer/books.dict.recommendations", "r"))
        key_idx, other_idx = 2, 6
    else:
        filename = 'books.critics.recommendations'
        path = os.path.join("data/books", filename)
        ID = getBookSet(cleaned_sales_input)
        sales_dict = pickle.load(
            open("data/books/users.dict.recommendations", "r"))
        key_idx, other_idx = 6, 2
    print "creating {}".format(filename)

    ID = {int(item) for item in ID}
    critics = {i: sales_dict.copy() for i in ID}
    with open(path, "wb") as critics_output:
        # make critics dict and pickle it
        with open(cleaned_sales_input, 'rb') as sales_input:
            sales = csv.reader(sales_input)  # read new
            for j in sales:
                key = int(j[key_idx])
                if key in ID:
                    other_key = int(j[other_idx])
                    critics[key][other_key] = 1
        pickle.dump(dict(critics), critics_output)
    print "done"
@unutbu's answer is better, but if you are stuck with this structure you can put the whole file in memory:
sales = []
with open(cleaned_sales_input, 'rb') as sales_input:
    sales_reader = csv.reader(sales_input)
    for line in sales_reader:
        sales.append(line)

for i in ID:
    for j in sales:
        # do stuff

Python - Convert JSON key/values into key/value where value is an array

I have a JSON file with numerous entries like this:
{
    "area1": "California",
    "area2": "Sierra Eastside",
    "area3": "Bishop Area",
    "area4": "Volcanic Tablelands (Happy/Sad Boulders)",
    "area5": "Fish Slough Boulders",
    "grade": "V6 ",
    "route": "The Orgasm",
    "type1": "Boulder",
    "type2": "NONE",
    "type3": "NONE",
    "type4": "NONE"
},
I want to take the area and type entries and turn them into arrays:
{
    "area": ["California", "Sierra Eastside", "Bishop Area", "Volcanic Tablelands (Happy/Sad Boulders)", "Fish Slough Boulders"],
    "grade": "V6 ",
    "route": "The Orgasm",
    "type": ["Boulder", "NONE", "NONE", "NONE"]
},
I have this code which almost works:
json_data = open('../json/routes_test.json')
datas = json.load(json_data)
datas_arrays = []
area_keys = ['area1', 'area2', 'area3', 'area4', 'area5']
type_keys = ['type1', 'type2', 'type3', 'type4']

for data in datas:
    areaArray = []
    typeArray = []
    deleteArray = []
    for k, v in data.iteritems():
        for area_key in area_keys:
            if (k == area_key):
                areaArray.append(v)
                deleteArray.append(k)
        for type_key in type_keys:
            if (k == type_key):
                typeArray.append(v)
                deleteArray.append(k)
    for k in deleteArray:
        del data[k]
    data['area'] = areaArray
    data['type'] = typeArray
    datas_arrays.append(data)

print datas_arrays
print "********"

out = json.dumps(datas_arrays, sort_keys=True, indent=4, separators=(',', ': '))
print out

f_out = open('../json/toues_test_intoarrays.json', 'w')
f_out.write(out)
f_out.close()
The problem is that the area array is all out of order and the type array is backwards, which I can't have. I find it strange that one is unordered and one is ordered but backwards. To me it seems like the iteration should ensure they're placed in order.
Python dictionaries have an arbitrary ordering; they are not sorted. You want to use your prebuilt lists of keys instead:
with open('../json/routes_test.json') as json_data:
    datas = json.load(json_data)

area_keys = ['area1', 'area2', 'area3', 'area4', 'area5']
type_keys = ['type1', 'type2', 'type3', 'type4']

for data in datas:
    data['area'] = [data[k] for k in area_keys]
    data['type'] = [data[k] for k in type_keys]
    for k in area_keys + type_keys:
        del data[k]

out = json.dumps(datas, sort_keys=True, indent=4, separators=(',', ': '))
print out

with open('../json/toues_test_intoarrays.json', 'w') as f_out:
    f_out.write(out)
which changes the dictionaries in-place.
You could even determine the area and type keys from each entry:
for data in datas:
    keys = sorted(data.keys())
    area_keys = [k for k in keys if k.startswith('area')]
    data['area'] = [data[k] for k in area_keys]
    type_keys = [k for k in keys if k.startswith('type')]
    data['type'] = [data[k] for k in type_keys]
    for k in area_keys + type_keys:
        del data[k]
and omit the hardcoded 'area1', 'area2', etc. list literals altogether.
Iterate the keys in order.
for k, v in sorted(data.iteritems()):
This will fail once you get past 9, but it will do for now.
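If the keys ever do go past 9 (area10, area11, ...), a numeric-aware sort key fixes that; a quick sketch (my addition, not from the original answer):

import re

def natural_key(key):
    # "area10" -> ("area", 10) so numeric suffixes compare numerically
    m = re.match(r'([A-Za-z]+)(\d+)$', key)
    return (m.group(1), int(m.group(2))) if m else (key, 0)

data = {'area1': 'a', 'area2': 'b', 'area10': 'c'}
print(sorted(data, key=natural_key))   # ['area1', 'area2', 'area10']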
