Split the content of a cell into different columns - python

I have a column with 1.5k rows, and each row has this structure:
" [{'id': 4099, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'}] "
I want to get to a result in which the keys of the inner dictionary become new columns and the values are copied underneath. For example:
id name ......
4099 xxxxxxxx + 30 filter ......
I've tried:
import ast
# Access only the first row and try to split it into columns
li_column = my_df.loc[0,'line_items']
li_column = ast.literal_eval(li_column)
That gave me a list containing a single dictionary, but now I'm stuck.

I can't convert your string with ast, but you can use .apply() to run the conversion on all rows:
df['line_items'] = df['line_items'].apply(dirtyjson.loads)
Later you can extract the dictionary from the list:
df['line_items'] = df['line_items'].str[0]
And then you can use .apply() again, this time with pd.Series, to create a new DataFrame from the dictionaries:
new_df = df['line_items'].apply(pd.Series)
As I said, ast doesn't work for me with your strings, and the standard json module also has problems converting them, but the dirtyjson module converts them correctly.
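(dirtyjson is a third-party package, so it may need to be installed first, e.g. with pip install dirtyjson.)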
import pandas as pd
df = pd.DataFrame({
'line_items': [
" [{'id': 4099, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'}] ",
" [{'id': 4199, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'}] ",
]
})
#import ast
#df['line_items'] = df['line_items'].apply(ast.literal_eval)
import dirtyjson
df['line_items'] = df['line_items'].apply(dirtyjson.loads)
df['line_items'] = df['line_items'].str[0]
new_df = df['line_items'].apply(pd.Series)
print(new_df)
Result:
id name ... price parent_name
0 4099 xxxxxxxx + 30 filter ... 29.9 xxxxxxxx + 30 filter
1 4199 xxxxxxxx + 30 filter ... 29.9 xxxxxxxx + 30 filter
[2 rows x 15 columns]
EDIT:
If you need to add it to the existing DataFrame:
df = df.join(new_df)
# remove old column
del df['line_items']
print(df)
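Equivalently - just a stylistic variant of the same two steps - the join and the column removal can be chained:
df = df.join(new_df).drop(columns='line_items')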
EDIT:
If every list may contain more than one dictionary, then you can use explode() instead of str[0] - it will put every dictionary in a separate row.
df = df.explode('line_items').reset_index(drop=True) # extract from list
import pandas as pd
df = pd.DataFrame({
#'A': ['123', '456', '789'],
'line_items': [
""" [{'id': 4099, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'},
{'id': 5099, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'}] """,
""" [{'id': 4100, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'},
{'id': 5100, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'}] """,
""" [{'id': 4101, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'},
{'id': 5101, 'name': 'xxxxxxxx + 30 filter', 'product_id': 6546, 'variation_id': 3352, 'quantity': 1, 'tax_class': '', 'subtotal': '110.89', 'subtotal_tax': '0.00', 'total': '29.90', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 39083, 'key': 'pa_size', 'value': 'l', 'display_key': 'Size', 'display_value': 'L'}, {'id': 39094, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': 'FS00055.L', 'price': 29.9, 'parent_name': 'xxxxxxxx + 30 filter'}] """,
]
})
import dirtyjson
df['line_items'] = df['line_items'].apply(dirtyjson.loads)
#df['line_items'] = df['line_items'].str[0] # extract first item from list
df = df.explode('line_items').reset_index(drop=True) # extract all items from list
new_df = df['line_items'].apply(pd.Series)
print(new_df)
df = df.join(new_df)
del df['line_items']
print(df)
id name ... price parent_name
0 4099 xxxxxxxx + 30 filter ... 29.9 xxxxxxxx + 30 filter
1 5099 xxxxxxxx + 30 filter ... 29.9 xxxxxxxx + 30 filter
2 4100 xxxxxxxx + 30 filter ... 29.9 xxxxxxxx + 30 filter
3 5100 xxxxxxxx + 30 filter ... 29.9 xxxxxxxx + 30 filter
4 4101 xxxxxxxx + 30 filter ... 29.9 xxxxxxxx + 30 filter
5 5101 xxxxxxxx + 30 filter ... 29.9 xxxxxxxx + 30 filter
[6 rows x 15 columns]
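For reference, a minimal sketch that wraps the steps above into one helper (the same dirtyjson-based approach, not a different method):
import dirtyjson
import pandas as pd

def expand_line_items(df, column='line_items'):
    # Parse each string into a list of dicts, put one dict per row,
    # then turn the dict keys into columns.
    items = df[column].apply(dirtyjson.loads).explode().reset_index(drop=True)
    return items.apply(pd.Series)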

Related

Remove duplicates from list of lists by column value

I have a list of dictionaries that looks like this; it has been sorted so that duplicate IDs are arranged with the one I want to keep at the top...
[
{'id': '23', 'type': 'car', 'price': '445'},
{'id': '23', 'type': 'car', 'price': '78'},
{'id': '23', 'type': 'car', 'price': '34'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '125', 'type': 'truck', 'price': '722'},
{'id': '125', 'type': 'truck', 'price': '100'},
{'id': '87', 'type': 'bike', 'price': '50'},
]
What is the simplest way to remove rows that have duplicate IDs but always keep the first one? In this instance the end result would look like this...
[
{'id': '23', 'type': 'car', 'price': '445'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '87', 'type': 'bike', 'price': '50'},
]
I know I can remove duplicates from a list by converting it to a set, like set(my_list), but in this instance it is duplicates by ID that I want to remove.
Since you already have the list sorted properly, a simple way to do this is to use itertools.groupby to grab the first element of each group in a list comprehension:
from itertools import groupby
l = [
{'id': '23', 'type': 'car', 'price': '445'},
{'id': '23', 'type': 'car', 'price': '78'},
{'id': '23', 'type': 'car', 'price': '34'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '125', 'type': 'truck', 'price': '722'},
{'id': '125', 'type': 'truck', 'price': '100'},
{'id': '87', 'type': 'bike', 'price': '50'},
]
[next(g) for k, g in groupby(l, key=lambda d: d['id'])]
# [{'id': '23', 'type': 'car', 'price': '445'},
# {'id': '125', 'type': 'truck', 'price': '998'},
# {'id': '87', 'type': 'bike', 'price': '50'}]
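One caveat worth noting: groupby only merges consecutive equal keys, so this relies on the list already being grouped by id (as it is here). If it weren't, you could sort it first (sorted() is stable, so the first entry of each id stays first):
from operator import itemgetter
l = sorted(l, key=itemgetter('id'))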
I would probably convert to a pandas DataFrame and then use drop_duplicates:
import pandas as pd
data = [
{'id': '23', 'type': 'car', 'price': '445'},
{'id': '23', 'type': 'car', 'price': '78'},
{'id': '23', 'type': 'car', 'price': '34'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '125', 'type': 'truck', 'price': '722'},
{'id': '125', 'type': 'truck', 'price': '100'},
{'id': '87', 'type': 'bike', 'price': '50'},
]
df = pd.DataFrame(data)
df.drop_duplicates(subset=['id'], inplace=True)
print(df.to_dict('records'))
# Output
# [{'id': '23', 'type': 'car', 'price': '445'},
# {'id': '125', 'type': 'truck', 'price': '998'},
# {'id': '87', 'type': 'bike', 'price': '50'}]
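A side note: keep='first' is the default for drop_duplicates, so the call above already keeps the top row for each id. You can make that explicit (and avoid inplace) with:
df = df.drop_duplicates(subset=['id'], keep='first')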
Here's an answer that involves no external modules or unnecessary manipulation of the data:
data = [
{'id': '23', 'type': 'car', 'price': '445'},
{'id': '23', 'type': 'car', 'price': '78'},
{'id': '23', 'type': 'car', 'price': '34'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '125', 'type': 'truck', 'price': '722'},
{'id': '125', 'type': 'truck', 'price': '100'},
{'id': '87', 'type': 'bike', 'price': '50'},
]
seen = set()
result = [row for row in data if row['id'] not in seen and not seen.add(row['id'])]
print(result)
Result:
[{'id': '23', 'type': 'car', 'price': '445'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '87', 'type': 'bike', 'price': '50'}]
Note that the not seen.add(row['id']) part of the list comprehension will always be True, because set.add() returns None. It's just a way of noting that a unique entry has been seen by adding it to the seen set.
Assume the given list is named data.
unique_ids = []
result = []
for item in data:
    if item["id"] not in unique_ids:
        result.append(item)
        unique_ids.append(item["id"])
print(result)
The result will be:
[{'id': '23', 'type': 'car', 'price': '445'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '87', 'type': 'bike', 'price': '50'}]
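For completeness, a compact dict-based sketch of the same idea: dict.setdefault only stores the first value seen for each key, and dicts preserve insertion order (Python 3.7+).
first_seen = {}
for item in data:
    first_seen.setdefault(item['id'], item)  # only the first item per id is stored
result = list(first_seen.values())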

TypeError only occurring in while loop

I am getting a TypeError in this code:
import requests
import json
orders_collected = 1
all_orders_collected = False
all_orders = requests.get('API location').json()

while all_orders_collected == False:
    if all_orders[orders_collected - 1]['status'] == "processing":
        orders_collected += 1
        all_orders.append(requests.get('API location next page').json())
    else:
        all_orders_collected = True

for order in all_orders:
    print(order)
output:
if all_orders[orders_collected - 1]['status'] == "processing":
TypeError: list indices must be integers or slices, not str
However, this line only gives this error when it's within the while loop; it works perfectly fine outside the while loop, and I am stumped as to why the while loop would change that condition. The output of the API is a dictionary inside a list.
It would be much appreciated if someone could explain why that's happening.
An example of the data is:
[{'id': 5339, 'parent_id': 0, 'status': 'processing', 'currency': 'ZAR', 'version': '5.0.0', 'prices_include_tax': False, 'date_created': '2021-03-04T18:06:28', 'date_modified': '2021-03-04T16:09:20', 'discount_total': '0.00', 'discount_tax': '0.00', 'shipping_total': '3.00', 'shipping_tax': '0.00', 'cart_tax': '0.00', 'total': '228.00', 'total_tax': '0.00', 'customer_id': 1, 'order_key': 'wc_order_g6StHaZMVIuoV', 'billing': {'first_name': 'redacted', 'last_name': 'redacted', 'company': '', 'address_1': 'redacted', 'address_2': '', 'city': 'redacted', 'state': 'redacted', 'postcode': '', 'country': 'redacted', 'email': 'redacted', 'phone': 'redacted'}, 'shipping': {'first_name': 'redacted', 'last_name': 'redacted', 'company': '', 'address_1': 'redacted', 'address_2': '', 'city': redacted', 'state': 'redacted', 'postcode': '', 'country': 'redacted'}, 'payment_method': 'cod', 'payment_method_title': 'Pay With EFT', 'transaction_id': '', 'customer_ip_address': 'redacted', 'customer_user_agent': 'redacted', 'created_via': 'checkout', 'customer_note': '', 'date_completed': '2021-02-20T06:28:43', 'date_paid': '2021-02-20T06:28:43', 'cart_hash': 'redacted', 'number': '5339', 'meta_data': [{'id': 44882, 'key': 'is_vat_exempt', 'value': 'no'}, {'id': 44887, 'key': '_new_order_email_sent', 'value': 'true'}], 'line_items': [{'id': 3159, 'name': 'Salads', 'product_id': 4278, 'variation_id': 4280, 'quantity': 1, 'tax_class': '', 'subtotal': '45.00', 'subtotal_tax': '0.00', 'total': '45.00', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 45521, 'key': 'bases', 'value': 'Quinoa', 'display_key': 'Bases', 'display_value': 'Quinoa'}, {'id': 45522, 'key': 'proteins', 'value': 'Steak Strips', 'display_key': 'Proteins', 'display_value': 'Steak Strips'}, {'id': 45523, 'key': 'first-component', 'value': 'Mixed Peppers', 'display_key': 'First Component', 'display_value': 'Mixed Peppers'}, {'id': 45524, 'key': 'second-component', 'value': 'Organic Sprouts', 'display_key': 'Second Component', 'display_value': 'Organic Sprouts'}, {'id': 45525, 'key': 'third-component', 'value': 'Carrots', 'display_key': 'Third Component', 'display_value': 'Carrots'}, {'id': 45526, 'key': 'fourth-component', 'value': 'Pumpkin Seeds', 'display_key': 'Fourth Component', 'display_value': 'Pumpkin Seeds'}, {'id': 45527, 'key': 'fifth-component', 'value': 'Feta', 'display_key': 'Fifth Component', 'display_value': 'Feta'}, {'id': 45528, 'key': 'sixth-component', 'value': 'Roast Butternut', 'display_key': 'Sixth Component', 'display_value': 'Roast Butternut'}, {'id': 45529, 'key': 'size', 'value': 'Medium', 'display_key': 'Size', 'display_value': 'Medium'}, {'id': 45607, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': '', 'price': 45, 'parent_name': 'Salads - SOLD OUT'}, {'id': 3160, 'name': 'Salads', 'product_id': 4278, 'variation_id': 4280, 'quantity': 1, 'tax_class': '', 'subtotal': '45.00', 'subtotal_tax': '0.00', 'total': '45.00', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 45539, 'key': 'bases', 'value': 'Baby Spinach', 'display_key': 'Bases', 'display_value': 'Baby Spinach'}, {'id': 45540, 'key': 'proteins', 'value': 'Grilled Chicken', 'display_key': 'Proteins', 'display_value': 'Grilled Chicken'}, {'id': 45541, 'key': 'first-component', 'value': 'Feta', 'display_key': 'First Component', 'display_value': 'Feta'}, {'id': 45542, 'key': 'second-component', 'value': 'Carrots', 'display_key': 'Second Component', 'display_value': 'Carrots'}, {'id': 45543, 'key': 
'third-component', 'value': 'Pumpkin Seeds', 'display_key': 'Third Component', 'display_value': 'Pumpkin Seeds'}, {'id': 45544, 'key': 'fourth-component', 'value': 'Chickpea and Red beans', 'display_key': 'Fourth Component', 'display_value': 'Chickpea and Red beans'}, {'id': 45545, 'key': 'fifth-component', 'value': 'Roast Butternut', 'display_key': 'Fifth Component', 'display_value': 'Roast Butternut'}, {'id': 45546, 'key': 'sixth-component', 'value': 'Organic Sprouts', 'display_key': 'Sixth Component', 'display_value': 'Organic Sprouts'}, {'id': 45547, 'key': 'size', 'value': 'Medium', 'display_key': 'Size', 'display_value': 'Medium'}, {'id': 45608, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': '', 'price': 45, 'parent_name': 'Salads - SOLD OUT'}, {'id': 3161, 'name': 'Salads', 'product_id': 4278, 'variation_id': 4280, 'quantity': 1, 'tax_class': '', 'subtotal': '45.00', 'subtotal_tax': '0.00', 'total': '45.00', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 45557, 'key': 'bases', 'value': 'Quinoa', 'display_key': 'Bases', 'display_value': 'Quinoa'}, {'id': 45558, 'key': 'proteins', 'value': 'Chickpea and red beans', 'display_key': 'Proteins', 'display_value': 'Chickpea and red beans'}, {'id': 45559, 'key': 'first-component', 'value': 'Sugar Snap Peas', 'display_key': 'First Component', 'display_value': 'Sugar Snap Peas'}, {'id': 45560, 'key': 'second-component', 'value': 'Red Onion', 'display_key': 'Second Component', 'display_value': 'Red Onion'}, {'id': 45561, 'key': 'third-component', 'value': 'Carrots', 'display_key': 'Third Component', 'display_value': 'Carrots'}, {'id': 45562, 'key': 'fourth-component', 'value': 'Pumpkin Seeds', 'display_key': 'Fourth Component', 'display_value': 'Pumpkin Seeds'}, {'id': 45563, 'key': 'fifth-component', 'value': 'Roast Butternut', 'display_key': 'Fifth Component', 'display_value': 'Roast Butternut'}, {'id': 45564, 'key': 'sixth-component', 'value': 'Feta', 'display_key': 'Sixth Component', 'display_value': 'Feta'}, {'id': 45565, 'key': 'size', 'value': 'Medium', 'display_key': 'Size', 'display_value': 'Medium'}, {'id': 45609, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': '', 'price': 45, 'parent_name': 'Salads - SOLD OUT'}, {'id': 3162, 'name': 'Salads', 'product_id': 4278, 'variation_id': 4280, 'quantity': 1, 'tax_class': '', 'subtotal': '45.00', 'subtotal_tax': '0.00', 'total': '45.00', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 45575, 'key': 'bases', 'value': 'Baby Spinach', 'display_key': 'Bases', 'display_value': 'Baby Spinach'}, {'id': 45576, 'key': 'proteins', 'value': 'Steak Strips', 'display_key': 'Proteins', 'display_value': 'Steak Strips'}, {'id': 45577, 'key': 'first-component', 'value': 'Roast Butternut', 'display_key': 'First Component', 'display_value': 'Roast Butternut'}, {'id': 45578, 'key': 'second-component', 'value': 'Feta', 'display_key': 'Second Component', 'display_value': 'Feta'}, {'id': 45579, 'key': 'third-component', 'value': 'Carrots', 'display_key': 'Third Component', 'display_value': 'Carrots'}, {'id': 45580, 'key': 'fourth-component', 'value': 'Pumpkin Seeds', 'display_key': 'Fourth Component', 'display_value': 'Pumpkin Seeds'}, {'id': 45581, 'key': 'fifth-component', 'value': 'Chickpea and Red beans', 'display_key': 'Fifth Component', 'display_value': 'Chickpea and Red beans'}, {'id': 45582, 'key': 'sixth-component', 'value': 'Organic Sprouts', 'display_key': 'Sixth Component', 
'display_value': 'Organic Sprouts'}, {'id': 45583, 'key': 'size', 'value': 'Medium', 'display_key': 'Size', 'display_value': 'Medium'}, {'id': 45610, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': '', 'price': 45, 'parent_name': 'Salads - SOLD OUT'}, {'id': 3163, 'name': 'Salads', 'product_id': 4278, 'variation_id': 4280, 'quantity': 1, 'tax_class': '', 'subtotal': '45.00', 'subtotal_tax': '0.00', 'total': '45.00', 'total_tax': '0.00', 'taxes': [], 'meta_data': [{'id': 45593, 'key': 'bases', 'value': 'Wholewheat protein pasta', 'display_key': 'Bases', 'display_value': 'Wholewheat protein pasta'}, {'id': 45594, 'key': 'proteins', 'value': 'Bacon', 'display_key': 'Proteins', 'display_value': 'Bacon'}, {'id': 45595, 'key': 'first-component', 'value': 'Mini Broccoli', 'display_key': 'First Component', 'display_value': 'Mini Broccoli'}, {'id': 45596, 'key': 'second-component', 'value': 'Baby Cob', 'display_key': 'Second Component', 'display_value': 'Baby Cob'}, {'id': 45597, 'key': 'third-component', 'value': 'Carrots', 'display_key': 'Third Component', 'display_value': 'Carrots'}, {'id': 45598, 'key': 'fourth-component', 'value': 'Pumpkin Seeds', 'display_key': 'Fourth Component', 'display_value': 'Pumpkin Seeds'}, {'id': 45599, 'key': 'fifth-component', 'value': 'Feta', 'display_key': 'Fifth Component', 'display_value': 'Feta'}, {'id': 45600, 'key': 'sixth-component', 'value': 'Roast Butternut', 'display_key': 'Sixth Component', 'display_value': 'Roast Butternut'}, {'id': 45601, 'key': 'size', 'value': 'Medium', 'display_key': 'Size', 'display_value': 'Medium'}, {'id': 45611, 'key': '_reduced_stock', 'value': '1', 'display_key': '_reduced_stock', 'display_value': '1'}], 'sku': '', 'price': 45, 'parent_name': 'Salads - SOLD OUT'}], 'tax_lines': [], 'shipping_lines': [{'id': 3164, 'method_title': 'Distance Rate (1.5 km; 4 mins)', 'method_id': 'distance_rate', 'instance_id': '3', 'total': '3.00', 'total_tax': '0.00', 'taxes': [], 'meta_data': []}], 'fee_lines': [], 'coupon_lines': [], 'refunds': [], 'date_created_gmt': '2021-03-04T18:06:28', 'date_modified_gmt': '2021-03-04T16:09:20', 'date_completed_gmt': '2021-02-20T06:28:43', 'date_paid_gmt': '2021-02-20T06:28:43', 'currency_symbol': 'R', '_links': {'self': [{'href': 'redacted'}], 'collection': [{'href': 'redacted'}], 'customer': [{'href': 'redacted'}]}}]
"the output of the API is dictionary inside a list"
quote from this, maybe you add a list in a list ?
after this code: all_orders.append(requests.get('API location next page').json()) , the all_orders will have inner list.
maybe get the value by:
all_orders[orders_collected - 1][some index]['status']
bugfix:
change append to extend
all_orders.extend(requests.get('API location next page').json())
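A tiny demo of the difference (illustrative values, not the real API data):
orders = [{'status': 'processing'}]
page2 = [{'status': 'completed'}]

appended = orders + [page2]   # what append does: the whole list becomes one element
extended = orders + page2     # what extend does: the elements are added individually

print(extended[1]['status'])  # 'completed'
# appended[1] is itself a list, so appended[1]['status'] raises the same
# TypeError: list indices must be integers or slices, not str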

How to explode a pandas column with data mixing dicts and lists of dicts

I have a pandas DataFrame with a mixed set of values: the first one is a list (array) and the other elements are not.
>>> df_3['integration-outbound:IntegrationEntity.integrationEntityDetails.supplier.forms.form.records.record']
0 [{'Internalid': '24348', 'isDelete': 'false', 'fields': {'field': [{'id': 'CATEGOR_LEVEL_1', 'value': 'MR'}, {'id': 'LOW_PRODSERV', 'value': 'RES'}, {'id': 'LOW_LEVEL_2', 'value': 'keylevel221'}, {'id': 'LOW_LEVEL_3', 'value': 'keylevel3127'}, {'id': 'LOW_LEVEL_4', 'value': 'keylevel4434'}, {'id': 'LOW_LEVEL_5', 'value': 'keylevel5545'}]}}, {'Internalid': '24349', 'isDelete': 'false', 'fields': {'field': [{'id': 'CATEGOR_LEVEL_1', 'value': 'MR'}, {'id': 'LOW_PRODSERV', 'value': 'RES'}, {'id': 'LOW_LEVEL_2', 'value': 'keylevel221'}, {'id': 'LOW_LEVEL_3', 'value': 'keylevel3125'}, {'id': 'LOW_LEVEL_4', 'value': 'keylevel4268'}, {'id': 'LOW_LEVEL_5', 'value': 'keylevel5418'}]}}, {'Internalid': '24350', 'isDelete': 'false', 'fields': {'field': [{'id': 'CATEGOR_LEVEL_1', 'value': 'MR'}, {'id': 'LOW_PRODSERV', 'value': 'RES'}, {'id': 'LOW_LEVEL_2', 'value': 'keylevel221'}, {'id': 'LOW_LEVEL_3', 'value': 'keylevel3122'}, {'id': 'LOW_LEVEL_4', 'value': 'keylevel425'}, {'id': 'LOW_LEVEL_5', 'value': 'keylevel5221'}]}}]
0 {'isDelete': 'false', 'fields': {'field': [{'id': 'S_EAST', 'value': 'N'}, {'id': 'W_EST', 'value': 'N'}, {'id': 'M_WEST', 'value': 'N'}, {'id': 'N_EAST', 'value': 'N'}, {'id': 'LOW_AREYOU_ASSET', 'value': '-1'}, {'id': 'LOW_SWART_PROG', 'value': '-1'}]}}
0 {'isDelete': 'false', 'fields': {'field': {'id': 'LOW_COD_CONDUCT', 'value': '-1'}}}
0 {'isDelete': 'false', 'fields': {'field': [{'id': 'LOW_SUPPLIER_TYPE', 'value': '2'}, {'id': 'LOW_DO_INT_BOTH', 'value': '1'}]}}
I want to explode this into multiple rows. The first row is a list and the other rows are not.
>>> type(df_3)
<class 'pandas.core.frame.DataFrame'>
>>> type(df_3['integration-outbound:IntegrationEntity.integrationEntityDetails.supplier.forms.form.records.record'])
<class 'pandas.core.series.Series'>
Expected output -
{'Internalid': '24348', 'isDelete': 'false', 'fields': {'field': [{'id': 'CATEGOR_LEVEL_1', 'value': 'MR'}, {'id': 'LOW_PRODSERV', 'value': 'RES'}, {'id': 'LOW_LEVEL_2', 'value': 'keylevel221'}, {'id': 'LOW_LEVEL_3', 'value': 'keylevel3127'}, {'id': 'LOW_LEVEL_4', 'value': 'keylevel4434'}, {'id': 'LOW_LEVEL_5', 'value': 'keylevel5545'}]}}
{'Internalid': '24349', 'isDelete': 'false', 'fields': {'field': [{'id': 'CATEGOR_LEVEL_1', 'value': 'MR'}, {'id': 'LOW_PRODSERV', 'value': 'RES'}, {'id': 'LOW_LEVEL_2', 'value': 'keylevel221'}, {'id': 'LOW_LEVEL_3', 'value': 'keylevel3125'}, {'id': 'LOW_LEVEL_4', 'value': 'keylevel4268'}, {'id': 'LOW_LEVEL_5', 'value': 'keylevel5418'}]}}
{'Internalid': '24350', 'isDelete': 'false', 'fields': {'field': [{'id': 'CATEGOR_LEVEL_1', 'value': 'MR'}, {'id': 'LOW_PRODSERV', 'value': 'RES'}, {'id': 'LOW_LEVEL_2', 'value': 'keylevel221'}, {'id': 'LOW_LEVEL_3', 'value': 'keylevel3122'}, {'id': 'LOW_LEVEL_4', 'value': 'keylevel425'}, {'id': 'LOW_LEVEL_5', 'value': 'keylevel5221'}]}}]
{'isDelete': 'false', 'fields': {'field': [{'id': 'S_EAST', 'value': 'N'}, {'id': 'W_EST', 'value': 'N'}, {'id': 'M_WEST', 'value': 'N'}, {'id': 'N_EAST', 'value': 'N'}, {'id': 'LOW_AREYOU_ASSET', 'value': '-1'}, {'id': 'LOW_SWART_PROG', 'value': '-1'}]}}
{'isDelete': 'false', 'fields': {'field': {'id': 'LOW_COD_CONDUCT', 'value': '-1'}}}
{'isDelete': 'false', 'fields': {'field': [{'id': 'LOW_SUPPLIER_TYPE', 'value': '2'}, {'id': 'LOW_DO_INT_BOTH', 'value': '1'}]}}
I tried to explode this column:
>>> df_3.explode('integration-outbound:IntegrationEntity.integrationEntityDetails.supplier.forms.form.records.record')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib64/python3.6/site-packages/pandas/core/frame.py", line 6318, in explode
result = df[column].explode()
File "/usr/local/lib64/python3.6/site-packages/pandas/core/series.py", line 3504, in explode
values, counts = reshape.explode(np.asarray(self.array))
File "pandas/_libs/reshape.pyx", line 129, in pandas._libs.reshape.explode
KeyError: 0
I could run through each row, try to find out whether it is a list, and handle it specially, but that doesn't seem right:
if str(type(df_3.loc[i, '{}'.format(c)])) == "<class 'list'>":
Is there any way we can use an explode function on this kind of data?
An alternative way using pandas-read-xml:
from pandas_read_xml import flatten, fully_flatten
df = flatten(df)
I was able to do it, but the exploded rows all end up sorted to the top of the DataFrame (in case there are more list-type objects in lower rows).
pd.concat((df.iloc[[type(item) == list for item in df['Column']]].explode('Column'),
           df.iloc[[type(item) != list for item in df['Column']]]))
It essentially does what you've said: check whether the object type is list, and if so, explode. Then concatenate this exploded Series with the rest of the data (i.e. the non-lists). Performance doesn't seem to suffer much on longer DataFrames.
Output:
Column
0 {'Internalid': '24348', 'isDelete': 'false', '...
0 {'Internalid': '24349', 'isDelete': 'false', '...
0 {'Internalid': '24350', 'isDelete': 'false', '...
1 {'isDelete': 'false', 'fields': {'field': [{'i...
2 {'isDelete': 'false', 'fields': {'field': {'id...
3 {'isDelete': 'false', 'fields': {'field': [{'i...
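An alternative sketch that preserves the original row order instead of moving the exploded rows to the top: wrap every non-list cell in a one-element list first, so explode() can treat all rows uniformly (using the same 'Column' name as above):
# Normalize: every cell becomes a list, so explode() treats all rows the same way
df['Column'] = df['Column'].apply(lambda x: x if isinstance(x, list) else [x])
df = df.explode('Column').reset_index(drop=True)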

How to separate values in a dictionary to be put into CSV? [duplicate]

This question already has answers here:
How can I convert JSON to CSV?
(26 answers)
Closed 3 years ago.
I am trying to write my JSON output to CSV, but I'm not sure how to separate my values into individual columns.
This is my current code:
with open('dict.csv', 'w') as csv_file:
    writer = csv.writer(csv_file)
    for key, value in response.json().items():
        writer.writerow([value])
        print(value)
This is the CSV file I am getting:
[screenshot: current CSV file]
This is the desired CSV file/output I want to get:
[screenshot: desired output]
This is an example of my JSON output:
[{'id': '123', 'custom_id': '12', 'company': 28, 'company_name': 'Sunshine'}, {'id': '224', 'custom_id': '14', 'company': 38, 'company_name': 'Flowers'},
{'id': '888', 'custom_id': '10', 'company': 99, 'company_name': 'Fields'}]
How about this JSON format? (a more complicated one)
[{'id': '777', 'custom_id': '000112', 'company': 28, 'company_name':
'Weddings Inc', 'delivery_address': '25 olive park terrace, 61234', 'delivery_timeslot': {'lower': '2019-12-06T10:00:00Z', 'upper': '2019-12-06T13:00:00Z', 'bounds': '[)'}, 'sender_name': 'Joline', 'sender_email': '', 'sender_contact': '91234567', 'removed': None, 'recipient_name': 'Joline', 'recipient_contact': '91866655', 'notes': '', 'items': [{'id': 21668, 'name': 'Loose hair flowers', 'quantity': 1, 'metadata': {}, 'removed': None}, {'id': 21667, 'name': "Groom's Boutonniere", 'quantity': 1, 'metadata': {}, 'removed': None}, {'id': 21666, 'name': 'Bridal Bouquet', 'quantity': 1, 'metadata': {}, 'removed': None}], 'latitude': '1.1234550920764211111', 'longitude': '103.864352476201000000', 'created': '2019-08-15T05:40:30.385467Z', 'updated': '2019-08-15T05:41:27.930110Z', 'status': 'pending', 'verbose_status': 'Pending', 'logs': [{'id': 56363, 'order': '50c402', 'order_custom_id': '000112', 'order_delivery_address': '25 olive park terrace, 61234', 'order_delivery_timeslot': {'lower': '2019-12-06T10:00:00Z', 'upper': '2019-12-06T13:00:00Z', 'bounds': '[)'}, 'message': 'Order was created.', 'failure_reason': None, 'success_code': None, 'success_description': None, 'created': '2019-08-15T05:40:30.431790Z', 'removed': None}, {'id': 56364, 'order': '50c402d8-7c76-45b5-b883-e2fb887a507e', 'order_custom_id': 'INV-000112', 'order_delivery_address': '25 olive park terrace, 61234', 'order_delivery_timeslot': {'lower': '2019-12-06T10:00:00Z', 'upper': '2019-12-06T13:00:00Z', 'bounds': '[)'}, 'message': 'Order is pending.', 'failure_reason': None, 'success_code': None, 'success_description': None, 'created': '2019-08-15T05:40:30.433139Z', 'removed': None}], 'reschedule_requests': [], 'signature': None},
{'id': '241', 'custom_id': '000123', 'company': 22, 'company_name': 'Pearl Pte Ltd', 'delivery_address': '90 Merchant Road, Hotel Royal, 223344', 'delivery_timeslot': {'lower': '2019-11-29T10:00:00Z', 'upper': '2019-11-29T13:00:00Z', 'bounds': '[)'}, 'sender_name': 'Vera Smith', 'sender_email': '', 'sender_contact': '81234567', 'removed': None, 'recipient_name': 'Vera Smith', 'recipient_contact': '81234561', 'notes': '', 'items': [{'id': 22975, 'name': 'Custom wrapped bouquet', 'quantity': 2, 'metadata': {}, 'removed': None}, {'id': 22974, 'name': "Parents' boutonniere x 3", 'quantity': 1, 'metadata': {}, 'removed': None}, {'id': 22973, 'name': "Groom's boutonniere", 'quantity': 1, 'metadata': {}, 'removed': None}, {'id': 22972, 'name': 'Loose hair flowers', 'quantity': 1, 'metadata': {}, 'removed': None}, {'id': 22971, 'name': 'Bridal Bouquet', 'quantity': 1, 'metadata': {}, 'removed': None}], 'latitude': '1.28821802835873000000', 'longitude': '103.84569230314800000000', 'created': '2019-08-30T03:20:17.477528Z', 'updated': '2019-08-30T03:29:25.307856Z', 'status': 'pending', 'verbose_status': 'Pending', 'logs': [{'id': 59847, 'order': '24117085-9104-4442-841b-4a734f801d39', 'order_custom_id': 'INV-000123', 'order_delivery_address': '90 Merchant Road, Hotel Royal, 223344', 'order_delivery_timeslot': {'lower': '2019-11-29T10:00:00Z', 'upper': '2019-11-29T13:00:00Z', 'bounds': '[)'}, 'message': 'Order was created.', 'failure_reason': None, 'success_code': None, 'success_description': None, 'created': '2019-08-30T03:20:17.511250Z', 'removed': None}, {'id': 59848, 'order': '24117085-9104-4442-841b-4a734f801d39', 'order_custom_id': 'INV-000123', 'order_delivery_address': '90 Merchant Road, Hotel Royal, 223344', 'order_delivery_timeslot': {'lower': '2019-11-29T10:00:00Z', 'upper': '2019-11-29T13:00:00Z', 'bounds': '[)'}, 'message': 'Order is pending.', 'failure_reason': None, 'success_code': None, 'success_description': None, 'created': '2019-08-30T03:20:17.513132Z', 'removed': None}], 'reschedule_requests': [], 'signature': None}]
Use the pandas library:
df.to_csv() - Write object to a comma-separated values (csv) file.
Ex.
import pandas as pd
data = [{'id': '123', 'custom_id': '12', 'company': 28, 'company_name': 'Sunshine'},
{'id': '224', 'custom_id': '14', 'company': 38, 'company_name': 'Flowers'},
{'id': '888', 'custom_id': '10', 'company': 99, 'company_name': 'Fields'}]
df = pd.DataFrame(data)
df.to_csv('sample.csv')
Try:
import csv
csv_file = 'my_file.csv'
csv_columns = ['id', 'custom_id', 'company', 'company_name']
dict_data = [{'id': '123', 'custom_id': '12', 'company': 28, 'company_name': 'Sunshine'}, {'id': '224', 'custom_id': '14', 'company': 38, 'company_name': 'Flowers'}, {'id': '888', 'custom_id': '10', 'company': 99, 'company_name': 'Fields'}]
try:
    with open(csv_file, 'w', newline='') as csvfile:  # newline='' avoids blank lines between rows on Windows
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in dict_data:
            writer.writerow(data)
except IOError:
    print("I/O error")
Given your response data in JSON format:
response = [{'id': '123', 'custom_id': '12', 'company': 28, 'company_name': 'Sunshine'},
{'id': '224', 'custom_id': '14', 'company': 38, 'company_name': 'Flowers'},
{'id': '888', 'custom_id': '10', 'company': 99, 'company_name': 'Fields'}]
You can convert it to a list of lists using
header = [response[0].keys()]
data = [row.values() for row in response]
csv_list = header + data
And then save it to csv using
with open('dict.csv', "w") as f:
    for row in csv_list:
        f.write("%s\n" % ','.join(str(col) for col in row))
This should yield your desired output.
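One caveat worth adding: this manual join will produce malformed rows if any value itself contains a comma, since nothing is quoted. The csv module handles quoting automatically, so a safer variant of the last step is:
import csv

with open('dict.csv', 'w', newline='') as f:
    csv.writer(f).writerows(csv_list)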

How to get/filter values in python3 json list dictionary response?

Below is the result I got from an API query.
[{'type':'book','title': 'example1', 'id': 12456, 'price': '8.20', 'qty': '12', 'status': 'available'},
{'type':'book','title': 'example2', 'id': 12457, 'price': '10.50', 'qty': '5', 'status': 'none'}]
How do I specify in code that I only want the key-value pairs for title, price, and status?
So the result will look like:
[{'title': 'example1', 'price': '8.20', 'status': 'available'},
{'title': 'example2', 'price': '10.50', 'status': 'none'}]
You can use a dictionary comprehension within a list comprehension:
L = [{'type':'book','title': 'example1', 'id': 12456, 'price': '8.20', 'qty': '12', 'status': 'available'},
{'type':'book','title': 'example2', 'id': 12457, 'price': '10.50', 'qty': '5', 'status': 'none'}]
keys = ['title', 'price', 'status']
res = [{k: d[k] for k in keys} for d in L]
print(res)
[{'price': '8.20', 'status': 'available', 'title': 'example1'},
{'price': '10.50', 'status': 'none', 'title': 'example2'}]
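If some dictionaries might be missing one of the keys, a small variant using d.get() avoids a KeyError by filling in None instead:
res = [{k: d.get(k) for k in keys} for d in L]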
