TensorFlow Object Detection API print objects found on image to console - python
I'm trying to return a list of the objects that have been found in an image with the TF Object Detection API.
To do that I'm using print([category_index.get(i) for i in classes[0]]) to print the list of objects that have been found, or print(num_detections) to display the number of found objects, but in both cases it gives me a list with 300 values or simply the value [300.], respectively.
How is it possible to return only the objects that are actually in the image? Or, if there is some mistake, please help me figure out what is wrong.
I was using the Faster RCNN model's config file and checkpoints while training. To be sure, it really detects only a few objects in the image; here it is:
My code:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
# Frozen inference graph exported after training, and the label map that
# names the NUM_CLASSES object classes.
PATH_TO_CKPT = 'frozen_graph/frozen_inference_graph.pb'
PATH_TO_LABELS = 'object_detection/pascal_label_map.pbtxt'
NUM_CLASSES = 7

# Deserialize the frozen GraphDef and import it into a dedicated graph.
# name='' imports the nodes without a prefix, so tensors can later be looked
# up as 'image_tensor:0', 'detection_boxes:0', etc.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Build the lookup used to turn detected class ids into category entries
# (the printed output shows entries of the form {'name': ..., 'id': ...}).
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array(image):
    """Convert an image object into a (height, width, 3) uint8 NumPy array.

    Args:
        image: An object exposing ``size`` as ``(width, height)`` and
            ``getdata()`` yielding one 3-value pixel per position (as the
            reshape below requires), e.g. a 3-channel PIL image.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8`` with shape
        ``(height, width, 3)``.

    Raises:
        ValueError: If the pixel data cannot be reshaped to
            ``(height, width, 3)`` (for example, a non-3-channel image).
    """
    (im_width, im_height) = image.size
    # Rows come first in the array layout, hence (height, width, channels).
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images/'
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
    for i in range(1, 2)
]
IMAGE_SIZE = (12, 8)

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # The graph was imported from a frozen .pb, so no variable
        # initializer is run here.
        # Look the input/output tensors up once, under names distinct from
        # the per-image results so they are not overwritten inside the loop.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object
        # was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')

        # img numbers the saved result files starting from 1.
        for img, image_path in enumerate(TEST_IMAGE_PATHS, start=1):
            image = Image.open(image_path)
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have
            # shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes_tensor, scores_tensor, classes_tensor,
                 num_detections_tensor],
                feed_dict={image_tensor: image_np_expanded})
            # Draws the detections onto image_np. min_score_thresh and
            # max_boxes_to_draw are left at their defaults here.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            # NOTE(review): this figure is never drawn on; plt.imsave below
            # writes image_np straight to disk.
            plt.figure(figsize=IMAGE_SIZE)
            plt.imsave('RESULTS/' + str(img) + '.jpg', image_np)

            # Return found objects
            # No score filtering is applied here, so every returned
            # detection slot is printed.
            print([category_index.get(i) for i in classes[0]])
            print(boxes.shape)
            print(num_detections)
Which gives the following result:
[{'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 
'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_red', 'id': 7}, {'name': 
'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 
'marlboro_gold', 'id': 5}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_red', 'id': 7}, {'name': 
'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 
'marlboro_gold', 'id': 5}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'chesterfield_blue', 'id': 1}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_gold', 'id': 5}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_red', 'id': 7}, {'name': 'chesterfield_red', 'id': 2}, {'name': 'marlboro_red', 'id': 7}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_red', 'id': 7}, {'name': 'lucky_strike_blue', 'id': 3}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'lucky_strike_red', 'id': 4}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'marlboro_mentol', 'id': 6}, {'name': 'lucky_strike_red', 'id': 4}]
(1, 300, 4)
[ 300.]
Thanks in advance for any information!
UPD:
A thousand thanks to everyone who helped with this question.
The following line of code is exactly what I needed; it gives me a list of the objects that were found, so I can do other operations on them.
# Keep only the detections whose score exceeds 0.5; index pairs each class id
# in classes[0] with its score in scores[0].
print([category_index.get(value) for index, value in enumerate(classes[0]) if scores[0, index] > 0.5])
As far as I can see, you have 300 detections. visualize_boxes_and_labels_on_image_array shows very few of them because min_score_thresh=.5 (the default value) is too high for most of them.
If you want to add the same filtering to the printed output, you can write:
min_score_thresh = 0.5
# Pair each detected class id with its own score. The class id must not be
# used as an index into scores: it identifies the category, not the position
# of the detection.
print([category_index.get(class_id)
       for class_id, score in zip(classes[0], scores[0])
       if score > min_score_thresh])
You can change min_score_thresh to choose threshold value you need. It may be useful to print the score values with the category names.
From the signature of the function visualize_boxes_and_labels_on_image_array, you can see that you have to set the arguments max_boxes_to_draw and min_score_thresh:
visualize_boxes_and_labels_on_image_array(image,
boxes,
classes,
scores,
category_index,
instance_masks=None,
keypoints=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4)
Try setting min_score_thresh to 0 (and max_boxes_to_draw to at least 300, since it defaults to 20). Then you will probably see all 300 detections.
Open visualization_utils.py and add print(class_name) after:
else:
class_name = 'N/A'
display_str = '{}: {}%'.format(
class_name,
int(100*scores[i]))
this will print the detected objects
adding print(class_name) after
else:
class_name = 'N/A'
display_str = '{}: {}%'.format(
class_name,
int(100*scores[i]))
in the visualization_utils.py file prints the detected object.
I wonder where to add a print command to print timestamps as well as the percentage of accuracy (the detection score) in the output.
# This loads the label map and the categories, and builds the category index.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
To print the identified objects, do the following:
Print the category name rather than the numeric class id: the class id is only a number, while the category entry holds the name of the object. Keep only the detections whose score exceeds the chosen threshold:
min_score_thresh = 0.5
# Build a real list (not a generator) of category names, pairing every
# detected class id with its own score. Ids missing from category_index are
# reported as 'N/A' instead of raising.
print([category_index.get(int(class_id), {}).get('name', 'N/A')
       for class_id, score in zip(classes[0], scores[0])
       if score > min_score_thresh])
this will print the identified category.
Related
Python: Descending order and just 3 objects has a high value [duplicate]
This question already has answers here: How do I sort a list of dictionaries by a value of the dictionary? (20 answers) Closed 6 months ago. I have an array object like that, Not sort value, I want descending order and just 3 objects has a high value: [{'id': 1, 'value': 3}, {'id': 2, 'value': 6}, {'id': 3, 'value': 8}, {'id': 4, 'value': 8}, {'id': 5, 'value': 10}, {'id': 6, 'value': 9}, {'id': 7, 'value': 8}, {'id': 8, 'value': 4}, {'id': 9, 'value': 5}] I want result is descending order and just 3 objects have a high value, like this [{'id': 5, 'value': 10}, {'id': 6, 'value': 9}, {'id': 7, 'value': 8}, {'id': 3, 'value': 8}, {'id': 4, 'value': 8},] Please help me, thanks
t = [{'id': 1, 'value': 3}, {'id': 2, 'value': 6}, {'id': 3, 'value': 8}, {'id': 4, 'value': 8}, {'id': 5, 'value': 10}, {'id': 6, 'value': 9}, {'id': 7, 'value': 8}] newlist = sorted(t, key=lambda d: d['value']) newlist.reverse() print(newlist[:3]) # [{'id': 5, 'value': 10}, {'id': 6, 'value': 9}, {'id': 7, 'value': 8}] More info about list slicing More info about reverse() More info
Split a list of dictionaries into multiple chunks
I have this one list list_dict = [ {'id': 1}, {'item': 'apple'}, {'id': 2}, {'item': 'pear'}, {'id': 1}, {'item': 'peach'}, {'id': 2}, {'item': 'kiwi'}, {'id': 3}, {'item': 'banana'}, {'id': 4}, {'item': 'mango'}, {'id': 1}, {'item': 'watermelon'}, {'id': 2}, {'item': 'plum'}, {'id': 3}, {'item': 'grapes'}] and I want to split like this (start to make sublist when "id" is 1) result = [ [{'id': 1}, {'item': 'apple'}, {'id': 2}, {'item': 'pear'}], [{'id': 1}, {'item': 'peach'}, {'id': 2}, {'item': 'kiwi'}, {'id': 3}, {'item': 'banana'}, {'id': 4}, {'item': 'mango'}], [{'id': 1}, {'item': 'watermelon'}, {'id': 2}, {'item': 'plum'}, {'id': 3}, {'item': 'grapes'}]]
Some nested for loop will work list_dict = [{"id":1},{"item":"apple"},{"id":2},{"item":"pear"},{"id":1},{"item":"peach"},{"id":2},{"item":"kiwi"},{"id":3},{"item":"banana"},{"id":4},{"item":"mango"},{"id":1},{"item":"watermelon"},{"id":2},{"item":"plum"},{"id":3},{"item":"grapes"}] output = [] temp = [] for i in list_dict: if i.get('id', -1) == 1 and temp: output.append(temp.copy()) temp.clear() temp.append(i) else: temp.append(i) output.append(temp.copy()) print(output) #[[{'id': 1}, {'item': 'apple'}, {'id': 2}, {'item': 'pear'}], [{'id': 1}, {'item': 'peach'}, {'id': 2}, {'item': 'kiwi'}, {'id': 3}, {'item': 'banana'}, {'id': 4}, {'item': 'mango'}], [{'id': 1}, {'item': 'watermelon'}, {'id': 2}, {'item': 'plum'}, {'id': 3}, {'item': 'grapes'}]]
Maybe something like this? main_list = [] current_list = [] for el in list_dict: if el.get("id", 0) == 1: if current_list: main_list.append(current_list) current_list = [el] else: current_list.append(el) if current_list: main_list.append(current_list) print(main_list)
Use the column of a dataframe that has a list of dictionaries to create other columns for the dataframe
I have a column in my dataframe of type object that has values like: for i in df3['placeholders'][:10]: Output: [{'type': 'experience', 'label': '0-1 Yrs'}, {'type': 'salary', 'label': '1,00,000 - 1,25,000 PA.'}, {'type': 'location', 'label': 'Chennai'}] [{'type': 'date', 'label': '08 October - 13 October'}, {'type': 'salary', 'label': 'Not disclosed'}, {'type': 'location', 'label': 'Chennai'}] [{'type': 'education', 'label': 'B.Com'}, {'type': 'salary', 'label': 'Not disclosed'}, {'type': 'location', 'label': 'Mumbai Suburbs, Navi Mumbai, Mumbai'}] [{'type': 'experience', 'label': '0-2 Yrs'}, {'type': 'salary', 'label': '50,000 - 2,00,000 PA.'}, {'type': 'location', 'label': 'Chennai'}] [{'type': 'experience', 'label': '0-1 Yrs'}, {'type': 'salary', 'label': '2,00,000 - 2,25,000 PA.'}, {'type': 'location', 'label': 'Bengaluru(JP Nagar)'}] [{'type': 'experience', 'label': '0-3 Yrs'}, {'type': 'salary', 'label': '80,000 - 2,00,000 PA.'}, {'type': 'location', 'label': 'Hyderabad'}] [{'type': 'experience', 'label': '0-5 Yrs'}, {'type': 'salary', 'label': 'Not disclosed'}, {'type': 'location', 'label': 'Hyderabad'}] [{'type': 'experience', 'label': '0-1 Yrs'}, {'type': 'salary', 'label': '1,25,000 - 2,00,000 PA.'}, {'type': 'location', 'label': 'Mumbai'}] [{'type': 'date', 'label': '08 October - 17 October'}, {'type': 'salary', 'label': 'Not disclosed'}, {'type': 'location', 'label': 'Pune(Bavdhan)'}] [{'type': 'experience', 'label': '0-2 Yrs'}, {'type': 'salary', 'label': 'Not disclosed'}, {'type': 'location', 'label': 'Jaipur'}] [{'type': 'experience', 'label': '0-0 Yrs'}, {'type': 'salary', 'label': '1,00,000 - 1,50,000 PA.'}, {'type': 'location', 'label': 'Delhi NCR(Sector-81 Noida)'}] I want to add more columns to my existing dataframe by extracting features from this column such that value of "type"= Column name value of "label"= value under the column The final expected output: df.head(3) Output: ..... experience, salary, location, date, education ..... 
0-1 Yrs, 1,00,000 - 1,25,000 PA., Chennai, nan, nan ..... nan, 1,00,000 - 1,25,000 PA., Chennai, 08 October - 13 October, nan ..... nan, Not disclosed, Mumbai Suburbs, Navi Mumbai, Mumbai, nan, B.Com The first answer worked. [EDIT 2] Later, I tried the same code suggested in the first response for a new dataset with same issue. I got the following error: <ipython-input-23-ad8e644044af> in <listcomp>(.0) ----> 1 new_columns = set([d['Name'] for l in dfr.RatingDistribution.values for d in l ]) 2 # Make a dict of dicts 3 col_val_dict = {} 4 for col_name in new_columns: 5 col_val_dict[col_name] = {} TypeError: 'float' object is not iterable My Input column: RatingDistribution [{'Name': 'Work-Life Balance', 'count': 5}, {'Name': 'Skill Development', 'count': 5}, {'Name': 'Salary & Benefits', 'count': 5}, {'Name': 'Job Security', 'count': 5}, {'Name': 'Company Culture', 'count': 5}, {'Name': 'Career Growth', 'count': 5}, {'Name': 'Work Satisfaction', 'count': 5}] [{'Name': 'Work-Life Balance', 'count': 4}, {'Name': 'Skill Development', 'count': 5}, {'Name': 'Salary & Benefits', 'count': 4}, {'Name': 'Job Security', 'count': 4}, {'Name': 'Company Culture', 'count': 3}, {'Name': 'Career Growth', 'count': 3}, {'Name': 'Work Satisfaction', 'count': 5}] [{'Name': 'Work-Life Balance', 'count': 3}, {'Name': 'Skill Development', 'count': 4}, {'Name': 'Salary & Benefits', 'count': 5}, {'Name': 'Job Security', 'count': 4}, {'Name': 'Company Culture', 'count': 5}, {'Name': 'Career Growth', 'count': 4}, {'Name': 'Work Satisfaction', 'count': 4}] [{'Name': 'Work-Life Balance', 'count': 5}, {'Name': 'Skill Development', 'count': 5}, {'Name': 'Salary & Benefits', 'count': 5}, {'Name': 'Job Security', 'count': 5}, {'Name': 'Company Culture', 'count': 5}, {'Name': 'Career Growth', 'count': 5}, {'Name': 'Work Satisfaction', 'count': 5}] [{'Name': 'Work-Life Balance', 'count': 3}, {'Name': 'Skill Development', 'count': 5}, {'Name': 'Salary & Benefits', 'count': 3}, {'Name': 'Job Security', 
'count': 3}, {'Name': 'Company Culture', 'count': 3}, {'Name': 'Career Growth', 'count': 3}, {'Name': 'Work Satisfaction', 'count': 4}] [{'Name': 'Work-Life Balance', 'count': 3}, {'Name': 'Skill Development', 'count': 5}, {'Name': 'Salary & Benefits', 'count': 5}, {'Name': 'Job Security', 'count': 1}, {'Name': 'Company Culture', 'count': 3}, {'Name': 'Career Growth', 'count': 1}, {'Name': 'Work Satisfaction', 'count': 1}] My code: new_columns = set([d['Name'] for l in dfr.RatingDistribution.values for d in l ]) # Make a dict of dicts col_val_dict = {} for col_name in new_columns: col_val_dict[col_name] = {} # For each column name look to see if a row has that as a type # If so, get the label for that dict # otherwise fill it with NaN for i,l in enumerate(dfr.placeholders.values): the_label = [d['count'] for d in l if d['Name'] == col_name] if the_label: col_val_dict[col_name][i] = the_label[0] else: col_val_dict[col_name][i] = np.NaN # Merge this new dfa with the old one merged_dfa = pd.concat([dfr,pd.DataFrame(col_val_dict)],axis='columns') dfr.shape I'm getting error in the very first line. I'm not able to figure out why it is throwing me the float error. PLEASE HELP
# Treat rows that are not lists (e.g. NaN for missing data) as empty, since
# iterating them raises "TypeError: 'float' object is not iterable".
rows = [row if isinstance(row, list) else [] for row in df3.placeholders.values]

# Get the unique types (column names)
new_columns = {d['type'] for row in rows for d in row}

# Make a dict of dicts: column name -> {row position -> label}
col_val_dict = {}
for col_name in new_columns:
    col_val_dict[col_name] = {}
    # For each column name look to see if a row has that as a type.
    # If so, get the label for that dict, otherwise fill it with NaN.
    for i, row in enumerate(rows):
        the_label = [d['label'] for d in row if d['type'] == col_name]
        # np.nan rather than np.NaN: the capitalized alias was removed in
        # NumPy 2.0.
        col_val_dict[col_name][i] = the_label[0] if the_label else np.nan

# Merge this new df with the old one
merged_df = pd.concat([df3, pd.DataFrame(col_val_dict)], axis='columns')
Remove duplicates from a list of a list of unordered dictionaries
Consider the following: [ [ {'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'bob', 'score': 99} ], [ {'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19}, {'name': 'bob', 'score': 99} ], [ {'name': 'bob', 'score': 99}, {'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19} ], [ {'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'stu', 'score': 69} ] ] Ignoring the order of the dictionaries within each list, how can duplicates be removed such that the output would be only two of the lists: one with bob and one with stu? Output something like: [ [ {'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'bob', 'score': 99} ], [ {'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'stu', 'score': 69} ] ]
You could try something like this dict_list = [[{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'bob', 'score': 99}], [{'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19}, {'name': 'bob', 'score': 99}], [{'name': 'bob', 'score': 99}, {'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19}], [{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'stu', 'score': 69}]] # create list of names you've seen before name_lists = [] # create lists of unique lists unique_lists = [] # loop over each list you have for L in dict_list: # get list of names names = [i['name'] for i in L] # check if you've seen this set of names before if set(names) not in [set(n) for n in name_lists]: print(names) # save these names name_lists.append(names) # add this list to your list of unique names unique_lists.append(L) Output: ['fred', 'frank', 'bob'] ['fred', 'frank', 'stu'] unique_lists Output: [[{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'bob', 'score': 99}], [{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'stu', 'score': 69}]] Note that this method will save only the scores for the first set of unique names and discard scores when the set of names is duplicated. If it is expected that the same names may have different scores, you may want to save every unique set of scores. In this case, you can follow the method given by PacketLoss below: name_lists = [] unique_lists = [] for di, d in enumerate(dict_list): # get list of name, score tuples r = [(i['name'], i['score']) for i in d] # sort tuples alphabetically by name r.sort(key=lambda tup: tup[0]) # check if these names and scores have been seen before if r not in name_lists: name_lists.append(r) unique_lists.append(dict_list[di])
Due to the ordering being off, a simple == will not match, we can work around this by gathering the data, sorting it as a list of tuples and checking if the match has been seen before. data = [[{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'bob', 'score': 99}], [{'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19}, {'name': 'bob', 'score': 99}], [{'name': 'bob', 'score': 99}, {'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19}], [{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'stu', 'score': 69}]] seen = list() result = list() for idx, d in enumerate(data): r = [(i['name'], i['score']) for i in d] r.sort(key=lambda tup: tup[0]) if r not in seen: seen.append(r) result.append(data[idx]) With this method, we are checking that both the scores and names are a complete match, meaning if one score in a duplicate changed to 98 it would no longer be counted as a duplicate. Output: [[{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'bob', 'score': 99}], [{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'stu', 'score': 69}]] Output with modifying scores in data: data = [[{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'bob', 'score': 99}], [{'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19}, {'name': 'bob', 'score': 99}], [{'name': 'bob', 'score': 98}, {'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19}], [{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'stu', 'score': 69}]] [[{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'bob', 'score': 99}], [{'name': 'bob', 'score': 98}, {'name': 'frank', 'score': 100}, {'name': 'fred', 'score': 19}], [{'name': 'fred', 'score': 19}, {'name': 'frank', 'score': 100}, {'name': 'stu', 'score': 69}]]
How to use list comprehensions to make a dict having list of list as values
I Have a list as following and I want to convert that as output shown below using List comprehensions. Any help is appreciated. a = [{'type': 'abc', 'values': 1}, {'type': 'abc', 'values': 2}, {'type': 'abc', 'values': 3}, {'type': 'xyz', 'values': 4}, {'type': 'xyz', 'values': 5}, {'type': 'pqr', 'values': 6}, {'type': 'pqr', 'values': 8}, {'type': 'abc', 'values': 9}, {'type': 'mno', 'values': 10}, {'type': 'def', 'values': 11}] This is the output I am expecting. output = {'abc': [1,2,3,9], 'xyz': [4,5], 'pqr': [6,8], 'mno': [10], 'def': [11]}
from operator import itemgetter
from itertools import groupby

a = [
    {'type': 'abc', 'values': 1},
    {'type': 'abc', 'values': 2},
    {'type': 'abc', 'values': 3},
    {'type': 'xyz', 'values': 4},
    {'type': 'xyz', 'values': 5},
    {'type': 'pqr', 'values': 6},
    {'type': 'pqr', 'values': 8},
    {'type': 'abc', 'values': 9},
    {'type': 'mno', 'values': 10},
    {'type': 'def', 'values': 11},
]

typegetter = itemgetter('type')
valuesgetter = itemgetter('values')
# groupby only merges adjacent items, so sort by the grouping key first.
# sorted() is stable, which keeps the values of each type in input order.
groups = groupby(sorted(a, key=typegetter), key=typegetter)
output = {k: list(map(valuesgetter, v)) for k, v in groups}
print(output)
a = [
    {'type': 'abc', 'values': 1},
    {'type': 'abc', 'values': 2},
    {'type': 'abc', 'values': 3},
    {'type': 'xyz', 'values': 4},
    {'type': 'xyz', 'values': 5},
    {'type': 'pqr', 'values': 6},
    {'type': 'pqr', 'values': 8},
    {'type': 'abc', 'values': 9},
    {'type': 'mno', 'values': 10},
    {'type': 'def', 'values': 11},
]

# Group the 'values' of every item under its 'type', preserving input order.
output = {}
for item in a:
    # setdefault creates the list the first time a type is seen.
    output.setdefault(item['type'], []).append(item['values'])
print(output)