I am trying to find the dominant topic in each article using the following code.
Here `lda_model` is the model, `corpus` consists of the `id2word.doc2bow(text)` vectors, and `texts` is the text data to which the model is applied.
def format_topics_sentences(ldamodel=lda_model, corpus=corpus, texts=data):
    """Return a DataFrame describing the dominant topic of each document.

    Args:
        ldamodel: Topic model. Indexing it with ``corpus`` must yield one
            result per document, and ``show_topic(topic_id)`` must return
            ``(word, weight)`` pairs.
        corpus: Bag-of-words corpus passed to ``ldamodel[...]``.
        texts: Original documents; appended as the last column.

    Returns:
        A DataFrame with the columns ``Dominant_Topic``,
        ``Perc_Contribution`` and ``Topic_Keywords``, followed by one
        column holding ``texts``.
    """
    # show_topic() does not depend on the document, so look each topic up once.
    keywords_by_topic = {}
    # Rows are collected in a list and converted once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []

    for row in ldamodel[corpus]:
        # When the model emits per-word topics, each document comes back as a
        # tuple of lists (document topics, word topics, phi values) instead of
        # a plain list of (topic_id, probability) pairs. Sorting that tuple is
        # what raises "'<' not supported between instances of 'int' and
        # 'tuple'", so keep only the document-level list.
        if isinstance(row, tuple) and row and isinstance(row[0], list):
            row = row[0]

        if not row:
            # Emit a placeholder so later rows stay aligned with `texts`.
            records.append((None, None, None))
            continue

        # max() returns the first pair with the highest probability, which is
        # the same element a stable descending sort would put first.
        topic_num, prop_topic = max(row, key=lambda pair: pair[1])

        if topic_num not in keywords_by_topic:
            keywords_by_topic[topic_num] = ", ".join(
                word for word, _ in ldamodel.show_topic(topic_num)
            )

        records.append(
            (int(topic_num), round(prop_topic, 4), keywords_by_topic[topic_num])
        )

    # Passing `columns` here also works for an empty corpus, where assigning
    # `.columns` on an empty frame would raise.
    sent_topics_df = pd.DataFrame(
        records,
        columns=['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords'],
    )

    # Add original text to the end of the output
    contents = pd.Series(texts)
    return pd.concat([sent_topics_df, contents], axis=1)
# Build the per-document dominant-topic table using optimal_model.
df_topic_sents_keywords = format_topics_sentences(ldamodel=optimal_model, corpus=corpus, texts=data)
# Format: reset_index() moves the row index into a leading column, so the
# frame has five columns to rename below.
df_dominant_topic = df_topic_sents_keywords.reset_index()
df_dominant_topic.columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text']
# Show the first ten rows.
# NOTE(review): as a bare expression this presumably relies on notebook
# auto-display; wrap it in print() when running as a script.
df_dominant_topic.head(10)
For the following block it seems I am getting the error:
# Reproduces the failure on the first transformed document only.
for i, row in enumerate(model_gr[corpus][0:1]):
print(row)
# NOTE(review): per the printed output below, `row` is a tuple of three lists
# rather than a list of (topic_id, probability) pairs, so x[1] is a tuple whose
# second item is a float, a list of ints, or a list of tuples. Comparing those
# keys is what raises the TypeError.
row = sorted(row, key=lambda x: (x[1]), reverse=True)
Getting the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [186], in <cell line: 1>()
1 for i, row in enumerate(lda_model_gr[corpus]):
2 print(row)
----> 3 row = sorted(row, key=lambda x: (x[1]), reverse=True)
TypeError: '<' not supported between instances of 'int' and 'tuple'
It would be helpful if the error could be resolved.
print(row) produces:
([(5, 0.28596485), (6, 0.16911359), (8, 0.030976538), (10, 0.011954311), (11, 0.068368636), (13, 0.34729925), (15, 0.016343366), (17, 0.05741114)], [(0, [6, 11]), (1, [5]), (2, [5]), (3, [6, 5]), (4, [13, 6, 5, 11, 10]), (5, [13, 5, 17, 10, 15, 19]), (6, [5, 13, 11, 17]), (7, [5, 13, 11]), (8, [5, 13, 17, 10]), (9, [5, 13, 8, 6, 17]), (10, [13, 6, 5, 11, 17, 10]), (11, [13, 6, 5, 11, 10, 17, 15, 16, 19]), (12, [5, 13, 11, 19, 10]), (13, [5, 13]), (14, [8]), (15, [13, 5, 11, 10]), (16, [6]), (17, [13, 5, 19, 11]), (18, [11, 13, 19]), (19, [5]), (20, [5, 13]), (21, [13, 6, 11, 15, 19]), (22, [13, 17, 6, 15, 11, 10, 19]), (23, [5, 11, 13, 16, 15]), (24, [5, 13, 6, 15, 19]), (25, [6, 5, 13]), (26, [6, 5, 13, 15]), (27, [11, 6]), (28, [13, 5, 17, 15, 11, 19]), (29, [17, 10, 13]), (30, [13, 6, 5, 10, 15]), (31, [6, 5, 13, 11]), (32, [5, 11]), (33, [5]), (34, [13, 10, 5, 11, 17, 6, 16]), (35, [5, 17, 13, 11]), (36, [5]), (37, []), (38, [6, 13, 5, 11]), (39, [13, 8]), (40, [13, 8]), (41, [10, 5, 13]), (42, [13, 5, 11, 6]), (43, []), (44, [6, 16, 19]), (45, [5, 6, 11]), (46, [5, 13]), (47, [5, 19, 11]), (48, [13, 15]), (49, [13, 15]), (50, [5]), (51, [5, 17]), (52, []), (53, [13, 6, 5, 2]), (54, []), (55, [6, 13, 11, 10]), (56, [13, 5]), (57, [11]), (58, [15]), (59, [5, 15, 16]), (60, [6]), (61, [6, 13, 5, 11, 17, 10]), (62, [17, 13, 8, 15, 10]), (63, [8]), (64, [13, 6, 15, 19, 11]), (65, [6]), (66, [5, 13]), (67, [13, 17, 5]), (68, [6, 13, 5, 11, 19, 10]), (69, [5, 13, 17]), (70, [13, 5, 11, 15, 10, 17]), (71, [13, 6, 11, 15, 19, 10]), (72, [17, 15, 19, 11, 10, 13, 16]), (73, [6, 5]), (74, [5, 17, 13, 11]), (75, []), (76, [11, 13, 5, 17, 10, 6, 19]), (77, [13]), (78, [5, 13, 17, 6, 15, 10]), (79, [13, 5, 6, 11, 19]), (80, [6]), (81, [5, 13, 11, 17, 19]), (82, [6, 13, 11, 8]), (83, [13, 6, 11, 5]), (84, [11, 13, 17, 15]), (85, [6, 8, 13, 17, 10]), (86, [13, 17, 19, 10]), (87, [5]), (88, [17, 13, 6, 11]), (89, [5, 13]), (90, [13, 5, 8, 19]), (91, []), (92, [6, 17]), (93, [6, 
5, 11, 10]), (94, [13]), (95, [5, 13, 11, 15, 10, 17]), (96, [5]), (97, [6, 13, 11, 5]), (98, [5, 13]), (99, [5, 15, 17, 11]), (100, [11, 17, 13, 15]), (101, [13, 11, 10, 17]), (102, [13, 5]), (103, []), (104, [13, 6, 11, 5, 15, 19]), (105, [13, 11, 17, 10, 8, 15]), (106, [6, 5, 8, 13, 11]), (107, [5, 13, 15, 10, 17]), (108, [13, 19]), (109, []), (110, [6, 5, 11, 13]), (111, [6, 13, 5, 17, 15]), (112, [5]), (113, [5, 13, 11, 6, 15]), (114, [5, 13, 11]), (115, [6]), (116, [13, 17, 11, 10, 5, 19]), (117, [5, 11, 13, 10]), (118, [6, 17, 15]), (119, [5]), (120, [5, 19]), (121, []), (122, [17, 13, 6, 19]), (123, [6, 5, 13, 11]), (124, [13, 15]), (125, [6, 13]), (126, [5, 6, 17]), (127, [6, 5, 11, 13]), (128, [13, 5, 6, 17, 10, 15, 11]), (129, [5, 6, 11, 13, 19]), (130, [5, 6, 10]), (131, [13, 6, 5, 17, 11, 10, 15]), (132, [8, 5]), (133, [16]), (134, [6, 10]), (135, [13, 5, 6, 17, 19, 11]), (136, [13, 11, 17]), (137, [13, 19, 15, 2]), (138, [17, 13, 11, 5]), (139, [13, 6, 15, 11, 19]), (140, [13, 10]), (141, [13]), (142, [13, 11, 17, 5]), (143, [10]), (144, [6, 5, 11, 10, 13]), (145, [5]), (146, [13, 5]), (147, [6, 5, 13, 10]), (148, [13, 15]), (149, [5, 6, 15, 13, 11]), (150, [13]), (151, [5]), (152, []), (153, [5, 8, 6, 13]), (154, [5, 13, 6, 11, 15]), (155, [13, 6, 5, 11, 10]), (156, [6, 5, 13, 11]), (157, [6, 5]), (158, [6, 13, 5, 11]), (159, [17, 10]), (160, [6, 13, 15, 10]), (161, [5, 17, 13, 10]), (162, [13, 10, 17]), (163, [13, 6, 5, 17, 15, 10]), (164, [5, 11, 17, 13, 6, 15]), (165, [6, 13]), (166, [5, 6, 13]), (167, [5]), (168, [5]), (169, [13, 8]), (170, [13, 17, 15]), (171, [5, 13, 11, 15, 19]), (172, [13, 5, 6, 15]), (173, [5]), (174, [5, 11, 13, 15, 10]), (175, [13, 17, 11, 15, 6]), (176, [13, 5, 11]), (177, [6, 5, 13]), (178, [13, 6]), (179, [17]), (180, [8, 19]), (181, []), (182, [5, 6, 13]), (183, [5]), (184, [13, 5, 6, 8, 11, 17, 10, 15]), (185, [6, 13, 11]), (186, [13, 5, 15, 17, 6, 11]), (187, [5, 6, 11, 13]), (188, [6, 11, 5]), (189, [17, 5, 13, 
10]), (190, [11, 15, 19]), (191, [5]), (192, [6, 5]), (193, [13, 17, 11, 5]), (194, [5, 13, 11]), (195, [6, 13, 5, 17, 10, 15, 11]),, (275, [13, 5]), (276, [5, 13, 6]), (277, [13, 11, 17, 10]), (278, [13, 11, 19, 15]), (279, [13, 5, 19, 11, 10]), (280, [11, 5]), (281, [8, 5]), (282, []), (283, [5, 13, 6, 11]), (284, [5, 17, 10]), (285, [5, 6]), (286, [6, 5, 8, 13, 11, 16]), (287, [5, 8]), (288, [6, 13, 17, 5, 11, 10]), (289, [13, 15]), (290, [8]), (291, [5, 6, 13, 10]), (292, [6, 13, 5]), (293, [13, 5, 17]), (294, []), (295, [13, 11]), (296, [6, 5, 13, 11, 15, 10]), (297, [13, 5, 6, 11, 19]), (298, [6, 13, 8, 11]), (299, [5, 8, 13]), (300, [13, 11]), (301, [13, 17]), (302, [5, 6, 13, 11]), (303, [8]), (304, [13, 6, 11]), (305, [5]), (306, [6, 11, 10]), (307, [5, 6, 13, 11]), (308, [13]), (309, [17, 19, 15]), (310, [5, 13]), (311, [13, 5, 11, 17]), (312, [11, 15]), (313, [13, 5, 19]), (314, [6, 13, 5, 11, 10]), (315, [11, 8, 5, 10, 13, 17]), (316, [13, 17]), (317, [6, 5, 13, 11]), (318, [8, 17, 13, 11]), (319, [5, 13, 8, 17, 10]), (320, []), (321, [15]), (322, [6, 5, 11, 10]), (323, [5, 13]), (324, []), (325, [17, 5]), (326, [5, 13, 17, 10, 8]), (327, [6, 13, 5, 11, 15, 10]), (328, [8, 9]), (329, []), (330, [5, 17]), (331, [13, 17, 15]), (332, [5, 17, 13, 10]), (333, [6, 5, 13]), (334, [6, 5]), (335, [6, 5, 10]), (336, [13, 6, 17, 5, 15, 10]), (337, [13, 8, 5]), (338, [11]), (339, [5]), (340, [5]), (341, [6, 5, 13, 11]), (342, [5, 17, 13, 19, 10]), (343, [15]), (344, [8, 13]), (345, [13]), (346, [16]), (347, [5, 13, 17, 6, 19]), (348, [15]), (349, [17, 5]), (350, [19]), (351, [5, 17, 13, 10]), (352, [5, 6, 13, 15]), (353, [6]), (354, [5, 6, 13, 11, 15, 17, 10]), (355, [6]), (356, [8, 11, 13]), (357, [6, 5]), (358, [13, 6, 5, 15]), (359, [13, 5, 6, 17, 15, 2]), (360, [13, 6, 11, 5, 15, 10, 19])], [(0, [(6, 0.5968742), (11, 0.39671573)]), (1, [(5, 0.9998293)]), (2, [(5, 0.99847865)]), (3, [(5, 0.4248721), (6, 0.57491225)]), (4, [(5, 0.11363209), (6, 0.25848317), (10, 
0.015054545), (11, 0.049808282), (13, 0.5597861)]), (5, [(5, 0.36136225), (10, 0.05261353), (13, 0.42551985), (15, 0.047734324), (17, 0.091814265), (19, 0.020699687)]), (6, [(5, 0.5205156), (11, 0.14817742), (13, 0.29779604), (17, 0.023321701)]), (7, [(5, 0.53978944), (11, 0.046022266), (13, 0.41354123)]), (8, [(5, 9.711002), (10, 0.044227965), (13, 2.7743065), (17, 0.45317963)]), (9, [(5, 0.5029905), (6, 0.06789804), (8, 0.17515083), (13, 0.23967993), (17, 0.011837956)]), (10, [(5, 0.08292074), (6, 0.2702401), (10, 0.010763487), (11, 0.039147977), (13, 0.56089103), (17, 0.03516239)]), (11, [(5, 1.1803184), (6, 1.2289463), (10, 0.072587386), (11, 0.1286138), (13, 2.2562687), (15, 0.030974178), (16, 0.025578478), (17, 0.062436875), (19, 0.010315418)]), (12, [(5, 2.6865792), (10, 0.013735286), (11, 0.39467436), (13, 0.86165273), (19, 0.04294668)]), (13, [(5, 0.81406885), (13, 0.18492718)]), (14, [(8, 0.99394715)]), (15, [(5, 0.27748784), (10, 0.03525126), (11, 0.13823411), (13, 0.54791397)]), (16, [(6, 0.99653596)]), (17, [(5, 0.29293177), (11, 0.027939592), (13, 0.6416563), (19, 0.037042562)]), (18, [(11, 0.6325371), (13, 0.3222076), (19, 0.043747585)]), (19, [(5, 0.9973073)]), (20, [(5, 1.66975), (13, 0.3300581)]), (21, [(6, 0.23515475), (11, 0.22100899), (13, 0.48247397), (15, 0.028825004), (19, 0.028546877)]), (22, [(6, 0.06745948), (10, 0.04103736), (11, 0.051079858), (13, 0.62590384), (15, 0.059846856), (17, 0.11718548), (19, 0.035339724)]), (23, [(5, 0.45226714), (11, 0.3453865), (13, 0.17784867), (15, 0.012043779), (16, 0.012286034)]), (24, [(5, 1.6629748), (6, 0.40630534), (13, 0.8110439), (15, 0.10047675), (19, 0.014751689)]), (25, [(5, 0.9039499), (6, 2.2460606), (13, 0.84837)]), (26, [(5, 0.27978677), (6, 1.6560999), (13, 0.05006357), (15, 0.011488569)]), (27, [(6, 0.2689592), (11, 0.7294827)]), (28, [(5, 0.32642248), (11, 0.01570714), (13, 0.5786019), (15, 0.031102525), (17, 0.03735402), (19, 0.010725663)]), (29, [(10, 0.2839394), (13, 0.09658243), (17, 
0.61893487)]), (30, [(5, 0.033488367), (6, 0.32328427), (10, 0.03210918), (13, 0.5788379), (15, 0.028409567)]), (31, [(5, 0.23556039), (6, 0.6963146), (11, 0.018590989), (13, 0.04946159)]), (32, [(5, 1.55593), (11, 0.44189468)]), (33, [(5, 0.999542)]), (34, [(5, 0.06774567), (6, 0.026741652), (10, 0.10180665), (11, 0.03999391), (13, 0.7012891), (16, 0.011515695), (17, 0.037500005)]), (35, [(5, 0.74752474), (11, 0.031898193), (13, 0.036603693), (17, 0.18352643)]), (36, [(5, 0.9991604)]), (37, []), (38, [(5, 0.10876127), (6, 0.6256257), (11, 0.046303306), (13, 0.21748954)]), (39, [(8, 1.6055592), (13, 14.393496)]), (40, [(8, 0.11110507), (13, 0.8872032)]), (41, [(5, 0.3278241), (10, 0.57416123), (13, 0.09722562)]), (42, [(5, 4.0781856), (6, 0.0147080785), (11, 0.029067446), (13, 13.869116)]), (43, []), (44, [(6, 0.96126324), (16, 0.02429104), (19, 0.011483919)]), (45, [(5, 0.5638497), (6, 0.33563486), (11, 0.093593456)]), (46, [(5, 0.6678608), (13, 0.3220007)]), (47, [(5, 0.8983715), (11, 0.024045162), (19, 0.07151932)]), (48, [(13, 2.2934313), (15, 0.7017795)]), (49, [(13, 0.6742898), (15, 0.3240861)]), (50, [(5, 0.98399025)]), (51, [(5, 0.55043167), (17, 0.44937566)]), (52, []), (53, [(2, 0.018934213), (5, 0.023143088), (6, 0.060607478), (13, 0.8957889)]), (54, []), (55, [(6, 0.9087631), (10, 0.021585837), (11, 0.4411331), (13, 0.61902004)]), (56, [(5, 1.7933416), (13, 5.20598)]), (57, [(11, 0.9943286)]), (58, [(15, 0.79481506)]), (59, [(5, 1.9571952), (15, 0.027285596), (16, 0.01508758)]), (60, [(6, 0.99718803)]), (61, [(5, 0.68004125), (6, 1.5184997), (10, 0.017957926), (11, 0.07278491), (13, 0.68644667), (17, 0.022267561)]), (62, [(8, 0.092339344), (10, 0.032022662), (13, 0.38628605), (15, 0.0624752), (17, 0.42331445)]), (63, [(8, 0.9978272)]), (64, [(6, 0.5092547), (11, 0.033407502), (13, 3.0600002), (15, 0.21058686), (19, 0.18120919)]), (65, [(6, 0.9928961)]), (66, [(5, 0.7713471), (13, 0.22808385)]), (67, [(5, 0.1877071), (13, 0.51865894), (17, 0.29328796)]), 
(68, [(5, 0.23156661), (6, 0.34173933), (10, 0.020027157), (11, 0.10749307), (13, 0.25148773), (19, 0.039078534)]), (69, [(5, 0.5765264), (13, 0.38200068), (17, 0.041211095)]), (70, [(5, 0.64642644), (10, 0.028461495), (11, 0.15770131), (13, 1.0490456), (15, 0.10653486), (17, 0.01148939)]), (71, [(6, 0.30732286), (10, 0.020098355), (11, 0.12924197), (13, 0.42746255), (15, 0.082123965), (19, 0.0326494)]), (72, [(10, 0.06666289), (11, 0.074706614), (13, 0.02957316), (15, 0.09477477), (16, 0.027797878), (17, 0.62801933), (19, 0.077810526)]), (73, [(5, 0.33678973), (6, 0.6623889)]), (74, [(5, 0.4380288), (11, 0.01636293), (13, 0.13888715), (17, 0.4012021)]), (75, []), (76, [(5, 0.3799037), (6, 0.035008635), (10, 0.14057027), (11, 2.1272829), (13, 0.9193346), (17, 0.36919028), (19, 0.022129333)]), (77, [(13, 0.9991247)]), (78, [(5, 1.474765), (6, 0.18323798), (10, 0.06402798), , (152, []), (153, [(5, 1.5941594), (6, 0.18276767), (8, 0.19326493), (13, 0.020113932)]), (154, [(5, 0.6979168), (6, 0.07696864), (11, 0.047029614), (13, 0.14639422), (15, 0.031047199)]), (155, [(5, 0.36604568), (6, 0.55187136), (10, 0.020418452), (11, 0.32162476), (13, 0.7396605)]), (156, [(5, 0.24038687), (6, 0.5832998), (11, 0.04140416), (13, 0.13416393)]), (157, [(5, 0.045482207), (6, 0.95414346)]), (158, [(5, 0.38259345), (6, 1.0526536), (11, 0.026666924), (13, 0.537109)]), (159, [(10, 0.20570913), (17, 0.77071345)]), (160, [(6, 0.6524595), (10, 0.010792802), (13, 0.28757605), (15, 0.04094646)]), (161, [(5, 2.0589871), (10, 0.12468771), (13, 0.36602888), (17, 1.4495771)]), (162, [(10, 0.83827), (13, 1.5094748), (17, 0.648649)]), (163, [(5, 0.06295568), (6, 0.120614536), (10, 0.016728334), (13, 0.71049577), (15, 0.039553523), (17, 0.0468485)]), (164, [(5, 1.1376314), (6, 0.09673809), (11, 0.70456254), (13, 0.36210164), (15, 0.02989925), (17, 0.6472362)]), (165, [(6, 0.526527), (13, 0.4723202)]), (166, [(5, 0.5426562), (6, 0.31422392), (13, 0.13896792)]), (167, [(5, 0.99809223)]), (168, [(5, 
1.9922048)]), (169, [(8, 0.9059666), (13, 1.0862237)]), (170, [(13, 0.57812846), (15, 0.05317868), (17, 0.35934457)]), (171, [(5, 0.5823431), (11, 0.063469894), (13, 0.30021068), (15, 0.030986404), (19, 0.020744948)]), (172, [(5, 0.31448206), (6, 0.2913496), (13, 0.3773734), (15, 0.014470374)]), (173, [(5, 0.99857277)]), (174, [(5, 0.39143753), (10, 0.010697413), (11, 0.34335187), (13, 0.21986036), (15, 0.020828338)]), (175, [(6, 0.014833884), (11, 0.040551532), (13, 0.8129229), (15, 0.017847287), (17, 0.11353503)]), (176, [(5, 0.9076623), (11, 0.13005015), (13, 0.96164423)]), (177, [(5, 0.36702418), (6, 0.500949), (13, 0.13109966)]), (178, [(6, 0.23344707), (13, 0.7657816)]), (179, [(17, 0.99329185)]), (180, [(8, 0.96392804), (19, 0.034365065)]), (181, []), (182, [(5, 0.4440022), (6, 0.40883103), (13, 0.13924591)]), (183, [(5, 0.9996914)]), (184, [(5, 1.0638502), (6, 1.0610642), (8, 0.33454508), (10, 0.04025773), (11, 0.25050408), (13, 2.1360524), (15, 0.029602587), (17, 0.07840062)]), (185, [(6, 1.1214322), (11, 0.2584127), (13, 0.61718404)]), (186, [(5, 0.36601546), (6, 0.034629893), (11, 0.022759123), (13, 0.47535434), (15, 0.055777993), (17, 0.041516583)]), (187, [(5, 0.7087368), (6, 0.12540294), (11, 0.088850796), (13, 0.06893186)]), (188, [(5, 0.021329494), (6, 0.5414871), (11, 0.43284756)]), (189, [(5, 0.312698), (10, 0.1022781), (13, 0.13576092), (17, 0.44829604)]), (190, [(11, 0.8761378), (15, 0.08133569), (19, 0.04094883)]), (191, [(5, 0.99900436)]), (192, [(5, 0.42474878), (6, 0.572349)]), (193, [(5, 0.032193925), (11, 0.12152363), (13, 0.68100035), (17, 0.15653577)]), (194, [(5, 0.44289756), (11, 0.19419241), (13, 0.36238703)]), (195, [(5, 0.61203754), (6, 1.1338954), (10, 0.027556242), (11, 0.012187406), (13, 1.1299992), (15, 0.026951024), (17, 0.047400687)]), (196, [(5, 0.81643176), (15, 0.14716317), (19, 0.036043458)]), (197, [(11, 0.17854498), (13, 0.8136139)]), (198, [(11, 1.4906676), (13, 0.07986825), (15, 0.39435294), (19, 0.034823406)]), (199, 
[(13, 0.96775484), (19, 0.022879016)]), (200, [(5, 0.50008476), (13, 0.13684608), (17, 1.3437235), (19, 0.019204019)]), (201, [(5, 0.28973544), (6, 0.6771141), (19, 0.030683247)]), (202, [(5, 0.72740936), (6, 0.70894116), (11, 0.08029629), (13, 0.46888885)]), (203, [(5, 0.9990135)]), (204, [(13, 0.10984679), (17, 0.8860627)]), (205, []), (206, [(5, 2.1814685), (10, 0.010261451), (13, 0.10968065), (17, 0.6919027)]), (207, [(5, 1.6329234), (11, 0.16604121), (13, 0.09986477), (19, 0.09936662)]), (208, []), (209, []), (210, [(6, 0.98902684), (10, 0.010309203)]), (211, [(5, 0.8766025), (10, 0.025843045), (11, 0.15526077), (13, 0.9418555)]), (212, []), (213, [(13, 0.99704975)]), (214, [(5, 0.99885416)]), (215, [(5, 0.21299508), (6, 0.7658666), (10, 0.020584367)]), (216, [(6, 0.6826777), (10, 0.012727588), (11, 0.2534499), (13, 0.034274943), (16, 0.012877893)]), (217, [(5, 0.11966399), (6, 1.3196429), (10, 0.020960262), (11, 0.015896216), (13, 0.50265044), (15, 0.020670826)]), (218, [(5, 2.2757893), (6, 1.3534186), (10, 0.029892182), (11, 0.13136972), (13, 0.20872839)]), (219, [(5, 0.3468926), (6, 0.6506149)]), (220, [(5, 1.8485144), (13, 0.14501871)]), (221, [(5, 0.25912252), (6, 0.17810631), (8, 0.24370039), (10, 0.037707243), (11, 0.20553175), (13, 0.07483299)]), (222, [(5, 0.7876651), (13, 2.1635923), (16, 0.015844675), (19, 0.028262476)]), (223, [(5, 1.0295696), (6, 1.5752087), (10, 0.07156202), (11, 0.2412992), (13, 2.0537336), (16, 0.012706131)]), (224, []), (225, [(13, 0.9998467)]), (226, [(5, 0.19591874), (6, 1.0906304), (10, 0.1905979), (11, 0.105449244), (13, 3.1307368), (15, 0.06470487), (17, 0.20054035), (19, 0.018632406)]), (227, [(6, 0.9987916)]), (228, [(5, 0.97771984), (16, 0.0218728)]), (229, [(5, 0.99859655)]), (230, [(5, 0.6475402), (6, 0.32668734), (11, 0.02392782)]), (231, [(6, 0.1344872), (11, 1.1646075), (13, 0.7007387)]), (232, [(8, 0.10104955), (10, 0.030233635), (11, 0.06184386), (13, 0.59889644), (15, 0.046360116), (17, 0.13581054), (19, 
0.025584122)]), (233, [(5, 0.40037483), (6, 0.14609358), (10, 0.027402246), (13, 0.41244912)]), (234, [(8, 0.019712333), (19, 0.9443963)]), (235, [(5, 0.10418052), (6, 0.6320547), (11, 0.2169608), (13, 0.033399854), (19, 0.010283742)]), (236, [(5, 0.9959411)]), (237, [(5, 0.23990116), (6, 1.7178776), (10, 0.034481186)]), (238, [(8, 0.14724466), (13, 1.850922)]), (239, []), (240, [(6, 0.8977852), (15, 0.09624253)]), (241, [(6, 0.025402123), (8, 0.90599746), (11, 0.06850036)]), (242, [(5, 0.547783), (6, 0.28439674), (10, 0.014434091), (13, 0.13190828), (15, 0.0142463455)]), (243, [(6, 0.13199379), (10, 0.057886217), (11, 0.044038814), (13, 0.7375677), (16, 0.01539999), (19, 0.012251117)]), (244, [(17, 0.9888149)]), (245, [(5, 1.2593493), (8, 0.12706786), (11, 0.2334989), (13, 0.3034959), (17, 0.07299953)]), (246, []), (247, [(8, 0.018343199), (10, 0.069553435), (11, 0.3669271), (13, 0.34456688), (17, 0.19741665)]), (248, [(5, 0.5976373), (10, 0.018292593), (13, 0.38294467)]), (249, [(5, 0.18479073), (6, 0.6413081), (11, 0.38013488), (13, 0.35495862), (15, 0.03424214), (17, 0.40325254)]), (250, [(6, 0.08034917), (11, 1.9176077)]), (251, [(5, 0.20393085), (6, 0.78029704), (11, 0.014970283)]), (252, [(5, 0.64575917), (13, 0.022136863), (15, 0.0187605), (17, 0.30664405)]), (253, [(5, 1.442483), (11, 0.16993758), (13, 0.83663845), (17, 0.5453098)]), (254, [(5, 0.35664973), (11, 0.14691326), (13, 0.19329375), (17, 0.2940971)]), (255, []), (256, [(5, 0.41977447), (6, 0.117519066), (11, 0.060630452), (13, 0.39950612)]), (257, [(5, 1.1185334), (6, 0.6945151), (13, 0.18598245)]), (258, [(6, 0.7178949), (17, 0.27780265)]), (259, [(5, 0.2315536), (6, 1.2198883), (11, 0.3824548), (13, 0.15637466)]), (260, [(5, 0.7238699), (6, 4.682581), (11, 0.40098134), (13, 1.1820863)]), (261, [(6, 0.95957875), (11, 0.040013768)]), (262, [(5, 1.015397), (10, 0.06664863), (13, 0.9090345)]), (263, [(5, 0.47510156), (6, 0.49131647), (11, 0.03326682)]), (264, [(5, 0.2209457), (6, 0.44723123), (11, 
0.21056525), (13, 0.0959901), (15, 0.020397339)]), (265, [(13, 1.9956943)]), (266, [(6, 0.3809251), (10, 0.014788828), (13, 0.38318476), (17, 0.22062777)]), (267, [(5, 0.029857425), (6, 0.9402752), (10, 0.02590593)]), (268, [(5, 0.64380306), (13, 0.34743822)]), (269, [(5, 0.11750559), (6, 0.8823499)]), (270, [(6, 0.44937772), (11, 0.5390605), (15, 0.011173154)]), (271, [(11, 0.2647513), (13, 0.7337882)]), (272, [(5, 0.26731408), (10, 0.030186307), (13, 1.643601), (17, 0.04851736)]), (273, [(11, 0.25765947), (13, 0.6533741), (19, 0.08843155)]), (274, [(5, 2.0963056), (6, 0.017009757), (13, 1.8549889), (19, 0.029400624)]), (275, [(5, 0.27814716), (13, 4.7212873)]), (276, [(5, 3.3794577), (6, 0.35977948), (13, 2.2527797)]), (277, [(10, 0.022175033), (11, 0.40720117), (13, 2.1680253), (17, 0.40236053)]), (278, [(11, 0.09760688), (13, 0.8362114), (15, 0.020611858), (19, 0.040899552)]), (279, [(5, 0.50572246), (10, 0.025718477), (11, 0.039611906), (13, 1.3801457), (19, 0.04631573)]), (280, [(5, 0.14597909), (11, 0.8513083)]), (281, [(5, 0.48851472), (8, 0.5101291)]), (282, []), (283, [(5, 1.9939348), (6, 0.44371378), (11, 0.103731446), (13, 0.45728233)]), (284, [(5, 1.4199508), (10, 0.03615384), (17, 0.5426531)]), (285, [(5, 0.8740896), (6, 0.12192376)]), (286, [(5, 0.40902787), (6, 1.3055351), (8, 0.21426035), (11, 0.011262591), (13, 0.047578946), (16, 0.010891321)]), (287, [(5, 1.8233716), (8, 0.17567201)]), (288, [(5, 0.09208407), (6, 0.6540262), (10, 0.015362142), (11, 0.03480859), (13, 0.10554817), (17, 0.09803497)]), (289, [(13, 0.8756386), (15, 0.11996283)]), (290, [(8, 0.9914311)]), (291, [(5, 0.52997047), (6, 0.32427138), (10, 0.02762359), (13, 0.117458716)]), (292, [(5, 0.121682875), (6, 0.9736432), (13, 0.8890763)]), (293, [(5, 0.95656765), (13, 1.7425864), (17, 0.2989374)]), (294, []), (295, [(11, 0.26136807), (13, 0.7278068)]), (296, [(5, 0.7181937), (6, 2.721199), (10, 0.016427245), (11, 0.08950079), (13, 0.41189432), (15, 0.032551423)]), (297, [(5, 
0.542341), (6, 0.46200514), (11, 0.32819828), (13, 0.60858375), (19, 0.05845866)]), (298, [(6, 0.60774845), (8, 0.058460683), (11, 0.020373259), (13, 0.3131928)]), (299, [(5, 2.1014476), (8, 1.6672884), (13, 0.23035948)]), (300, [(11, 0.017025072), (13, 0.9675423)]), (301, [(13, 0.9309519), (17, 0.06890718)]), (302, [(5, 1.3083501), (6, 0.40939847), (11, 0.04729645), (13, 0.23426363)]), (303, [(8, 0.9910873)]), (304, [(6, 0.362415), (11, 0.05419311), (13, 0.5763127)]), (305, [(5, 0.99957144)]), (306, [(6, 0.8227331), (10, 0.067680344), (11, 0.10437946)]), (307, [(5, 1.2720524), (6, 0.61079097), (11, 0.01399885), (13, 0.082511365)]), (308, [(13, 0.99104434)]), (309, [(15, 0.017685179), (17, 1.9443939), (19, 0.03626969)]), (310, [(5, 0.9182231), (13, 0.081057556)]), (311, [(5, 0.27469856), (11, 0.077621914), (13, 0.5723921), (17, 0.06384972)]), (312, [(11, 0.9375039), (15, 0.052889265)]), (313, [(5, 0.26359212), (13, 0.70027715), (19, 0.034536064)]), (314, [(5, 0.2868577), (6, 0.33469313), (10, 0.013444621), (11, 0.028426826), (13, 0.333877)]), (315, [(5, 0.23929082), (8, 0.24916436), (10, 0.053220075), (11, 0.3889066), (13, 0.050798014), (17, 0.011353827)]), (316, [(13, 0.857545), (17, 0.14135489)]), (317, [(5, 0.29502746), (6, 0.5368782), (11, 0.08066518), (13, 0.0864927)]), (318, [(8, 0.85062176), (11, 0.5525968), (13, 0.79487807), (17, 0.80160546)]), (319, [(5, 5.760585), (8, 0.20784947), (10, 0.06649702), (13, 0.8233435), (17, 0.14133)]), (320, []), (321, [(15, 0.8437948)]), (322, [(5, 0.08627563), (6, 0.8453305), (10, 0.027926847), (11, 0.03973386)]), (323, [(5, 1.7549213), (13, 0.24465045)]), (324, []), (325, [(5, 0.23698263), (17, 0.76168483)]), (326, [(5, 0.9412229), (8, 0.015167902), (10, 0.06843942), (13, 0.82948655), (17, 0.13536783)]), (327, [(5, 0.5699474), (6, 2.3699284), (10, 0.016917715), (11, 0.061381046), (13, 1.91574), (15, 0.05792646)]), (328, [(8, 0.9131497), (9, 0.048321426)]), (329, []), (330, [(5, 0.80192566), (17, 0.19617847)]), (331, [(13, 
5.362196), (15, 0.12975213), (17, 3.5078437)]), (332, [(5, 0.5936004), (10, 0.021148894), (13, 0.044429537), (17, 0.3357255)]), (333, [(5, 0.056643162), (6, 0.9235007), (13, 0.019360073)]), (334, [(5, 0.23667754), (6, 1.7632058)]), (335, [(5, 0.2927878), (6, 0.6749614), (10, 0.03172394)]), (336, [(5, 0.037975095), (6, 0.24890976), (10, 0.01476361), (13, 0.5511085), (15, 0.034392715), (17, 0.103100955)]), (337, [(5, 0.2744202), (8, 0.56620514), (13, 7.1583757)]), (338, [(11, 0.99053866)]), (339, [(5, 0.9992218)]), (340, [(5, 0.9999391)]), (341, [(5, 0.39050665), (6, 0.46527576), (11, 0.03496145), (13, 0.108623475)]), (342, [(5, 0.61833644), (10, 0.015222529), (13, 0.10572725), (17, 0.2338722), (19, 0.019023256)]), (343, [(15, 0.928358)]), (344, [(8, 1.199619), (13, 0.799196)]), (345, [(13, 0.9962762)]), (346, [(16, 0.9200263)]), (347, [(5, 2.1576533), (6, 0.08436395), (13, 0.95397663), (17, 0.7777478), (19, 0.014683125)]), (348, [(15, 1.8959556)]), (349, [(5, 0.30803165), (17, 0.691142)]), (350, [(19, 0.6248793)]), (351, [(5, 3.3922043), (10, 0.05677398), (13, 0.19132452), (17, 0.35881528)]), (352, [(5, 1.1335676), (6, 0.4307446), (13, 0.41215315), (15, 0.012671193)]), (353, [(6, 0.99875885)]), (354, [(5, 0.3504603), (6, 0.30245423), (10, 0.014050029), (11, 0.038248967), (13, 0.2535254), (15, 0.02151125), (17, 0.014715961)]), (355, [(6, 0.9978003)]), (356, [(8, 0.6938963), (11, 0.20359914), (13, 0.092883095)]), (357, [(5, 0.3649056), (6, 0.63446707)]), (358, [(5, 0.30298933), (6, 0.31410143), (13, 0.34828243), (15, 0.024025684)]), (359, [(2, 0.01208612), (5, 0.49561238), (6, 0.44600818), (13, 3.842865), (15, 0.0686453), (17, 0.13172367)]), (360, [(5, 1.7383987), (6, 3.6478453), (10, 0.079790324), (11, 2.616148), (13, 3.7626178), (15, 0.11112045), (19, 0.040238842)])])
Your data is borked.
After replacing ), with ),\n and adding some indentation manually, the first few lines of your input look like this:
(
[
(5, 0.28596485),
(6, 0.16911359),
(8, 0.030976538),
(10, 0.011954311),
(11, 0.068368636),
(13, 0.34729925),
(15, 0.016343366),
(17, 0.05741114)
],
[
(0, [6, 11]),
(1, [5]),
(2, [5]),
(3, [6, 5]),
a little later you have
[
(0, [(6, 0.5968742), (11, 0.39671573)]),
(1, [(5, 0.9998293)]),
(2, [(5, 0.99847865)]),
(3, [(5, 0.4248721),
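so x[1] seems like it could be a tuple, a list, or a list of tuples..? Note that row itself is a tuple of three lists here, not a single list of (topic, probability) pairs, so iterating over row yields whole lists and x[1] is the second entry of each list.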
Maybe try a sanity check:
print({type(x[1]) for x in row})
(hint: it's better if you edit your question with code, since the comments destroy any formatting)
Since all the x[1] are tuples you probably have a situation similar to:
>>> lst = [
... (1, 2),
... (1, (2,3)),
... ]
>>> print({type(x) for x in lst})
{<class 'tuple'>}
>>> sorted(lst)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: '<' not supported between instances of 'tuple' and 'int'
>>>
since tuple comparisons compare each individual item. That will definitely be a bit more challenging to track down... maybe:
def get_tuple_type(tpl):
    """Return a tuple of the types of individual tuple elements.

    Args:
        tpl: Any iterable; each item contributes its own type, in order.

    Returns:
        A tuple of type objects, one per item of ``tpl``.

    Example:
        >>> get_tuple_type((1, (2, 3)))
        (<class 'int'>, <class 'tuple'>)
    """
    # Feed a generator straight into tuple(); no intermediate list is needed.
    return tuple(type(item) for item in tpl)
# Seed the accepted signatures with that of the first element's payload.
types = {get_tuple_type(row[0][1])}
for x in row:
    xtype = get_tuple_type(x[1])
    if xtype in types:
        continue
    # Anything whose signature has not been accepted yet is reported.
    print(f'offending value: {x!r} has type {xtype!r} - not any of {types!r}')
If you find compatible (but not strictly equal) types, you can manually copy them into the types set and run again until you find the problem value.