I'm using a genetic algorithm to select features, so I used the EvolutionaryFS library:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from EvolutionaryFS import GeneticAlgorithmFS
seed = 0
np.random.seed(seed)
df = pd.read_csv("/content/drive/MyDrive/RT_predict/Urine_DnS/Dataset/0607/0607Dragon_0607edit.csv")
dataset = df.values
X = dataset[:,0:-1]
Y = dataset[:,-1]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=seed)
input_dim = X.shape[1]
def build_model(n1_neurons=1000, n2_neurons=500):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(input_dim,)))
    model.add(keras.layers.Dense(n1_neurons, activation="relu"))
    model.add(keras.layers.Dense(n2_neurons, activation="relu"))
    model.add(keras.layers.Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'mse'])
    return model
data_dict={0:{'x_train':X_train,'y_train':Y_train,'x_test':X_test,'y_test':Y_test}}
columns_list=list(df.columns)
model_object=build_model
evoObj=GeneticAlgorithmFS(model=model_object, data_dict=data_dict,
                          cost_function='mean_squared_error', average='',
                          cost_function_improvement='decrease',
                          columns_list=columns_list, generations=100,
                          population=50, prob_crossover=0.9,
                          prob_mutation=0.1, run_time=60000)
best_columns=evoObj.GetBestFeatures()
print(best_columns)
and I got an error like this:
IndexError                                Traceback (most recent call last)
<ipython-input-20-33e6ab735f97> in <module>()
     47 model_object=build_model
     48 evoObj=GeneticAlgorithmFS(model=model_object,data_dict=data_dict,cost_function='mean_squared_error',average='',cost_function_improvement='decrease',columns_list=columns_list,generations=100,population=50,prob_crossover=0.9,prob_mutation=0.1,run_time=60000)
---> 49 best_columns=evoObj.GetBestFeatures()
     50 print(best_columns)

2 frames
/usr/local/lib/python3.7/dist-packages/EvolutionaryFS.py in _getCost(self, population_array)
     95 for i in self.data_dict.keys():
     96 
---> 97     x_train=self.data_dict[i]['x_train'][columns_list]
     98     y_train=self.data_dict[i]['y_train']
     99 

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
I think there is a problem with the dataset, but I can't solve it.
Edited on July 6th.
I followed StatguyUser's suggestion, and I got this error message when I commented out these lines:
best_columns=evoObj.GetBestFeatures()
print(best_columns)
['Unnamed: 0', 'MW', 'Sv', 'Se', 'Sp', ..., 'ALOGP', 'Normalized RT (min)']
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-12-a63bc4c481bb> in <module>()
     46 print(columns_list)
     47 
---> 48 print(data_dict[0]['x_train'][columns_list].shape)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
Edited on July 26th.
I followed StatguyUser's suggestion, but it does not work. My error message is like this:
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', ..., '1870', '1871', '1872']
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-16-03f83ec536c1> in <module>()
     46 print(columns_list)
     47 
---> 48 print(data_dict[0]['x_train'][columns_list].shape)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
The issue seems to be column names with special characters that pandas is unable to recognize.
Rename the 'Unnamed: 0' column, both in the pandas DataFrame and in the Python list. This might not be what is causing the issue, but it's best practice.
The real issue appears to be column names with special characters. To solve this, remove all special characters such as round brackets (, ), the percent sign %, backward and forward slashes \, /, and square brackets [, ] from the names of your pandas DataFrame columns. If you need a separator, use only spaces or underscores, in both your pandas column names and the Python list that holds them. This should solve your issue.
Let me know if you are still facing this issue.
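For example, one way to strip such characters from every column name (a sketch using re; adjust the pattern to your needs):
import re
# Sketch: keep only letters, digits, spaces and underscores in column names
df.columns = [re.sub(r'[^0-9a-zA-Z_ ]', '', str(c)) for c in df.columns]
columns_list = list(df.columns)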
Edit: It appears that the issue is that x_train is a NumPy array, not a DataFrame.
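A minimal sketch of a workaround, assuming GeneticAlgorithmFS selects columns by name with x_train[columns_list]: keep the train/test splits as pandas DataFrames instead of converting them to NumPy arrays, and build columns_list from the feature columns only:
# Assumption: EvolutionaryFS indexes x_train with the names in columns_list,
# which works on a DataFrame but not on a plain NumPy array.
X = df.iloc[:, :-1]   # feature columns, kept as a DataFrame
Y = df.iloc[:, -1]    # target column (last column)

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=seed)

data_dict = {0: {'x_train': X_train, 'y_train': Y_train,
                 'x_test': X_test, 'y_test': Y_test}}
columns_list = list(X.columns)   # feature names only, without the target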
I have a list of 256 data elements. I want to filter this data using an elliptic bandpass filter.
import matplotlib.pyplot as plt
from scipy.signal import ellip, ellipord, lfilter, freqz
import numpy as np

# 256 data samples to be filtered
c3=['221', '262', '333', '429', '522', '592', '630', '656', '668', '645', '581', '486', '395', '324', '265', '214', '172', '171', '214', '282', '353', '420', '498', '584', '650', '679', '661', '622', '571', '503', '415', '316', '240', '200', '185', '188', '204', '256', '344', '443', '527', '582', '627', '665', '676', '644', '567', '481', '404', '337', '271', '204', '168', '175', '218', '277', '340', '419', '513', '599', '653', '662', '649', '622', '578', '506', '407', '317', '252', '213', '188', '173', '194', '258', '352', '445', '517', '578', '632', '671', '672', '626', '561', '491', '422', '341', '254', '188', '165', '184', '224', '271', '337', '424', '522', '598', '638', '652', '653', '637', '585', '497', '397', '314', '258', '215', '180', '172', '202', '272', '352', '427', '502', '579', '649', '680', '664', '615', '555', '498', '424', '335', '251', '195', '180', '187', '212', '258', '338', '442', '533', '594', '628', '649', '661', '640', '579', '490', '402', '332', '266', '206', '164', '166', '216', '285', '357', '425', '501', '584', '644', '669', '655', '624', '580', '509', '414', '311', '236', '202', '190', '191', '207', '258', '345', '441', '521', '577', '626', '667', '676', '643', '567', '483', '407', '334', '261', '194', '162', '176', '222', '280', '342', '422', '517', '603', '654', '662', '650', '626', '579', '505', '404', '315', '252', '213', '187', '173', '196', '262', '352', '442', '513', '580', '642', '679', '674', '622', '553', '483', '413', '336', '254', '196', '177', '191', '221', '260', '328', '422', '524', '603', '640', '655', '656', '637', '583', '492', '397', '319', '263', '217', '176', '168', '204', '278', '361', '436', '509', '583', '645', '672', '656', '616', '565', '507', '425', '325', '238', '188', '179', '190', '213', '260', '338', '440']
def elliptical_bandpass():
    Fs = 256          # sampling frequency
    lowcut = 5
    highcut = 30
    order = 5
    Rp = 0.5          # passband ripple (dB)
    Rs = 30           # stopband attenuation (dB)
    nyq = Fs / 2      # Nyquist frequency
    wp = lowcut / nyq
    ws = highcut / nyq
    n, Wn = ellipord(wp, ws, Rp, Rs)
    print('Wn IS ----', Wn)
    b, a = ellip(order, Rp, Rs, [wp, ws], btype='band')  # get filter coefficients
    print('b coeff from filter code -- ', b)
    print('a coeff from filter code -- ', a)
    c3_filtered = lfilter(b, a, c3)
    print('filtered data-', c3_filtered)
    print('len of filtered data', len(c3_filtered))
    w, h = freqz(b, a, worN=2000)  # used to plot the frequency response
    plt.figure()
    plt.plot((Fs * 0.5 / np.pi) * w, abs(h), label="order = %d" % order)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Gain')
    plt.grid(True)
    plt.legend(loc='best')
    plt.show()

elliptical_bandpass()
When I run this, the filter design and coefficients look correct, but I get an error from lfilter:
  File "C:\Users\gtec\AppData\Local\Programs\Python\Python37-32\lib\site-packages\scipy\signal\signaltools.py", line 1354, in lfilter
    return sigtools._linear_filter(b, a, x, axis)
SystemError: returned NULL without setting an error
Previously I was using Python 2.7 and it executed without any errors. Now I am using Python 3.7.0.
The problem is that c3 is a list of strings. lfilter expects a sequence of numerical values. It will not automatically convert strings to numbers, so you'll have to convert those strings to numbers in your code before calling lfilter.
Do something like
c3 = [float(t) for t in c3]
before passing c3 to lfilter.
Even better would be to look back at how you actually create c3 in your "real" code (assuming the code in the question is a simplified example). It would make sense to convert the strings to numbers at the point where c3 is created.
(The cryptic error message is a bug in lfilter; you should have gotten a nicer error message. :)
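If NumPy is already imported, as in the question, an equivalent one-step fix is to convert the list to a float array before filtering:
c3 = np.asarray(c3, dtype=float)  # the numeric strings become floats
c3_filtered = lfilter(b, a, c3)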
I asked this question once but was very inconsistent in my wording. Here is my full code. I have a dataArray and wish to add numbers within the 5th column but only if within the same row, column 7 has a 0.
#!/usr/bin/python
#Date: 4.24.18
#importing necessary modules
import csv
import collections
import sys
import urllib2
from array import array

#variables for ease of use in script
fileName = 'medicaldata.tsv'
filePath = '/home/pjvaglic/Desktop/scripts/pythonScripts/final/data/'
dataURL = 'http://pages.mtu.edu/~toarney/sat3310/final/'
dataArray = []
sumBeds = 0
count = 0
countFac = 0
sumNSal = 0
sumNSalR = 0

#download file from MTU and write it to disk
downloadFile = urllib2.urlopen(dataURL + fileName)
with open(filePath + fileName, 'w') as output:
    output.write(downloadFile.read())

#count number of lines in the data file, take off the header, print results to screen
count = open(filePath + fileName).readlines()
print "There are", len(count)-1, "facilities accounted for in", filePath + fileName

#keep track of number of facilities
countFac = len(count)-1

#open data file, put everything in an array, cut everything at the tab delimiter
with open(filePath + fileName, 'rt') as inputfile:
    next(inputfile)
    dataArray = csv.reader(inputfile, delimiter='\t')

    #sum the number of beds in the first column
    for row in dataArray:
        sumBeds += int(row[0])

print "There are", sumBeds, "in the medical file."
print "There are about", sumBeds/countFac, "beds per facility."

#this line does not work for my purposes.
#list = [[row[4] for row in dataArray if row[6] == '1']]
#print list
Here is the dataArray. The last column has 0's and 1's; I believe they are strings. For example, the first row has a 0 in the last column, so I want to take 5230 and add it to 6304, then 6590, and so on. Just the rows that have a 0 in the last column.
['244', '128', '385', '23521', '5230', '5334', '0']
['59', '155', '203', '9160', '2459', '493', '1']
['120', '281', '392', '21900', '6304', '6115', '0']
['120', '291', '419', '22354', '6590', '6346', '0']
['120', '238', '363', '17421', '5362', '6225', '0']
['65', '180', '234', '10531', '3622', '449', '1']
['120', '306', '372', '22147', '4406', '4998', '1']
['90', '214', '305', '14025', '4173', '966', '1']
['96', '155', '169', '8812', '1955', '1260', '0']
['120', '133', '188', '11729', '3224', '6442', '1']
['62', '148', '192', '8896', '2409', '1236', '0']
['120', '274', '426', '20987', '2066', '3360', '1']
['116', '154', '321', '17655', '5946', '4231', '0']
['59', '120', '164', '7085', '1925', '1280', '1']
['80', '261', '284', '13089', '4166', '1123', '1']
['120', '338', '375', '21453', '5257', '5206', '1']
['80', '77', '133', '7790', '1988', '4443', '1']
['100', '204', '318', '18309', '4156', '4585', '1']
['60', '97', '213', '8872', '1914', '1675', '1']
['110', '178', '280', '17881', '5173', '5686', '1']
['120', '232', '336', '17004', '4630', '907', '0']
['135', '316', '442', '23829', '7489', '3351', '0']
['59', '163', '191', '9424', '2051', '1756', '1']
['60', '96', '202', '12474', '3803', '2123', '0']
['25', '74', '83', '4078', '2008', '4531', '1']
['221', '514', '776', '36029', '1288', '2543', '1']
['64', '91', '214', '8782', '4729', '4446', '1']
['62', '146', '204', '8951', '2367', '1064', '0']
['108', '255', '366', '17446', '5933', '2987', '1']
['62', '144', '220', '6164', '2782', '411', '1']
['90', '151', '286', '2853', '4651', '4197', '0']
['146', '100', '375', '21334', '6857', '1198', '0']
['62', '174', '189', '8082', '2143', '1209', '1']
['30', '54', '88', '3948', '3025', '137', '1']
['79', '213', '278', '11649', '2905', '1279', '0']
['44', '127', '158', '7850', '1498', '1273', '1']
['120', '208', '423', '29035', '6236', '3524', '0']
['100', '255', '300', '17532', '3547', '2561', '1']
['49', '110', '177', '8197', '2810', '3874', '1']
['123', '208', '336', '22555', '6059', '6402', '1']
['82', '114', '136', '8459', '1995', '1911', '1']
['58', '166', '205', '10412', '2245', '1122', '1']
['110', '228', '323', '16661', '4029', '3893', '1']
['62', '183', '222', '12406', '2784', '2212', '1']
['86', '62', '200', '11312', '3720', '2959', '1']
['102', '326', '355', '14499', '3866', '3006', '1']
['135', '157', '471', '24274', '7485', '1344', '0']
['78', '154', '203', '9327', '3672', '1242', '1']
['83', '224', '390', '12362', '3995', '1484', '1']
['60', '48', '213', '10644', '2820', '1154', '0']
['54', '119', '144', '7556', '2088', '245', '1']
['120', '217', '327', '20182', '4432', '6274', '0']
I know there is a shorthand way of placing all those numbers in a list and using a sum function to add them up. I'm just not sure how to go about it.
There are 2 ways. Below I use only an extract of your data.
Setup
We assume you begin with a list of lists of strings.
lst = [['244', '128', '385', '23521', '5230', '5334', '0'],
['59', '155', '203', '9160', '2459', '493', '1'],
['120', '281', '392', '21900', '6304', '6115', '0'],
['120', '291', '419', '22354', '6590', '6346', '0'],
['120', '238', '363', '17421', '5362', '6225', '0'],
['65', '180', '234', '10531', '3622', '449', '1'],
['120', '306', '372', '22147', '4406', '4998', '1'],
['90', '214', '305', '14025', '4173', '966', '1'],
['96', '155', '169', '8812', '1955', '1260', '0']]
Pure Python
A = [[int(i) for i in row] for row in lst]
res = sum(row[4] for row in A if row[6] == 0)
# 25441
Vectorised solution
You can use a 3rd party library such as numpy:
import numpy as np
A = np.array(lst, dtype=int)
res = A[np.where(A[:, 6] == 0), 4].sum()
# 25441
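A plain boolean mask gives the same result without np.where:
res = A[A[:, 6] == 0, 4].sum()   # rows whose last column is 0, summed over column 4
# 25441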
Turn your data file into an array of arrays.
['244', '128', '385', '23521', '5230', '5334', '0']
['59', '155', '203', '9160', '2459', '493', '1']
['120', '281', '392', '21900', '6304', '6115', '0']
Instead:
[['244', '128', '385', '23521', '5230', '5334', '0'],
['59', '155', '203', '9160', '2459', '493', '1'],
['120', '281', '392', '21900', '6304', '6115', '0']]
Then iterate over the elements in the array of arrays, looking for the string '0', and add element [i][4] to your running total. You'll need to convert the strings to numeric values to add them, though; otherwise you'll get one long concatenated string instead of a sum.
var sum = 0;
for (var i = 0; i < dataArray.length; i++) {
    if (dataArray[i][6] === '0') {       // last column is index 6, not 7
        sum += Number(dataArray[i][4]);  // 5th column is index 4
    }
}
At the end of the loop you'll have your total in sum and can do with it as you please.
Just realized you're working in Python and my answer is in JavaScript. Whoops. It might not be the best answer, but the Python version of the above solution should get you on the right track. Cheers
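For reference, a direct Python translation of that loop (hypothetical, assuming dataArray is the list of string rows shown in the question) would be:
total = 0
for row in dataArray:
    if row[6] == '0':          # last column (index 6) marks rows to include
        total += int(row[4])   # 5th column (index 4) holds the value to add
print(total)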
This question already has answers here:
flatten list in python
(5 answers)
Closed 6 years ago.
nlist = [[('4698874', '0', '58'), ('838286', '58', '310', '1.01')],('2588097', '368', '179', '1.01'), ('2746740', '547', '342', '1.44'),('3873988', '889', '259', '1.01'), ('808046', '1148', '236', '1.01'), ('2588498', '1384', '158', '1.01'), ('2492893', '1542', '196', '1.02'), ('2168413', '1738', '165', '1.02'), ('1778345', '1903', '448', '1.07'), ('2989691', '2351', '194', '0.99'), [('4698875', '2545', '256'), ('2985955', '2801', '257', '1.54')], [('4698876', '3058', '177'), ('1728736', '3235', '270', '0.96')], ('2615446', '3505', '172', '0.93'),[('4698877', '3677', '177'), ('4698878', '3854', '144'), ('515524', '3998', '134', '1.10')], [('4698879', '4132', '172'), ('4698880', '4304', '98'), ('2444241', '4402', '146', '1.04')], ('4698881', '4548', '-1', '1.00'), ()]
I was wondering: is there a one-liner to unlist this non-recursively, such that the remaining elements are all tuples?
nlist = [('4698874', '0', '58'), ('838286', '58', '310', '1.01'), ('2588097', '368', '179', '1.01'), ('2746740', '547', '342', '1.44'), ('3873988', '889', '259', '1.01'), ('808046', '1148', '236', '1.01'), ('2588498', '1384', '158', '1.01'), ('2492893', '1542', '196', '1.02'), ('2168413', '1738', '165', '1.02'), ('1778345', '1903', '448', '1.07'), ('2989691', '2351', '194', '0.99'), ('4698875', '2545', '256'), ('2985955', '2801', '257', '1.54'), ('4698876', '3058', '177'), ('1728736', '3235', '270', '0.96'), ('2615446', '3505', '172', '0.93'), ('4698877', '3677', '177'), ('4698878', '3854', '144'), ('515524', '3998', '134', '1.10'), ('4698879', '4132', '172'), ('4698880', '4304', '98'), ('2444241', '4402', '146', '1.04'), ('4698881', '4548', '-1', '1.00')]
Thank you so much for the guidance.
Use itertools.chain.
from itertools import chain
def unlist(nlist):
    return list(chain(*[[n] if isinstance(n, tuple) else n for n in nlist]))
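The same thing can be written with chain.from_iterable, which avoids unpacking the intermediate list into arguments:
def unlist(nlist):
    return list(chain.from_iterable([n] if isinstance(n, tuple) else n for n in nlist))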
This question already has answers here:
Split Strings into words with multiple word boundary delimiters
(31 answers)
Closed 9 years ago.
Using .read() to read a file, how would I split on two delimiters at once? I'm trying to split on commas and "\n" simultaneously, but when I split on commas first, my string becomes a list, which I cannot split again.
Here is the string I'm trying to split:
'States, Total Score, Critical Reading, Mathematics, Writing, Participation (%)\nWashington,1564,524,532,508,41.2000\nNewHampshire,1554,520,524,510,64.0000\nMassachusetts,1547,512,526,509,72.1000\nOregon,1546,523,524,499,37.1000\nVermont,1546,519,512,506,64.0000\nArizona,1544,519,525,500,22.4000\nConnecticut,1536,509,514,513,71.2000\nAlaska,1524,518,515,491,32.7000\nVirginia,1521,512,512,497,56.0000\nCalifornia,1517,501,516,500,37.5000\nNewJersey,1506,495,514,497,69.0000\nMaryland,1502,501,506,495,56.7000\nNorthCarolina,1485,497,511,477,45.5000\nRhodeIsland,1477,494,495,488,60.8000\nIndiana,1476,494,505,477,52.0000\nFlorida,1473,496,498,479,44.7000\nPennsylvania,1473,492,501,480,62.3000\nNevada,1470,496,501,473,25.9000\nDelaware,1469,493,495,481,59.2000\nTexas,1462,484,505,473,41.5000\nNewYork,1461,484,499,478,59.6000\nHawaii,1458,483,505,470,47.1000\nGeorgia,1453,488,490,475,46.5000\nSouthCarolina,1447,484,495,468,40.7000\nMaine,1389,468,467,454,87.1000\nIowa,1798,603,613,582,2.7000\nMinnesota,1781,594,607,580,6.0000\nWisconsin,1778,595,604,579,3.8000\nMissouri,1768,593,595,580,3.6000\nMichigan,1766,585,605,576,3.8000\nSouthDakota,1766,592,603,571,2.0000\nIllinois,1762,585,600,577,4.6700\nKansas,1752,590,595,567,4.7000\nNebraska,1746,585,593,568,3.9000\nNorthDakota,1733,580,594,559,3.4000\nKentucky,1713,575,575,563,5.0000\nTennessee,1712,576,571,565,6.4000\nColorado,1695,568,572,555,14.1000\nArkansas,1684,566,566,552,3.5000\nOklahoma,1684,569,568,547,3.8000\nWyoming,1683,570,567,546,3.6000\nUtah,1674,568,559,547,4.5000\nMississippi,1666,566,548,552,2.2000\nLouisiana,1652,555,550,547,4.0000\nAlabama,1650,556,550,544,5.4000\nNewMexico,1636,553,549,534,7.1000\nOhio,1609,538,548,522,17.2000\nIdaho,1601,543,541,517,14.6000\nMontana,1593,538,538,517,20.0000\nWest Virginia,1522,515,507,500,13.2000\n'
You can use a list comprehension:
>>> strs = 'States, Total Score, Critical Reading, Mathematics, Writing, Participation (%)\nWashington,1564,524,532,508,41.2000\nNewHampshire,1554,520,524,510,64.0000\nMassachusetts,1547,512,526,509,72.1000\nOregon,1546,523,524,499,37.1000\nVermont,1546,519,512,506,64.0000\nArizona,1544,519,525,500,22.4000\nConnecticut,1536,509,514,513,71.2000\nAlaska,1524,518,515,491,32.7000\nVirginia,1521,512,512,497,56.0000\nCalifornia,1517,501,516,500,37.5000\nNewJersey,1506,495,514,497,69.0000\nMaryland,1502,501,506,495,56.7000\nNorthCarolina,1485,497,511,477,45.5000\nRhodeIsland,1477,494,495,488,60.8000\nIndiana,1476,494,505,477,52.0000\nFlorida,1473,496,498,479,44.7000\nPennsylvania,1473,492,501,480,62.3000\nNevada,1470,496,501,473,25.9000\nDelaware,1469,493,495,481,59.2000\nTexas,1462,484,505,473,41.5000\nNewYork,1461,484,499,478,59.6000\nHawaii,1458,483,505,470,47.1000\nGeorgia,1453,488,490,475,46.5000\nSouthCarolina,1447,484,495,468,40.7000\nMaine,1389,468,467,454,87.1000\nIowa,1798,603,613,582,2.7000\nMinnesota,1781,594,607,580,6.0000\nWisconsin,1778,595,604,579,3.8000\nMissouri,1768,593,595,580,3.6000\nMichigan,1766,585,605,576,3.8000\nSouthDakota,1766,592,603,571,2.0000\nIllinois,1762,585,600,577,4.6700\nKansas,1752,590,595,567,4.7000\nNebraska,1746,585,593,568,3.9000\nNorthDakota,1733,580,594,559,3.4000\nKentucky,1713,575,575,563,5.0000\nTennessee,1712,576,571,565,6.4000\nColorado,1695,568,572,555,14.1000\nArkansas,1684,566,566,552,3.5000\nOklahoma,1684,569,568,547,3.8000\nWyoming,1683,570,567,546,3.6000\nUtah,1674,568,559,547,4.5000\nMississippi,1666,566,548,552,2.2000\nLouisiana,1652,555,550,547,4.0000\nAlabama,1650,556,550,544,5.4000\nNewMexico,1636,553,549,534,7.1000\nOhio,1609,538,548,522,17.2000\nIdaho,1601,543,541,517,14.6000\nMontana,1593,538,538,517,20.0000\nWest Virginia,1522,515,507,500,13.2000\n'
>>> [ y for x in strs.splitlines() for y in x.split(",")]
['States', ' Total Score', ' Critical Reading', ' Mathematics', ' Writing', ' Participation (%)', 'Washington', '1564', '524', '532', '508', '41.2000', 'NewHampshire', '1554', '520', '524', '510', '64.0000', 'Massachusetts', '1547', '512', '526', '509', '72.1000', 'Oregon', '1546', '523', '524', '499', '37.1000', 'Vermont', '1546', '519', '512', '506', '64.0000', 'Arizona', '1544', '519', '525', '500', '22.4000', 'Connecticut', '1536', '509', '514', '513', '71.2000', 'Alaska', '1524', '518', '515', '491', '32.7000', 'Virginia', '1521', '512', '512', '497', '56.0000', 'California', '1517', '501', '516', '500', '37.5000', 'NewJersey', '1506', '495', '514', '497', '69.0000', 'Maryland', '1502', '501', '506', '495', '56.7000', 'NorthCarolina', '1485', '497', '511', '477', '45.5000', 'RhodeIsland', '1477', '494', '495', '488', '60.8000', 'Indiana', '1476', '494', '505', '477', '52.0000', 'Florida', '1473', '496', '498', '479', '44.7000', 'Pennsylvania', '1473', '492', '501', '480', '62.3000', 'Nevada', '1470', '496', '501', '473', '25.9000', 'Delaware', '1469', '493', '495', '481', '59.2000', 'Texas', '1462', '484', '505', '473', '41.5000', 'NewYork', '1461', '484', '499', '478', '59.6000', 'Hawaii', '1458', '483', '505', '470', '47.1000', 'Georgia', '1453', '488', '490', '475', '46.5000', 'SouthCarolina', '1447', '484', '495', '468', '40.7000', 'Maine', '1389', '468', '467', '454', '87.1000', 'Iowa', '1798', '603', '613', '582', '2.7000', 'Minnesota', '1781', '594', '607', '580', '6.0000', 'Wisconsin', '1778', '595', '604', '579', '3.8000', 'Missouri', '1768', '593', '595', '580', '3.6000', 'Michigan', '1766', '585', '605', '576', '3.8000', 'SouthDakota', '1766', '592', '603', '571', '2.0000', 'Illinois', '1762', '585', '600', '577', '4.6700', 'Kansas', '1752', '590', '595', '567', '4.7000', 'Nebraska', '1746', '585', '593', '568', '3.9000', 'NorthDakota', '1733', '580', '594', '559', '3.4000', 'Kentucky', '1713', '575', '575', '563', '5.0000', 'Tennessee', '1712', '576', '571', '565', '6.4000', 'Colorado', '1695', '568', '572', '555', '14.1000', 'Arkansas', '1684', '566', '566', '552', '3.5000', 'Oklahoma', '1684', '569', '568', '547', '3.8000', 'Wyoming', '1683', '570', '567', '546', '3.6000', 'Utah', '1674', '568', '559', '547', '4.5000', 'Mississippi', '1666', '566', '548', '552', '2.2000', 'Louisiana', '1652', '555', '550', '547', '4.0000', 'Alabama', '1650', '556', '550', '544', '5.4000', 'NewMexico', '1636', '553', '549', '534', '7.1000', 'Ohio', '1609', '538', '548', '522', '17.2000', 'Idaho', '1601', '543', '541', '517', '14.6000', 'Montana', '1593', '538', '538', '517', '20.0000', 'West Virginia', '1522', '515', '507', '500', '13.2000']
If you want a list of lists containing each line split at ,:
>>> [x.split(",") for x in strs.splitlines()]
[['States', ' Total Score', ' Critical Reading', ' Mathematics', ' Writing', ' Participation (%)'], ['Washington', '1564', '524', '532', '508', '41.2000'], ['NewHampshire', '1554', '520', '524', '510', '64.0000'], ['Massachusetts', '1547', '512', '526', '509', '72.1000'], ['Oregon', '1546', '523', '524', '499', '37.1000'], ['Vermont', '1546', '519', '512', '506', '64.0000'], ['Arizona', '1544', '519', '525', '500', '22.4000'], ['Connecticut', '1536', '509', '514', '513', '71.2000'], ['Alaska', '1524', '518', '515', '491', '32.7000'], ['Virginia', '1521', '512', '512', '497', '56.0000'], ['California', '1517', '501', '516', '500', '37.5000'], ['NewJersey', '1506', '495', '514', '497', '69.0000'], ['Maryland', '1502', '501', '506', '495', '56.7000'], ['NorthCarolina', '1485', '497', '511', '477', '45.5000'], ['RhodeIsland', '1477', '494', '495', '488', '60.8000'], ['Indiana', '1476', '494', '505', '477', '52.0000'], ['Florida', '1473', '496', '498', '479', '44.7000'], ['Pennsylvania', '1473', '492', '501', '480', '62.3000'], ['Nevada', '1470', '496', '501', '473', '25.9000'], ['Delaware', '1469', '493', '495', '481', '59.2000'], ['Texas', '1462', '484', '505', '473', '41.5000'], ['NewYork', '1461', '484', '499', '478', '59.6000'], ['Hawaii', '1458', '483', '505', '470', '47.1000'], ['Georgia', '1453', '488', '490', '475', '46.5000'], ['SouthCarolina', '1447', '484', '495', '468', '40.7000'], ['Maine', '1389', '468', '467', '454', '87.1000'], ['Iowa', '1798', '603', '613', '582', '2.7000'], ['Minnesota', '1781', '594', '607', '580', '6.0000'], ['Wisconsin', '1778', '595', '604', '579', '3.8000'], ['Missouri', '1768', '593', '595', '580', '3.6000'], ['Michigan', '1766', '585', '605', '576', '3.8000'], ['SouthDakota', '1766', '592', '603', '571', '2.0000'], ['Illinois', '1762', '585', '600', '577', '4.6700'], ['Kansas', '1752', '590', '595', '567', '4.7000'], ['Nebraska', '1746', '585', '593', '568', '3.9000'], ['NorthDakota', '1733', '580', '594', '559', '3.4000'], ['Kentucky', '1713', '575', '575', '563', '5.0000'], ['Tennessee', '1712', '576', '571', '565', '6.4000'], ['Colorado', '1695', '568', '572', '555', '14.1000'], ['Arkansas', '1684', '566', '566', '552', '3.5000'], ['Oklahoma', '1684', '569', '568', '547', '3.8000'], ['Wyoming', '1683', '570', '567', '546', '3.6000'], ['Utah', '1674', '568', '559', '547', '4.5000'], ['Mississippi', '1666', '566', '548', '552', '2.2000'], ['Louisiana', '1652', '555', '550', '547', '4.0000'], ['Alabama', '1650', '556', '550', '544', '5.4000'], ['NewMexico', '1636', '553', '549', '534', '7.1000'], ['Ohio', '1609', '538', '548', '522', '17.2000'], ['Idaho', '1601', '543', '541', '517', '14.6000'], ['Montana', '1593', '538', '538', '517', '20.0000'], ['West Virginia', '1522', '515', '507', '500', '13.2000']]
Instead of generating the whole list at once, you can use itertools.chain to get the elements lazily (or, even better, if you process one line at a time, prefer Martijn Pieters's solution in that case):
>>> from itertools import chain
>>> for elem in chain(*(x.split(",") for x in strs.splitlines())):
... print elem
...
States
Total Score
Critical Reading
Mathematics
Writing
Participation (%)
Washington
...
Don't read the whole file in one go; read it line by line, then split:
with open(filepath) as f:
    for line in f:
        print line.strip().split(',')
You could also first split on newlines, then loop and split on commas:
lines = [line.split(',') for line in somestring.splitlines()]
But for comma-separated files, your best bet is to use the csv module:
import csv
with open(filepath, 'rb') as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        print row
This gives you the rows as:
['States', ' Total Score', ' Critical Reading', ' Mathematics', ' Writing', ' Participation (%)']
['Washington', '1564', '524', '532', '508', '41.2000']
['NewHampshire', '1554', '520', '524', '510', '64.0000']
Since you have a first row with headers, you could use a DictReader as well and get dictionaries mapping headers to values:
with open(filepath, 'rb') as f:
    reader = csv.DictReader(f, delimiter=',')
    for row in reader:
        print row
        # address columns as: row['States'], row['Total Score']
which outputs rows as:
{' Writing': '508', ' Total Score': '1564', ' Critical Reading': '524', 'States': 'Washington', ' Mathematics': '532', ' Participation (%)': '41.2000'}
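The leading spaces in the keys (e.g. ' Total Score') come from the spaces after the commas in the header line; csv.reader and csv.DictReader both accept skipinitialspace=True to strip them:
reader = csv.DictReader(f, delimiter=',', skipinitialspace=True)  # ' Total Score' becomes 'Total Score'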
There's re.split for splitting on multiple characters:
>>> import re
>>> re.split("\n| ", "this is\na short\ntest...")
['this', 'is', 'a', 'short', 'test...']
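Applied to the data in the question (assuming the string is in a variable strs, as in the earlier answer), one pass handles both delimiters:
import re
fields = re.split(r'[,\n]', strs.strip())  # strip() avoids a trailing empty field from the final newline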
You could use split() from the re module, which lets you define a regex for the splitting.
Look at this: python split string based on regular expression