Python + convert gpx to csv - python
I have a list of .gpx files that I would like to convert to csv using python.
I have the following
# Set the process working directory to INDIR (defined elsewhere), so that
# relative file names are resolved against that directory.
os.chdir(INDIR)
def parsegpx(f):
    """Parse a GPX file into a list of dictionaries.

    Each dict is one row of the final dataset and holds the 'Timestamp',
    'Latitude', 'Longitude' and 'Elevation' of a single track point.

    f is the path of the GPX file to read. Errors raised by open() or by
    gpxpy.parse() are propagated to the caller.
    """
    points2 = []
    # Name the encoding explicitly so decoding does not depend on the
    # platform's default locale (GPX is XML and is normally UTF-8).
    with open(f, 'r', encoding='utf-8') as gpxfile:
        gpx = gpxpy.parse(gpxfile)
    # Flatten tracks -> segments -> points into one row per point.
    for track in gpx.tracks:
        for segment in track.segments:
            for point in segment.points:
                # 'row' rather than 'dict', so the builtin is not shadowed.
                row = {
                    'Timestamp': point.time,
                    'Latitude': point.latitude,
                    'Longitude': point.longitude,
                    'Elevation': point.elevation,
                }
                points2.append(row)
    return points2
#Parse the gpx files into a pandas dataframe, keyed by file name
dirs = os.listdir(INDIR)
# Keep only the .gpx entries: anything else in the directory (sub-folders,
# hidden or binary files) is not GPX and can make open()/gpxpy fail, e.g.
# with a UnicodeDecodeError.
files = [name for name in dirs if name.lower().endswith('.gpx')]
# Join with INDIR so the paths are valid whatever the current directory is;
# 'files' supplies the outer index level, one key per parsed file.
df2 = pd.concat(
    [pd.DataFrame(parsegpx(os.path.join(INDIR, name))) for name in files],
    keys=files,
)
However, if I run the code, I get the following error:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 3131: invalid start byte
Can anyone help me with that? I tried adding encoding='latin-1' to the open() call, but it still does not work.
Thanks,
Fede
Could we see the GPX file where the error occurs? I tried the file https://github.com/tkrajina/gpxpy/blob/master/test_files/cerknicko-jezero-no-creator.gpx with your code snippet
import gpxpy
def parsegpx():
    """Print the track points of the sample GPX file as a list of dicts.

    Every point of every segment of every track becomes one dict with the
    keys 'Timestamp', 'Latitude', 'Longitude' and 'Elevation'.
    """
    with open('cerknicko-jezero-no-creator.gpx', 'r') as gpxfile:
        parsed = gpxpy.parse(gpxfile)
        rows = [
            {
                'Timestamp': pt.time,
                'Latitude': pt.latitude,
                'Longitude': pt.longitude,
                'Elevation': pt.elevation,
            }
            for trk in parsed.tracks
            for seg in trk.segments
            for pt in seg.points
        ]
    print(rows)


# Run the demo only when executed as a script.
if __name__ == '__main__':
    parsegpx()
and get correct results:
[{'Timestamp': datetime.datetime(2010, 8, 5, 14, 23, 59), 'Elevation': 542.320923, 'Latitude': 45.772175035, 'Longitude': 14.357659249}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 25, 8), 'Elevation': 550.972656, 'Latitude': 45.772089791, 'Longitude': 14.357567383}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 26, 36), 'Elevation': 553.856689, 'Latitude': 45.772063639, 'Longitude': 14.357461184}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 26, 46), 'Elevation': 555.779297, 'Latitude': 45.772002535, 'Longitude': 14.357343167}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 26, 56), 'Elevation': 555.779297, 'Latitude': 45.771906478, 'Longitude': 14.357355237}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 27, 5), 'Elevation': 555.298584, 'Latitude': 45.771813104, 'Longitude': 14.357374264}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 27, 14), 'Elevation': 553.856689, 'Latitude': 45.771737499, 'Longitude': 14.357434446}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 27, 28), 'Elevation': 552.414551, 'Latitude': 45.77164907, 'Longitude': 14.357447689}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 27, 46), 'Elevation': 551.934082, 'Latitude': 45.771551002, 'Longitude': 14.35746965}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 27, 55), 'Elevation': 552.895264, 'Latitude': 45.771471541, 'Longitude': 14.357517259}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 28, 5), 'Elevation': 552.414551, 'Latitude': 45.771403983, 'Longitude': 14.35741676}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 28, 20), 'Elevation': 552.895264, 'Latitude': 45.771397445, 'Longitude': 14.357317435}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 28, 29), 'Elevation': 552.895264, 'Latitude': 45.771366181, 'Longitude': 14.357201932}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 28, 40), 'Elevation': 552.895264, 'Latitude': 45.771278841, 'Longitude': 14.357150551}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 28, 50), 'Elevation': 552.414551, 'Latitude': 
45.771187562, 'Longitude': 14.357119622}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 29, 2), 'Elevation': 552.414551, 'Latitude': 45.771096284, 'Longitude': 14.357099757}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 29, 21), 'Elevation': 551.453369, 'Latitude': 45.77100249, 'Longitude': 14.357058266}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 29, 30), 'Elevation': 550.972656, 'Latitude': 45.770905931, 'Longitude': 14.357027002}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 29, 40), 'Elevation': 551.453369, 'Latitude': 45.770820603, 'Longitude': 14.357021051}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 29, 50), 'Elevation': 551.934082, 'Latitude': 45.770730581, 'Longitude': 14.357006885}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 30, 10), 'Elevation': 551.934082, 'Latitude': 45.770663023, 'Longitude': 14.356960701}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 30, 19), 'Elevation': 550.492188, 'Latitude': 45.770596471, 'Longitude': 14.356866069}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 30, 27), 'Elevation': 550.011475, 'Latitude': 45.770515921, 'Longitude': 14.356806893}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 30, 35), 'Elevation': 550.011475, 'Latitude': 45.77044107, 'Longitude': 14.356734473}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 30, 44), 'Elevation': 549.530762, 'Latitude': 45.770396059, 'Longitude': 14.356622826}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 31, 12), 'Elevation': 548.088867, 'Latitude': 45.770342331, 'Longitude': 14.356471952}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 31, 20), 'Elevation': 547.608154, 'Latitude': 45.77032146, 'Longitude': 14.356346475}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 31, 29), 'Elevation': 547.608154, 'Latitude': 45.770324981, 'Longitude': 14.35621555}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 33, 31), 'Elevation': 548.088867, 'Latitude': 45.770286592, 'Longitude': 14.356161403}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 33, 40), 
'Elevation': 548.569336, 'Latitude': 45.770196151, 'Longitude': 14.356116978}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 33, 48), 'Elevation': 548.569336, 'Latitude': 45.770110404, 'Longitude': 14.356067609}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 33, 57), 'Elevation': 548.088867, 'Latitude': 45.770022226, 'Longitude': 14.356011786}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 34, 5), 'Elevation': 547.608154, 'Latitude': 45.769934719, 'Longitude': 14.356021928}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 34, 14), 'Elevation': 548.088867, 'Latitude': 45.76983992, 'Longitude': 14.356053527}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 34, 22), 'Elevation': 549.530762, 'Latitude': 45.769755682, 'Longitude': 14.356107507}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 34, 57), 'Elevation': 550.972656, 'Latitude': 45.769677227, 'Longitude': 14.356159139}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 35, 24), 'Elevation': 550.972656, 'Latitude': 45.76961319, 'Longitude': 14.35622938}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 35, 33), 'Elevation': 551.453369, 'Latitude': 45.769536914, 'Longitude': 14.356298028}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 35, 42), 'Elevation': 550.972656, 'Latitude': 45.769446557, 'Longitude': 14.356357036}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 35, 51), 'Elevation': 551.453369, 'Latitude': 45.769356284, 'Longitude': 14.356414033}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 35, 59), 'Elevation': 551.453369, 'Latitude': 45.769272381, 'Longitude': 14.35645544}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 36, 8), 'Elevation': 551.453369, 'Latitude': 45.769192418, 'Longitude': 14.356506485}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 36, 37), 'Elevation': 551.453369, 'Latitude': 45.769169452, 'Longitude': 14.356551832}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 36, 47), 'Elevation': 551.453369, 'Latitude': 45.769182947, 'Longitude': 14.356666999}, {'Timestamp': 
datetime.datetime(2010, 8, 5, 14, 37, 8), 'Elevation': 549.050049, 'Latitude': 45.769151682, 'Longitude': 14.356751824}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 37, 28), 'Elevation': 546.646851, 'Latitude': 45.76906627, 'Longitude': 14.35675744}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 40, 44), 'Elevation': 548.088867, 'Latitude': 45.769023858, 'Longitude': 14.35681561}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 40, 57), 'Elevation': 546.16626, 'Latitude': 45.76893853, 'Longitude': 14.356787531}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 41, 43), 'Elevation': 548.569336, 'Latitude': 45.768877426, 'Longitude': 14.356692648}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 41, 55), 'Elevation': 550.011475, 'Latitude': 45.768804085, 'Longitude': 14.356687954}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 42, 18), 'Elevation': 552.895264, 'Latitude': 45.768726217, 'Longitude': 14.35661763}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 42, 28), 'Elevation': 553.856689, 'Latitude': 45.768643236, 'Longitude': 14.356583264}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 42, 41), 'Elevation': 553.375977, 'Latitude': 45.768552292, 'Longitude': 14.356551664}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 42, 50), 'Elevation': 552.895264, 'Latitude': 45.768532595, 'Longitude': 14.356432641}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 43, 4), 'Elevation': 553.375977, 'Latitude': 45.768547095, 'Longitude': 14.35628864}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 43, 12), 'Elevation': 553.856689, 'Latitude': 45.768509628, 'Longitude': 14.356174478}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 43, 50), 'Elevation': 553.856689, 'Latitude': 45.768411979, 'Longitude': 14.356160816}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 44, 45), 'Elevation': 551.453369, 'Latitude': 45.768287592, 'Longitude': 14.356270535}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 44, 56), 'Elevation': 552.895264, 'Latitude': 45.768202767, 
'Longitude': 14.356327029}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 45, 6), 'Elevation': 553.856689, 'Latitude': 45.768117523, 'Longitude': 14.356315462}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 45, 15), 'Elevation': 553.375977, 'Latitude': 45.768031105, 'Longitude': 14.356269529}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 45, 24), 'Elevation': 553.375977, 'Latitude': 45.767948879, 'Longitude': 14.356186548}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 45, 33), 'Elevation': 553.856689, 'Latitude': 45.76787252, 'Longitude': 14.356098538}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 45, 41), 'Elevation': 553.856689, 'Latitude': 45.767792473, 'Longitude': 14.356042296}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 45, 50), 'Elevation': 554.337402, 'Latitude': 45.767695075, 'Longitude': 14.355997872}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 45, 58), 'Elevation': 554.337402, 'Latitude': 45.767609915, 'Longitude': 14.355941545}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 46, 6), 'Elevation': 553.856689, 'Latitude': 45.767528694, 'Longitude': 14.355887398}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 46, 14), 'Elevation': 552.895264, 'Latitude': 45.767441522, 'Longitude': 14.355836436}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 46, 22), 'Elevation': 553.375977, 'Latitude': 45.767348064, 'Longitude': 14.355794108}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 46, 30), 'Elevation': 552.895264, 'Latitude': 45.767251085, 'Longitude': 14.355773237}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 46, 38), 'Elevation': 553.375977, 'Latitude': 45.767154777, 'Longitude': 14.355754713}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 46, 46), 'Elevation': 553.375977, 'Latitude': 45.767059308, 'Longitude': 14.355742307}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 46, 54), 'Elevation': 553.375977, 'Latitude': 45.766966939, 'Longitude': 14.355720598}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 47, 3), 'Elevation': 
553.375977, 'Latitude': 45.766870547, 'Longitude': 14.355696207}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 47, 11), 'Elevation': 553.375977, 'Latitude': 45.766783459, 'Longitude': 14.355635773}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 47, 19), 'Elevation': 553.856689, 'Latitude': 45.766693018, 'Longitude': 14.355577519}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 47, 27), 'Elevation': 554.337402, 'Latitude': 45.766603583, 'Longitude': 14.355516415}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 47, 36), 'Elevation': 553.856689, 'Latitude': 45.766510544, 'Longitude': 14.355464783}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 47, 55), 'Elevation': 553.375977, 'Latitude': 45.766405938, 'Longitude': 14.355474003}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 48, 7), 'Elevation': 552.895264, 'Latitude': 45.766316839, 'Longitude': 14.355442068}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 48, 49), 'Elevation': 552.895264, 'Latitude': 45.766348019, 'Longitude': 14.355553379}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 48, 59), 'Elevation': 552.414551, 'Latitude': 45.766360676, 'Longitude': 14.35566226}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 49, 12), 'Elevation': 551.453369, 'Latitude': 45.766296471, 'Longitude': 14.355766783}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 49, 23), 'Elevation': 550.492188, 'Latitude': 45.766229834, 'Longitude': 14.355859822}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 49, 31), 'Elevation': 550.492188, 'Latitude': 45.766162528, 'Longitude': 14.355964931}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 49, 39), 'Elevation': 550.972656, 'Latitude': 45.766103938, 'Longitude': 14.356076159}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 49, 48), 'Elevation': 550.492188, 'Latitude': 45.766047528, 'Longitude': 14.356190655}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 49, 56), 'Elevation': 550.492188, 'Latitude': 45.765984748, 'Longitude': 14.356281515}, {'Timestamp': 
datetime.datetime(2010, 8, 5, 14, 50, 7), 'Elevation': 549.530762, 'Latitude': 45.765921548, 'Longitude': 14.356382936}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 50, 16), 'Elevation': 549.050049, 'Latitude': 45.765888439, 'Longitude': 14.356514532}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 50, 34), 'Elevation': 550.011475, 'Latitude': 45.765891457, 'Longitude': 14.356643446}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 50, 49), 'Elevation': 550.972656, 'Latitude': 45.765914591, 'Longitude': 14.356763558}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 50, 57), 'Elevation': 551.934082, 'Latitude': 45.765939821, 'Longitude': 14.35689331}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 51, 6), 'Elevation': 550.972656, 'Latitude': 45.765952058, 'Longitude': 14.357034126}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 51, 15), 'Elevation': 549.530762, 'Latitude': 45.765955662, 'Longitude': 14.357175026}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 51, 23), 'Elevation': 548.088867, 'Latitude': 45.765975527, 'Longitude': 14.357300419}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 51, 31), 'Elevation': 546.646851, 'Latitude': 45.765997069, 'Longitude': 14.357423885}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 51, 39), 'Elevation': 546.16626, 'Latitude': 45.766027579, 'Longitude': 14.357544836}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 51, 48), 'Elevation': 545.685547, 'Latitude': 45.766058424, 'Longitude': 14.357671989}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 52, 2), 'Elevation': 545.685547, 'Latitude': 45.766093126, 'Longitude': 14.357791012}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 52, 23), 'Elevation': 547.127441, 'Latitude': 45.766090443, 'Longitude': 14.357788749}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 53, 26), 'Elevation': 548.088867, 'Latitude': 45.766053898, 'Longitude': 14.357882291}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 53, 42), 'Elevation': 548.088867, 'Latitude': 45.766070997, 
'Longitude': 14.357980527}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 54, 12), 'Elevation': 548.569336, 'Latitude': 45.766096059, 'Longitude': 14.358057389}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 54, 20), 'Elevation': 547.127441, 'Latitude': 45.766129671, 'Longitude': 14.35817088}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 54, 29), 'Elevation': 546.646851, 'Latitude': 45.7661031, 'Longitude': 14.358292501}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 54, 37), 'Elevation': 546.16626, 'Latitude': 45.766075272, 'Longitude': 14.358421164}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 54, 44), 'Elevation': 546.16626, 'Latitude': 45.766054988, 'Longitude': 14.358547311}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 54, 51), 'Elevation': 545.685547, 'Latitude': 45.766025316, 'Longitude': 14.358667256}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 55, 51), 'Elevation': 542.801514, 'Latitude': 45.766022718, 'Longitude': 14.358777311}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 56), 'Elevation': 543.762817, 'Latitude': 45.766049288, 'Longitude': 14.35891578}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 56, 9), 'Elevation': 543.762817, 'Latitude': 45.766078793, 'Longitude': 14.359055925}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 56, 17), 'Elevation': 543.762817, 'Latitude': 45.766111817, 'Longitude': 14.359177044}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 56, 26), 'Elevation': 544.724243, 'Latitude': 45.766153894, 'Longitude': 14.359309645}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 56, 33), 'Elevation': 544.724243, 'Latitude': 45.766190439, 'Longitude': 14.35943244}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 56, 41), 'Elevation': 545.204834, 'Latitude': 45.766230673, 'Longitude': 14.359567473}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 56, 50), 'Elevation': 545.685547, 'Latitude': 45.766320024, 'Longitude': 14.359635953}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 56, 58), 'Elevation': 
545.204834, 'Latitude': 45.766417757, 'Longitude': 14.359656069}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 57, 6), 'Elevation': 545.685547, 'Latitude': 45.766501743, 'Longitude': 14.359691609}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 57, 14), 'Elevation': 545.685547, 'Latitude': 45.766521357, 'Longitude': 14.359831335}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 57, 21), 'Elevation': 545.204834, 'Latitude': 45.766533092, 'Longitude': 14.359962847}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 57, 28), 'Elevation': 544.243652, 'Latitude': 45.766544826, 'Longitude': 14.36009503}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 57, 37), 'Elevation': 543.282104, 'Latitude': 45.76654952, 'Longitude': 14.360246742}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 57, 44), 'Elevation': 544.243652, 'Latitude': 45.766525464, 'Longitude': 14.360364089}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 57, 52), 'Elevation': 545.685547, 'Latitude': 45.766478945, 'Longitude': 14.360491158}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 58), 'Elevation': 546.646851, 'Latitude': 45.766446758, 'Longitude': 14.360614205}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 58, 11), 'Elevation': 546.646851, 'Latitude': 45.766479615, 'Longitude': 14.360735575}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 58, 24), 'Elevation': 550.011475, 'Latitude': 45.766559914, 'Longitude': 14.360776898}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 58, 31), 'Elevation': 550.492188, 'Latitude': 45.766651779, 'Longitude': 14.360730127}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 58, 38), 'Elevation': 550.492188, 'Latitude': 45.766742388, 'Longitude': 14.360683607}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 58, 46), 'Elevation': 550.972656, 'Latitude': 45.766840708, 'Longitude': 14.360630047}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 58, 53), 'Elevation': 550.972656, 'Latitude': 45.766935255, 'Longitude': 14.360577241}, {'Timestamp': 
datetime.datetime(2010, 8, 5, 14, 59, 15), 'Elevation': 550.972656, 'Latitude': 45.76722485, 'Longitude': 14.360401221}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 59, 22), 'Elevation': 550.492188, 'Latitude': 45.767318141, 'Longitude': 14.360365933}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 59, 30), 'Elevation': 550.492188, 'Latitude': 45.767415874, 'Longitude': 14.360308684}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 59, 37), 'Elevation': 550.972656, 'Latitude': 45.767510673, 'Longitude': 14.360244479}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 59, 44), 'Elevation': 551.934082, 'Latitude': 45.767606981, 'Longitude': 14.360204581}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 59, 51), 'Elevation': 551.934082, 'Latitude': 45.767699936, 'Longitude': 14.360150099}, {'Timestamp': datetime.datetime(2010, 8, 5, 14, 59, 58), 'Elevation': 551.934082, 'Latitude': 45.767789204, 'Longitude': 14.360092515}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 0, 5), 'Elevation': 551.934082, 'Latitude': 45.767891128, 'Longitude': 14.360040715}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 0, 12), 'Elevation': 552.414551, 'Latitude': 45.767986178, 'Longitude': 14.359995034}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 0, 19), 'Elevation': 552.895264, 'Latitude': 45.768079218, 'Longitude': 14.359925045}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 0, 26), 'Elevation': 551.453369, 'Latitude': 45.76816991, 'Longitude': 14.359869137}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 0, 33), 'Elevation': 550.972656, 'Latitude': 45.768261021, 'Longitude': 14.359815493}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 0, 40), 'Elevation': 550.492188, 'Latitude': 45.768350959, 'Longitude': 14.359758329}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 0, 47), 'Elevation': 550.972656, 'Latitude': 45.768450033, 'Longitude': 14.359702338}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 0, 54), 'Elevation': 550.492188, 'Latitude': 45.768536786, 'Longitude': 
14.359647771}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 1, 1), 'Elevation': 550.492188, 'Latitude': 45.768630495, 'Longitude': 14.359595804}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 1, 8), 'Elevation': 550.011475, 'Latitude': 45.768726803, 'Longitude': 14.359546769}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 1, 15), 'Elevation': 549.530762, 'Latitude': 45.76882001, 'Longitude': 14.359492119}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 1, 22), 'Elevation': 550.492188, 'Latitude': 45.768925622, 'Longitude': 14.359449288}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 1, 29), 'Elevation': 550.972656, 'Latitude': 45.76902193, 'Longitude': 14.359394386}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 1, 36), 'Elevation': 550.492188, 'Latitude': 45.76912201, 'Longitude': 14.359344598}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 1, 48), 'Elevation': 551.453369, 'Latitude': 45.769292163, 'Longitude': 14.359244769}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 1, 54), 'Elevation': 556.26001, 'Latitude': 45.769382687, 'Longitude': 14.359183414}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 2, 1), 'Elevation': 553.856689, 'Latitude': 45.769459801, 'Longitude': 14.359140582}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 2, 8), 'Elevation': 554.337402, 'Latitude': 45.769552672, 'Longitude': 14.359091381}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 2, 15), 'Elevation': 553.856689, 'Latitude': 45.769652836, 'Longitude': 14.359047711}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 2, 21), 'Elevation': 552.414551, 'Latitude': 45.769740511, 'Longitude': 14.359010328}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 2, 28), 'Elevation': 551.453369, 'Latitude': 45.769836819, 'Longitude': 14.358967999}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 2, 35), 'Elevation': 550.492188, 'Latitude': 45.769914687, 'Longitude': 14.358929778}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 2, 42), 'Elevation': 549.530762, 'Latitude': 
45.770003702, 'Longitude': 14.358887114}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 3, 11), 'Elevation': 546.16626, 'Latitude': 45.770254154, 'Longitude': 14.358710088}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 3, 17), 'Elevation': 545.685547, 'Latitude': 45.770357922, 'Longitude': 14.358660635}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 4), 'Elevation': 543.282104, 'Latitude': 45.770934345, 'Longitude': 14.35844304}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 4, 8), 'Elevation': 544.243652, 'Latitude': 45.771037946, 'Longitude': 14.358421247}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 4, 19), 'Elevation': 546.16626, 'Latitude': 45.771127967, 'Longitude': 14.358289903}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 4, 29), 'Elevation': 546.646851, 'Latitude': 45.771174487, 'Longitude': 14.358226871}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 4, 41), 'Elevation': 545.685547, 'Latitude': 45.771373473, 'Longitude': 14.358056802}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 4, 48), 'Elevation': 546.646851, 'Latitude': 45.771503728, 'Longitude': 14.35801967}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 4, 55), 'Elevation': 545.685547, 'Latitude': 45.771614201, 'Longitude': 14.357968373}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 5, 1), 'Elevation': 544.243652, 'Latitude': 45.771710845, 'Longitude': 14.357919926}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 5, 8), 'Elevation': 543.282104, 'Latitude': 45.77182618, 'Longitude': 14.3578579}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 11, 36), 'Elevation': 546.646851, 'Latitude': 45.771829281, 'Longitude': 14.357537627}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 7), 'Elevation': 549.050049, 'Latitude': 45.771894827, 'Longitude': 14.357712474}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 13), 'Elevation': 548.088867, 'Latitude': 45.771840429, 'Longitude': 14.357796963}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 16), 'Elevation': 
548.569336, 'Latitude': 45.771743115, 'Longitude': 14.357871311}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 19), 'Elevation': 548.569336, 'Latitude': 45.771622667, 'Longitude': 14.357950101}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 21), 'Elevation': 548.569336, 'Latitude': 45.771533903, 'Longitude': 14.358007936}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 23), 'Elevation': 548.569336, 'Latitude': 45.771430973, 'Longitude': 14.358066106}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 25), 'Elevation': 549.050049, 'Latitude': 45.771324607, 'Longitude': 14.358126372}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 27), 'Elevation': 549.530762, 'Latitude': 45.771206589, 'Longitude': 14.358193427}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 29), 'Elevation': 549.530762, 'Latitude': 45.771082956, 'Longitude': 14.358269367}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 31), 'Elevation': 549.530762, 'Latitude': 45.770953204, 'Longitude': 14.358345978}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 35), 'Elevation': 549.530762, 'Latitude': 45.770695461, 'Longitude': 14.358496601}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 37), 'Elevation': 549.530762, 'Latitude': 45.770566463, 'Longitude': 14.358569104}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 39), 'Elevation': 549.530762, 'Latitude': 45.77043579, 'Longitude': 14.35863968}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 41), 'Elevation': 549.530762, 'Latitude': 45.770300003, 'Longitude': 14.358712351}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 43), 'Elevation': 549.530762, 'Latitude': 45.770154744, 'Longitude': 14.35878695}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 45), 'Elevation': 550.011475, 'Latitude': 45.770012503, 'Longitude': 14.35886398}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 47), 'Elevation': 550.492188, 'Latitude': 45.769874034, 'Longitude': 14.358942099}, {'Timestamp': 
datetime.datetime(2010, 8, 5, 15, 12, 49), 'Elevation': 550.972656, 'Latitude': 45.769732296, 'Longitude': 14.359018458}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 51), 'Elevation': 551.453369, 'Latitude': 45.76959081, 'Longitude': 14.35909356}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 53), 'Elevation': 550.972656, 'Latitude': 45.769447898, 'Longitude': 14.359171093}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 55), 'Elevation': 550.492188, 'Latitude': 45.769310771, 'Longitude': 14.35924829}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 57), 'Elevation': 550.011475, 'Latitude': 45.769169619, 'Longitude': 14.359329343}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 12, 59), 'Elevation': 550.011475, 'Latitude': 45.769021008, 'Longitude': 14.359407965}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 1), 'Elevation': 550.011475, 'Latitude': 45.768874073, 'Longitude': 14.359487928}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 3), 'Elevation': 550.011475, 'Latitude': 45.768733257, 'Longitude': 14.359568143}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 5), 'Elevation': 550.011475, 'Latitude': 45.768587412, 'Longitude': 14.359647604}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 7), 'Elevation': 550.492188, 'Latitude': 45.768448357, 'Longitude': 14.35972916}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 9), 'Elevation': 550.492188, 'Latitude': 45.768309217, 'Longitude': 14.359810464}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 11), 'Elevation': 550.492188, 'Latitude': 45.768167395, 'Longitude': 14.359891182}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 17), 'Elevation': 550.972656, 'Latitude': 45.767732793, 'Longitude': 14.360125121}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 19), 'Elevation': 550.492188, 'Latitude': 45.767598012, 'Longitude': 14.360196032}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 21), 'Elevation': 550.972656, 'Latitude': 45.767468764, 
'Longitude': 14.360270463}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 23), 'Elevation': 550.972656, 'Latitude': 45.767328702, 'Longitude': 14.360341541}, {'Timestamp': datetime.datetime(2010, 8, 5, 15, 13, 25), 'Elevation': 550.972656, 'Latitude': 45.767197357, 'Longitude': 14.360415721}
I wrote gpxcsv just for this use case. It can create a bunch of csv files:
gpxcsv *.gpx
or if you want one big pandas frame in your example:
from gpxcsv import gpxtolist
df = pd.concat([pd.DataFrame(gpxtolist(x)) for x in list_of_files])
It just requires lxml; there is no need for gpxpy. It will also turn any extension tag (heart rate, temperature, etc.) into a column.
Related
How to change all datetime objects in a list to standard YYYY-MM-DD HH:MM:SS
When I query MySQL with Python and the query has datetime fields then I get this list as a result. [{'_id': 1, 'name': 'index', '_cdate': datetime.datetime(2020, 10, 27, 9, 4, 34), 'title': 'DataExtract'}, {'_id': 2, 'name': 'topmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 17), 'title': 'topmenu'}, {'_id': 3, 'name': 'functions_common', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 50), 'title': 'common functions'}, {'_id': 4, 'name': 'leftmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 53, 56), 'title': 'Left Menu'}, {'_id': 5, 'name': 'todo', '_cdate': datetime.datetime(2020, 11, 7, 8, 49, 38), 'title': 'Todo'}, {'_id': 6, 'name': 'cron_publish', '_cdate': datetime.datetime(2020, 12, 2, 19, 30, 11), 'title': 'Run Publish reports'}, {'_id': 7, 'name': 'test', '_cdate': datetime.datetime(2020, 12, 2, 22, 32, 54), 'title': 'test'}, {'_id': 8, 'name': 'help', '_cdate': datetime.datetime(2020, 12, 5, 7, 12, 44), 'title': 'Help'}, {'_id': 9, 'name': 'api', '_cdate': datetime.datetime(2020, 12, 5, 21, 22, 13), 'title': 'API'}, {'_id': 10, 'name': 'ben', '_cdate': datetime.datetime(2021, 10, 4, 11, 37, 3), 'title': 'List of Reports'}] How do I either get the query to return the date fields in YYYY-MM-DD HH:MM:SS format? Or how do I convert them in the returned list. When I try to change them by enumerating over the results python throw as error that the dictionary has changed.
The datetime.datetime() objects you're getting are the standard representation of these objects - if you were expecting strings instead, you could simply convert them with datetime.strftime(value, '%Y-%m-%d %H:%M:%S') but keep in mind that the datetime object is a more flexible way of keeping the data around. I'd recommend only formatting the date in a specific way if you're writing it to the screen or a file format that expects a string. Example: data = [{'_id': 1, 'name': 'index', '_cdate': datetime.datetime(2020, 10, 27, 9, 4, 34), 'title': 'DataExtract'}, {'_id': 2, 'name': 'topmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 17), 'title': 'topmenu'}, {'_id': 3, 'name': 'functions_common', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 50), 'title': 'common functions'}, {'_id': 4, 'name': 'leftmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 53, 56), 'title': 'Left Menu'}, {'_id': 5, 'name': 'todo', '_cdate': datetime.datetime(2020, 11, 7, 8, 49, 38), 'title': 'Todo'}, {'_id': 6, 'name': 'cron_publish', '_cdate': datetime.datetime(2020, 12, 2, 19, 30, 11), 'title': 'Run Publish reports'}, {'_id': 7, 'name': 'test', '_cdate': datetime.datetime(2020, 12, 2, 22, 32, 54), 'title': 'test'}, {'_id': 8, 'name': 'help', '_cdate': datetime.datetime(2020, 12, 5, 7, 12, 44), 'title': 'Help'}, {'_id': 9, 'name': 'api', '_cdate': datetime.datetime(2020, 12, 5, 21, 22, 13), 'title': 'API'}, {'_id': 10, 'name': 'ben', '_cdate': datetime.datetime(2021, 10, 4, 11, 37, 3), 'title': 'List of Reports'}] for rec in data: rec['date_str'] = datetime.datetime.strftime(rec['_cdate'], '%Y-%m-%d %H:%M:%S') That would add a 'date_str' field to every record with the format you require. Of course, you could also modify it to overwrite the original value.
How to group items by month and year using itertools.groupby()
Problem: I am trying to take a sorted list and group it based on the month and year but having trouble returning the grouped value correctly... Assuming this data, we have a title and date/time list that has been ordered by datetime lst = [ {'title': 'in the past','date_time': datetime.datetime(2020, 3, 18, 0, 0)}, {'title': 'Just another event','date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am','date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()','date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one','date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event','date_time': datetime.datetime(2020, 10, 30, 12, 0)}, {'title': '18 Nov 20','date_time': datetime.datetime(2020, 11, 18, 20, 27)}, {'title': '18 January 2021','date_time': datetime.datetime(2021, 1, 18, 20, 0)}, {'title': '18 March 21','date_time': datetime.datetime(2021, 3, 18, 20, 0)} ] Then to group it, run it through itertools.groupby() from itertools import groupby def loop_tupe(): diction = {} for key,group in groupby(lst, key=lambda x: (x['date_time'].month, x['date_time'].year)): for element in group: append_value(diction, key, element) return diction After grouping it by the month and year the returned result looks like { (3, 2020): {'title': 'in the past', 'date_time': datetime.datetime(2020, 3, 18, 0, 0)}, (10, 2020): [ {'title': 'Just another event', 'date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am', 'date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()', 'date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one', 'date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event', 'date_time': datetime.datetime(2020, 10, 30, 12, 0)} ], (11, 2020): {'title': '18 Nov 20', 'date_time': datetime.datetime(2020, 11, 18, 20, 27)}, (1, 2021): {'title': '18 January 2021', 
'date_time': datetime.datetime(2021, 1, 18, 20, 0)}, (3, 2021): {'title': '18 March 21', 'date_time': datetime.datetime(2021, 3, 18, 20, 0)} } It has been grouped correctly, however the dates are within a tuple whereas I would need them as one "value", and while it's in this format I am unable to loop over it in the way I would with the original list. I realise it has something to do with the way I'm using the anonymous function within the groupby() (and maybe how the return result is created perhaps?) but I'm unsure how else to apply a month and year grouping within it. Question: What can I do to group my original data by month & year while also keeping its format relatively similar to the list going in? Edit The append_value function that I'm using def append_value(dict_obj, key, value): if key in dict_obj: if not isinstance(dict_obj[key], list): dict_obj[key] = [dict_obj[key]] dict_obj[key].append(value) else: dict_obj[key] = value Edit 2 So far this is the closest I'm getting to a solution. I have changed the function used in groupby to take the datetime and change it into a string to be compared. 
(I've left the print in there to visualise) def loop_str(to_sort): output={} for key,group in groupby(to_sort, key=lambda item: item['date_time'].strftime('%B %Y')): for element in group: append_value(output,key,element) return output Doing so gives me this output { 'March 2020': { 'title': 'in the past', 'date_time': datetime.datetime(2020, 3, 18, 0, 0) }, 'October 2020': [ {'title': 'Just another event', 'date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am', 'date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()', 'date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one', 'date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event', 'date_time': datetime.datetime(2020, 10, 30, 12, 0)} ], 'November 2020': { 'title': '18 Nov 20', 'date_time': datetime.datetime(2020, 11, 18, 20, 27) }, 'January 2021': { 'title': '18 January 2021', 'date_time': datetime.datetime(2021, 1, 18, 20, 0) }, 'March 2021': { 'title': '18 March 21', 'date_time': datetime.datetime(2021, 3, 18, 20, 0) } } This is closer to what I need however unless I'm not seeing something it seems that this output could be a mix of dicts and lists which will be more difficult to loop over within a django template?
You can make the groupby key the string you want by formatting the date. Then you can just use it in a dict comprehension. It is easier to create the data structure if the values are consistently lists. It will probably also be easier to use it. from itertools import groupby import datetime lst = [ {'title': 'in the past','date_time': datetime.datetime(2020, 3, 18, 0, 0)}, {'title': 'Just another event','date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am','date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()','date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one','date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event','date_time': datetime.datetime(2020, 10, 30, 12, 0)}, {'title': '18 Nov 20','date_time': datetime.datetime(2020, 11, 18, 20, 27)}, {'title': '18 January 2021','date_time': datetime.datetime(2021, 1, 18, 20, 0)}, {'title': '18 March 21','date_time': datetime.datetime(2021, 3, 18, 20, 0)} ] groups = groupby(lst, key=lambda x: (x['date_time'].strftime("%B %Y"))) {k: list(g) for k, g in groups} Result: {'March 2020': [{'title': 'in the past', 'date_time': datetime.datetime(2020, 3, 18, 0, 0)}], 'October 2020': [{'title': 'Just another event', 'date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am', 'date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()', 'date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one', 'date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event', 'date_time': datetime.datetime(2020, 10, 30, 12, 0)}], 'November 2020': [{'title': '18 Nov 20', 'date_time': datetime.datetime(2020, 11, 18, 20, 27)}], 'January 2021': [{'title': '18 January 2021', 'date_time': datetime.datetime(2021, 1, 18, 20, 0)}], 'March 2021': [{'title': '18 March 21', 'date_time': datetime.datetime(2021, 3, 18, 20, 0)}]}
Pandas ticker to ohlc
rows is a list of dict from mysql. rows example [{'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605515L, 'price': Decimal('1080.04000000'), 'type': 1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605549L, 'price': Decimal('1081.55000000'), 'type': 1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605547L, 'price': Decimal('1081.33000000'), 'type': 1, 'amount': Decimal('20.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605545L, 'price': Decimal('1081.30000000'), 'type': 1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605543L, 'price': Decimal('1081.29000000'), 'type': 1, 'amount': Decimal('20.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605541L, 'price': Decimal('1080.46000000'), 'type': 1, 'amount': Decimal('26.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605517L, 'price': Decimal('1080.04000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 22), 'tid': 648605601L, 'price': Decimal('1079.69000000'), 'type': -1, 'amount': Decimal('70.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 25), 'tid': 648605686L, 'price': Decimal('1079.72000000'), 'type': -1, 'amount': Decimal('4.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605765L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605753L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('106.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605751L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('80.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605749L, 'price': Decimal('1079.67000000'), 
'type': -1, 'amount': Decimal('430.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605747L, 'price': Decimal('1079.70000000'), 'type': -1, 'amount': Decimal('66.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605745L, 'price': Decimal('1079.74000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605785L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605774L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605771L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 28), 'tid': 648605827L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('42.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 28), 'tid': 648605842L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 32), 'tid': 648605973L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 37), 'tid': 648606114L, 'price': Decimal('1079.44000000'), 'type': 1, 'amount': Decimal('24.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 37), 'tid': 648606116L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 42), 'tid': 648606258L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('56.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 45), 'tid': 648606345L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 46), 'tid': 648606392L, 'price': Decimal('1079.69000000'), 'type': 
1, 'amount': Decimal('44.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606418L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606420L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('36.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606422L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('94.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606499L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('80.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606478L, 'price': Decimal('1079.31000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606476L, 'price': Decimal('1079.31000000'), 'type': -1, 'amount': Decimal('34.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606474L, 'price': Decimal('1079.55000000'), 'type': -1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606666L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('44.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606650L, 'price': Decimal('1079.17000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606648L, 'price': Decimal('1079.17000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 1), 'tid': 648606820L, 'price': Decimal('1079.03000000'), 'type': -1, 'amount': Decimal('28.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 2), 'tid': 648606825L, 'price': Decimal('1079.03000000'), 'type': 1, 'amount': Decimal('30.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 2), 'tid': 648606836L, 'price': Decimal('1079.02000000'), 'type': -1, 
'amount': Decimal('22.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606945L, 'price': Decimal('1078.58000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606943L, 'price': Decimal('1078.61000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606941L, 'price': Decimal('1078.63000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606939L, 'price': Decimal('1078.88000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606926L, 'price': Decimal('1078.88000000'), 'type': -1, 'amount': Decimal('428.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606984L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606982L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606971L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606957L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': Decimal('74.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606955L, 'price': Decimal('1078.15000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606953L, 'price': Decimal('1078.15000000'), 'type': -1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606951L, 'price': Decimal('1078.42000000'), 'type': -1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648606992L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': 
Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648606995L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648607023L, 'price': Decimal('1078.06000000'), 'type': -1, 'amount': Decimal('4.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 8), 'tid': 648607047L, 'price': Decimal('1078.86000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 10), 'tid': 648607113L, 'price': Decimal('1078.06000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 10), 'tid': 648607115L, 'price': Decimal('1078.03000000'), 'type': -1, 'amount': Decimal('148.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 12), 'tid': 648607192L, 'price': Decimal('1079.00000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607218L, 'price': Decimal('1078.99000000'), 'type': 1, 'amount': Decimal('98.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607220L, 'price': Decimal('1079.00000000'), 'type': 1, 'amount': Decimal('42.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607222L, 'price': Decimal('1079.03000000'), 'type': 1, 'amount': Decimal('342.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607224L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('512.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607250L, 'price': Decimal('1078.98000000'), 'type': 1, 'amount': Decimal('44.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607252L, 'price': Decimal('1078.98000000'), 'type': 1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607254L, 'price': Decimal('1079.00000000'), 'type': 1, 'amount': 
Decimal('106.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607256L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607431L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('28.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607429L, 'price': Decimal('1079.01000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607427L, 'price': Decimal('1079.01000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 23), 'tid': 648607518L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 24), 'tid': 648607544L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('344.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 25), 'tid': 648607593L, 'price': Decimal('1078.79000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607631L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('430.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607623L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('18.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607621L, 'price': Decimal('1078.79000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 29), 'tid': 648607695L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('776.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 32), 'tid': 648607803L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 32), 'tid': 648607805L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': 
Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 36), 'tid': 648607905L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 37), 'tid': 648607940L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 42), 'tid': 648608110L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 46), 'tid': 648608211L, 'price': Decimal('1079.88000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 46), 'tid': 648608213L, 'price': Decimal('1079.88000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 57), 'tid': 648608534L, 'price': Decimal('1080.29000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 57), 'tid': 648608536L, 'price': Decimal('1080.30000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 2), 'tid': 648608683L, 'price': Decimal('1080.59000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 3), 'tid': 648608733L, 'price': Decimal('1080.59000000'), 'type': 1, 'amount': Decimal('360.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 7), 'tid': 648608838L, 'price': Decimal('1080.90000000'), 'type': 1, 'amount': Decimal('82.00000000')}] if I didn't use set_index ,it will have an TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex' if rows: df = pd.DataFrame(rows) print df.head() # TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex' df = df.set_index("date") print df.head() resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"}) print 
resample_data Result : Connected to pydev debugger (build 162.1967.10) amount date price tid type 0 2.00000000 2017-03-21 11:15:12 1075.83000000 648370156 -1 1 10.00000000 2017-03-21 11:15:15 1076.00000000 648370241 -1 2 10.00000000 2017-03-21 11:15:17 1075.83000000 648370297 -1 3 10.00000000 2017-03-21 11:15:17 1075.83000000 648370311 1 4 8.00000000 2017-03-21 11:15:19 1076.13000000 648370370 1 amount price tid type date 2017-03-21 11:15:12 2.00000000 1075.83000000 648370156 -1 2017-03-21 11:15:15 10.00000000 1076.00000000 648370241 -1 2017-03-21 11:15:17 10.00000000 1075.83000000 648370297 -1 2017-03-21 11:15:17 10.00000000 1075.83000000 648370311 1 2017-03-21 11:15:19 8.00000000 1076.13000000 648370370 1 /Users/wyx/bitcoin_workspace/fibo-strategy/ticker.py:45: FutureWarning: how in .resample() is deprecated the new syntax is .resample(...)..apply(<func>) resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"}) Traceback (most recent call last): File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 1580, in <module> globals = debugger.run(setup['file'], None, None, is_module) File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 964, in run pydev_imports.execfile(file, globals, locals) # execute the script File "/Users/wyx/bitcoin_workspace/fibo-strategy/ticker.py", line 45, in <module> resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"}) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/generic.py", line 4216, in resample limit=limit) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/tseries/resample.py", line 582, in _maybe_process_deprecations r = r.aggregate(how) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/tseries/resample.py", line 320, in aggregate result, how = self._aggregate(arg, *args, **kwargs) File 
"/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 549, in _aggregate result = _agg(arg, _agg_1dim) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 500, in _agg result[fname] = func(fname, agg_how) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 483, in _agg_1dim return colg.aggregate(how, _level=(_level or 0) + 1) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 2652, in aggregate return getattr(self, func_or_funcs)(*args, **kwargs) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 1128, in ohlc lambda x: x._cython_agg_general('ohlc')) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 3103, in _apply_to_column_groupbys return func(self) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 1128, in <lambda> lambda x: x._cython_agg_general('ohlc')) File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 808, in _cython_agg_general raise DataError('No numeric types to aggregate') pandas.core.base.DataError: No numeric types to aggregate Process finished with exit code 1 I am a rookie for pandas. How solve the error? And if I want to use the last close price to fill the NaN of next min ohlc. How to do that?
You need to set an index using your dates. Code: from io import StringIO df = pd.read_csv(StringIO( u"""amount date price tid type 6.00000000 2017-03-21t10:46:32 1059.26000000 648313975 -1 4.00000000 2017-03-21t10:46:37 1059.42000000 648314094 -1 2.00000000 2017-03-21t10:46:37 1059.42000000 648314096 -1 2.00000000 2017-03-21t10:46:41 1059.26000000 648314176 -1 32.00000000 2017-03-21t10:46:41 1059.26000000 648314189 -1 """), sep='\s+', parse_dates='date'.split()) print(df) resample_data = df.set_index('date').resample( "1min", how={"price": "ohlc", "amount": "sum"}) print(resample_data) Results: amount date price tid type 0 6.0 2017-03-21 10:46:32 1059.26 648313975 -1 1 4.0 2017-03-21 10:46:37 1059.42 648314094 -1 2 2.0 2017-03-21 10:46:37 1059.42 648314096 -1 3 2.0 2017-03-21 10:46:41 1059.26 648314176 -1 4 32.0 2017-03-21 10:46:41 1059.26 648314189 -1 price amount open high low close amount date 2017-03-21 10:46:00 1059.26 1059.42 1059.26 1059.26 46.0
Remove duplicates from a list of dicts - python [duplicate]
This question already has answers here: Remove duplicates from a list of dictionaries when only one of the key values is different (5 answers) Closed 8 years ago. I have list of dictionaries, I want to remove duplicates from that list. How to do that ? a = [ {'dtstart': '2014-09-10T08:00:00', 'end': datetime.datetime(2014, 9, 10, 9, 0), 'location': 'Brady Auditorium, B-131', 'partial_date': datetime.date(2014, 9, 10), 'photo': 'http://tools.medicine.yale.edu/portal/stream?id=01a331e2-42be-4622-b072-0c42b55b436e&w=540&h=700', 'start': datetime.datetime(2014, 9, 10, 8, 0), 'stream': '01a331e2-42be-4622-b072-0c42b55b436e', 'summary': 'Clinical Neuroscience Grand Rounds: "The Mechanism of Impaired Consciousness of Absence Seizures"', 'uid': '2d671415-c666-498f-a401-01652a08e4b3'}, {'dtstart': '2014-09-10T08:00:00', 'end': datetime.datetime(2014, 9, 10, 9, 0), 'location': 'Brady Auditorium, B-131', 'partial_date': datetime.date(2014, 9, 10), 'photo': 'http://tools.medicine.yale.edu/portal/stream?id=ccf667b2-b5a0-464f-8797-66eb36b0bf6c&w=540&h=700', 'start': datetime.datetime(2014, 9, 10, 8, 0), 'stream': 'ccf667b2-b5a0-464f-8797-66eb36b0bf6c', 'summary': 'Clinical Neuroscience Grand Rounds: "The Mechanism of Impaired Consciousness of Absence Seizures"', 'uid': '2d671415-c666-498f-a401-01652a08e4b3'} ] What I have tried is , >>> [dict(t) for t in set([tuple(d.items()) for d in a])] But still returning duplicate elements.
Just try the following code: {document['uid']: document for document in a}.values() For every uid you will get the last document in the list. If you're looking for the first entries instead, try this: {document['uid']: document for document in a[::-1]}.values()
Use a dictionary comprehension to create a dictionary with uid as keys and each dictionary as values. Then extract the values to return a list of unique dictionaries as keyed by uid. >>> a=[{'end': datetime.datetime(2014, 9, 10, 9, 0), 'uid': '2d671415-c666-498f-a401-01652a08e4b3', 'stream': '01a331e2-42be-4622-b072-0c42b55b436e', 'photo': 'http://tools.medicine.yale.edu/portal/stream?id=01a331e2-42be-4622-b072-0c42b55b436e&w=540&h=700', 'partial_date': datetime.date(2014, 9, 10), 'summary': 'Clinical Neuroscience Grand Rounds: "The Mechanism of Impaired Consciousness of Absence Seizures"', 'start': datetime.datetime(2014, 9, 10, 8, 0), 'location': 'Brady Auditorium, B-131', 'dtstart': '2014-09-10T08:00:00'}, {'end': datetime.datetime(2014, 9, 10, 9, 0), 'uid': '2d671415-c666-498f-a401-01652a08e4b3', 'stream': 'ccf667b2-b5a0-464f-8797-66eb36b0bf6c', 'photo': 'http://tools.medicine.yale.edu/portal/stream?id=ccf667b2-b5a0-464f-8797-66eb36b0bf6c&w=540&h=700', 'partial_date': datetime.date(2014, 9, 10), 'summary': 'Clinical Neuroscience Grand Rounds: "The Mechanism of Impaired Consciousness of Absence Seizures"', 'start': datetime.datetime(2014, 9, 10, 8, 0), 'location': 'Brady Auditorium, B-131', 'dtstart': '2014-09-10T08:00:00'}] >>> {d['uid']: d for d in a}.values() [{'dtstart': '2014-09-10T08:00:00', 'end': datetime.datetime(2014, 9, 10, 9, 0), 'location': 'Brady Auditorium, B-131', 'partial_date': datetime.date(2014, 9, 10), 'photo': 'http://tools.medicine.yale.edu/portal/stream?id=ccf667b2-b5a0-464f-8797-66eb36b0bf6c&w=540&h=700', 'start': datetime.datetime(2014, 9, 10, 8, 0), 'stream': 'ccf667b2-b5a0-464f-8797-66eb36b0bf6c', 'summary': 'Clinical Neuroscience Grand Rounds: "The Mechanism of Impaired Consciousness of Absence Seizures"', 'uid': '2d671415-c666-498f-a401-01652a08e4b3'}]
Try adding the uids to a temporary set and checking each dictionary against it import datetime a=[{'end': datetime.datetime(2014, 9, 10, 9, 0), 'uid': '2d671415-c666-498f-a401-01652a08e4b3', 'stream': '01a331e2-42be-4622-b072-0c42b55b436e', 'photo': 'http://tools.medicine.yale.edu/portal/stream?id=01a331e2-42be-4622-b072-0c42b55b436e&w=540&h=700', 'partial_date': datetime.date(2014, 9, 10), 'summary': 'Clinical Neuroscience Grand Rounds: "The Mechanism of Impaired Consciousness of Absence Seizures"', 'start': datetime.datetime(2014, 9, 10, 8, 0), 'location': 'Brady Auditorium, B-131', 'dtstart': '2014-09-10T08:00:00'}, {'end': datetime.datetime(2014, 9, 10, 9, 0), 'uid': '2d671415-c666-498f-a401-01652a08e4b3', 'stream': 'ccf667b2-b5a0-464f-8797-66eb36b0bf6c', 'photo': 'http://tools.medicine.yale.edu/portal/stream?id=ccf667b2-b5a0-464f-8797-66eb36b0bf6c&w=540&h=700', 'partial_date': datetime.date(2014, 9, 10), 'summary': 'Clinical Neuroscience Grand Rounds: "The Mechanism of Impaired Consciousness of Absence Seizures"', 'start': datetime.datetime(2014, 9, 10, 8, 0), 'location': 'Brady Auditorium, B-131', 'dtstart': '2014-09-10T08:00:00'}] uuids = set() # temporary set holds UID final=[] for i in a: if i['uid'] not in uuids: final.append(i) uuids.add(i['uid']) print final
Grouping data on year
mydata = [
    {'date': datetime.datetime(2009, 1, 31, 0, 0), 'value': 14, 'year': u'2009'},
    {'date': datetime.datetime(2009, 2, 28, 0, 0), 'value': 84, 'year': u'2009'},
    {'date': datetime.datetime(2009, 3, 31, 0, 0), 'value': 77, 'year': u'2009'},
    {'date': datetime.datetime(2009, 4, 30, 0, 0), 'value': 80, 'year': u'2009'},
    {'date': datetime.datetime(2009, 5, 31, 0, 0), 'value': 6, 'year': u'2009'},
    {'date': datetime.datetime(2009, 6, 30, 0, 0), 'value': 16, 'year': u'2009'},
    {'date': datetime.datetime(2009, 7, 31, 0, 0), 'value': 16, 'year': u'2009'},
    {'date': datetime.datetime(2009, 8, 31, 0, 0), 'value': 1, 'year': u'2009'},
    {'date': datetime.datetime(2009, 9, 30, 0, 0), 'value': 9, 'year': u'2009'},
    {'date': datetime.datetime(2008, 1, 31, 0, 0), 'value': 77, 'year': u'2008'},
    {'date': datetime.datetime(2008, 2, 29, 0, 0), 'value': 60, 'year': u'2008'},
    {'date': datetime.datetime(2008, 3, 31, 0, 0), 'value': 28, 'year': u'2008'},
    {'date': datetime.datetime(2008, 4, 30, 0, 0), 'value': 9, 'year': u'2008'},
    {'date': datetime.datetime(2008, 5, 31, 0, 0), 'value': 74, 'year': u'2008'},
    {'date': datetime.datetime(2008, 6, 30, 0, 0), 'value': 70, 'year': u'2008'},
    {'date': datetime.datetime(2008, 7, 31, 0, 0), 'value': 75, 'year': u'2008'},
    {'date': datetime.datetime(2008, 8, 31, 0, 0), 'value': 7, 'year': u'2008'},
    {'date': datetime.datetime(2008, 9, 30, 0, 0), 'value': 10, 'year': u'2008'},
    {'date': datetime.datetime(2008, 10, 31, 0, 0), 'value': 54, 'year': u'2008'},
    {'date': datetime.datetime(2008, 11, 30, 0, 0), 'value': 55, 'year': u'2008'},
    {'date': datetime.datetime(2008, 12, 31, 0, 0), 'value': 40, 'year': u'2008'},
    {'date': datetime.datetime(2007, 12, 31, 0, 0), 'value': 93, 'year': u'2007'},
]

# In 'mydata', I get a list of sequential monthly data.
# I wrote some code to group the records by year.  groupby() only groups
# adjacent records, which works here because the records of each year are
# contiguous in 'mydata'.
partial_req_data = {
    k: list(v)
    for k, v in itertools.groupby(mydata, key=lambda x: x.get('year'))
}

# Now I further need some efficient code to fill the missing months with {},
# i.e. an empty dict.  There are bad ways to do that, but I am looking for
# good ones.  The desired result has twelve slots (January..December) per year:
required_data = {
    "2009": [
        {'date': datetime.datetime(2009, 1, 31, 0, 0), 'value': 14, 'year': u'2009'},
        {'date': datetime.datetime(2009, 2, 28, 0, 0), 'value': 84, 'year': u'2009'},
        {'date': datetime.datetime(2009, 3, 31, 0, 0), 'value': 77, 'year': u'2009'},
        {'date': datetime.datetime(2009, 4, 30, 0, 0), 'value': 80, 'year': u'2009'},
        {'date': datetime.datetime(2009, 5, 31, 0, 0), 'value': 6, 'year': u'2009'},
        {'date': datetime.datetime(2009, 6, 30, 0, 0), 'value': 16, 'year': u'2009'},
        {'date': datetime.datetime(2009, 7, 31, 0, 0), 'value': 16, 'year': u'2009'},
        {'date': datetime.datetime(2009, 8, 31, 0, 0), 'value': 1, 'year': u'2009'},
        {'date': datetime.datetime(2009, 9, 30, 0, 0), 'value': 9, 'year': u'2009'},
        {},
        {},
        {},
    ],
    "2008": [
        {'date': datetime.datetime(2008, 1, 31, 0, 0), 'value': 77, 'year': u'2008'},
        {'date': datetime.datetime(2008, 2, 29, 0, 0), 'value': 60, 'year': u'2008'},
        {'date': datetime.datetime(2008, 3, 31, 0, 0), 'value': 28, 'year': u'2008'},
        {'date': datetime.datetime(2008, 4, 30, 0, 0), 'value': 9, 'year': u'2008'},
        {'date': datetime.datetime(2008, 5, 31, 0, 0), 'value': 74, 'year': u'2008'},
        {'date': datetime.datetime(2008, 6, 30, 0, 0), 'value': 70, 'year': u'2008'},
        {'date': datetime.datetime(2008, 7, 31, 0, 0), 'value': 75, 'year': u'2008'},
        {'date': datetime.datetime(2008, 8, 31, 0, 0), 'value': 7, 'year': u'2008'},
        {'date': datetime.datetime(2008, 9, 30, 0, 0), 'value': 10, 'year': u'2008'},
        {'date': datetime.datetime(2008, 10, 31, 0, 0), 'value': 54, 'year': u'2008'},
        {'date': datetime.datetime(2008, 11, 30, 0, 0), 'value': 55, 'year': u'2008'},
        {'date': datetime.datetime(2008, 12, 31, 0, 0), 'value': 40, 'year': u'2008'},
    ],
    "2007": [
        {},
        {},
        {},
        {},
        {},
        {},
        {},
        {},
        {},
        {},
        {},
        {'date': datetime.datetime(2007, 12, 31, 0, 0), 'value': 93, 'year': u'2007'},
    ],
}
import datetime
from itertools import groupby
from pprint import pprint

# Build, for every year found in ``mydata``, a 12-slot list (January..December)
# that holds the record of each month, or {} where the month has no record.
required_data = {}
partials = {}  # year -> {month number -> record}
for k, g in groupby(mydata, key=lambda x: x.get('year')):
    # groupby() only groups *adjacent* records, so a year can show up in more
    # than one run when ``mydata`` is not ordered by year.  Reusing the dict
    # already collected for that year merges those runs instead of letting the
    # last run overwrite the earlier ones.
    partial = partials.setdefault(k, {})
    for datum in g:
        partial[datum.get('date').month] = datum
    # For each year k, partial is a dict whose keys are months.  The trick is
    # to use partial.get(m, {}), since this returns the datum when it exists,
    # or {} when it does not.
    required_data[k] = [partial.get(m, {}) for m in range(1, 13)]
pprint(required_data)