I want to split the following list
['1', '9', '8', '0', '9', '3', '7', '6', '5', '0', '8', '7', '3', '3', '0', '3', '0', '4', ':', '0', '0', '4', '5', '6', '4', '3', '0', '|', '8', '1', '1', '3', '0', '9', '1', '9', '4', '1', '8', '2', '6', '5', '6', '0', '1', '1', ':', '0', '0', '4', '5', '6', '0', '|', '8', '6', '7', '0', '5', '6', '0', '3', '5', '4', '4', '5', '7', '3', '5', '4', '4', '5', ':', '0', '0', '4', '5', '6', '6', '1', '0', '|', '8', '1', '7', '1', '4', '7', '3', '5', '2', '6', '1', '8', '8', '9', '3', '4', '0', '1', ':', '0', '0', '4', '5', '6', '3', '0', '|', '4', '0', '9', '2', '9', '2', '7', '0', '9', '3', '4', '2', '4', '7', '8', '3', '5', '8', ':', '0', '0', '4', '5', '6', '4', '9', '0', '|', '3', '1', '5', '0', '7', '9', '5', '9', '2', '9', '6', '6', '6', '1', '9', '1', '4', '7', ':', '0', '0', '4', '5', '6', '3', '0', '|', '5', '6', '3', '8', '1', '6', '8', '7', '7', '8', '0', '6', '0', '5', '9', '5', '2', '0', ':', '0', '0', '4', '5', '6', '6', '3', '0', '|', '2', '3', '7', '2', '5', '1', '8', '0', '7', '4', '0', '5', '9', '3', '2', '5', '6', '4', ':', '0', '0', '4', '5', '6', '4', '5', '0', '|', '1', '7', '2', '0', '0', '2', '2', '7', '5', '4', '1', '2', '2', '7', '9', '2', '9', '6', ':', '0', '0', '4', '5', '6', '2', '0', '|', '3', '0', '2', '0', '5', '0', '8', '7', '2', '3', '8', '3', '2', '4', '2', '2', '4', '0', ':', '0', '0', '4', '5', '6', '3', '1', '0', '|', '9', '4', '1', '4', '0', '2', '8', '4', '3', '2', '2', '1', '4', '8', '7', '7', '0', '6', ':', '0', '0', '4', '5', '6', '3', '9', '0', '|', '4', '6', '0', '0', '2', '3', '2', '4', '7', '2', '0', '3', '9', '9', '1', '5', '5', '2', ':', '0', '0', '4', '5', '6', '2', '0', '|', '4', '2', '3', '5', '3', '6', '6', '0', '6', '7', '0', '4', '8', '2', '8', '4', '1', '6', ':', '0', '0', '4', '5', '6', '4', '7', '0']
Into another list called userIDs, where each entry pairs the first 18 characters (the ID) with the shorter number that follows the ':' and runs up to the next '|' symbol.
Example:
[[198093765087330304,00456430],[(next ID),(next smaller number)]]
You can use a nested list comprehension:
>>> [part.split(":") for part in "".join(my_list).split("|")]
[['198093765087330304', '00456430'], ['811309194182656011', '004560'],
['867056035445735445', '00456610'], ['817147352618893401', '0045630'],
['409292709342478358', '00456490'], ['315079592966619147', '0045630'],
['563816877806059520', '00456630'], ['237251807405932564', '00456450'],
['172002275412279296', '0045620'], ['302050872383242240', '00456310'],
['941402843221487706', '00456390'], ['460023247203991552', '0045620'],
['423536606704828416', '00456470']]
My solution puts your list into one string and then splits it twice, first with "|" and then with ":"
# Input from the question: every character of the "ID:number|ID:number|..."
# text is stored as its own one-character list element.
my_list = ['1', '9', '8', '0', '9', '3', '7', '6', '5', '0', '8', '7', '3', '3', '0', '3', '0', '4', ':', '0', '0', '4', '5', '6', '4', '3', '0', '|', '8', '1', '1', '3', '0', '9', '1', '9', '4', '1', '8', '2', '6', '5', '6', '0', '1', '1', ':', '0', '0', '4', '5', '6', '0', '|', '8', '6', '7', '0', '5', '6', '0', '3', '5', '4', '4', '5', '7', '3', '5', '4', '4', '5', ':', '0', '0', '4', '5', '6', '6', '1', '0', '|', '8', '1', '7', '1', '4', '7', '3', '5', '2', '6', '1', '8', '8', '9', '3', '4', '0', '1', ':', '0', '0', '4', '5', '6', '3', '0', '|', '4', '0', '9', '2', '9', '2', '7', '0', '9', '3', '4', '2', '4', '7', '8', '3', '5', '8', ':', '0', '0', '4', '5', '6', '4', '9', '0', '|', '3', '1', '5', '0', '7', '9', '5', '9', '2', '9', '6', '6', '6', '1', '9', '1', '4', '7', ':', '0', '0', '4', '5', '6', '3', '0', '|', '5', '6', '3', '8', '1', '6', '8', '7', '7', '8', '0', '6', '0', '5', '9', '5', '2', '0', ':', '0', '0', '4', '5', '6', '6', '3', '0', '|', '2', '3', '7', '2', '5', '1', '8', '0', '7', '4', '0', '5', '9', '3', '2', '5', '6', '4', ':', '0', '0', '4', '5', '6', '4', '5', '0', '|', '1', '7', '2', '0', '0', '2', '2', '7', '5', '4', '1', '2', '2', '7', '9', '2', '9', '6', ':', '0', '0', '4', '5', '6', '2', '0', '|', '3', '0', '2', '0', '5', '0', '8', '7', '2', '3', '8', '3', '2', '4', '2', '2', '4', '0', ':', '0', '0', '4', '5', '6', '3', '1', '0', '|', '9', '4', '1', '4', '0', '2', '8', '4', '3', '2', '2', '1', '4', '8', '7', '7', '0', '6', ':', '0', '0', '4', '5', '6', '3', '9', '0', '|', '4', '6', '0', '0', '2', '3', '2', '4', '7', '2', '0', '3', '9', '9', '1', '5', '5', '2', ':', '0', '0', '4', '5', '6', '2', '0', '|', '4', '2', '3', '5', '3', '6', '6', '0', '6', '7', '0', '4', '8', '2', '8', '4', '1', '6', ':', '0', '0', '4', '5', '6', '4', '7', '0']

# Glue the characters back together in a single pass; str.join avoids
# rebuilding the whole string on every `+=`.
joined = "".join(my_list)

# '|' separates the records and ':' separates the ID from its number, so each
# record becomes an [ID, number] pair of strings.
s = [record.split(":") for record in joined.split("|")]
print(s)
Result: [['198093765087330304', '00456430'], ['811309194182656011', '004560'], ['867056035445735445', '00456610'], ['817147352618893401', '0045630'], [...
# Same data as the question: a list with one single-character string per
# element, produced here by exploding the "ID:number|ID:number|..." text.
q = list(
    "198093765087330304:00456430|"
    "811309194182656011:004560|"
    "867056035445735445:00456610|"
    "817147352618893401:0045630|"
    "409292709342478358:00456490|"
    "315079592966619147:0045630|"
    "563816877806059520:00456630|"
    "237251807405932564:00456450|"
    "172002275412279296:0045620|"
    "302050872383242240:00456310|"
    "941402843221487706:00456390|"
    "460023247203991552:0045620|"
    "423536606704828416:00456470"
)
you can do this series of operations:
First, you convert everything into a string, replace colons with commas and then split at |. Then pack each individual entry into a separate list.
# Join the characters, swap every ':' for ',', cut the text at '|', and wrap
# each resulting record in its own list. Every inner list therefore holds ONE
# string of the form 'ID,number'.
[[record] for record in ''.join(q).replace(':', ',').split('|')]
Edit:
finding first entry only:
# Cut the joined text at '|', keep only the part of each record before its
# ':' (the ID), convert it to int, and wrap it in its own list.
[[int(record.partition(':')[0])] for record in ''.join(q).split('|')]
I have a list which has numbers, but because it was built by appending to a list inside a for loop it contains '\n' entries, and I don't know how to remove them.
the list looks like this
['3', '7', '4', '5', '5', '9', '2', '2', '7', '\n', '4', '3', '7', '1', '5', '9', '4', '3', '0', '\n', '3', '7', '2', '4', '1', '0', '2', '7', '5', '\n', '7', '8', '4', '5', '1', '6', '2', '5', '7', '\n', '2', '8', '0', '6', '6', '1', '1', '2', '3', '\n', '9', '3', '5', '6', '8', '3', '8', '7', '1', '\n', '6', '7', '5', '5', '4', '7', '4', '8', '6']
I want to remove ' ' and '\n' so it would look like this
[374559227,437159430,372410275,784516257,280661123,935683871,675547486]
Join to a string and split the newlines:
# One character per element; each '\n' element marks the end of a number.
l = [
    '3', '7', '4', '5', '5', '9', '2', '2', '7', '\n',
    '4', '3', '7', '1', '5', '9', '4', '3', '0', '\n',
    '3', '7', '2', '4', '1', '0', '2', '7', '5', '\n',
    '7', '8', '4', '5', '1', '6', '2', '5', '7', '\n',
    '2', '8', '0', '6', '6', '1', '1', '2', '3', '\n',
    '9', '3', '5', '6', '8', '3', '8', '7', '1', '\n',
    '6', '7', '5', '5', '4', '7', '4', '8', '6',
]
# Rebuild the text, cut it at the newlines, and convert each piece to int.
pieces = ''.join(l).split('\n')
print(list(map(int, pieces)))
>>> [374559227, 437159430, 372410275, 784516257, 280661123, 935683871, 675547486]
You can use itertools.groupby:
>>> from itertools import groupby
>>> lst = ['3', '7', '4', '5', '5', '9', '2', '2', '7', '\n', '4', '3', '7', '1', '5', '9', '4', '3', '0', '\n', '3', '7', '2', '4', '1', '0', '2', '7', '5', '\n', '7', '8', '4', '5', '1', '6', '2', '5', '7', '\n', '2', '8', '0', '6', '6', '1', '1', '2', '3', '\n', '9', '3', '5', '6', '8', '3', '8', '7', '1', '\n', '6', '7', '5', '5', '4', '7', '4', '8', '6']
>>> [int(''.join(digits)) for is_number, digits in groupby(lst, lambda x: x != '\n') if is_number]
[374559227, 437159430, 372410275, 784516257, 280661123, 935683871, 675547486]
You can use the reduce function:
from functools import reduce
# One character per element; each '\n' element marks the end of a number.
lst = [
    '3', '7', '4', '5', '5', '9', '2', '2', '7', '\n',
    '4', '3', '7', '1', '5', '9', '4', '3', '0', '\n',
    '3', '7', '2', '4', '1', '0', '2', '7', '5', '\n',
    '7', '8', '4', '5', '1', '6', '2', '5', '7', '\n',
    '2', '8', '0', '6', '6', '1', '1', '2', '3', '\n',
    '9', '3', '5', '6', '8', '3', '8', '7', '1', '\n',
    '6', '7', '5', '5', '4', '7', '4', '8', '6',
]
# Fold the characters into one string with reduce, then cut that string at
# the newlines and convert every chunk to int.
folded = reduce(lambda acc, ch: acc + ch, lst)
lst_result = [int(chunk) for chunk in folded.split('\n')]
Output:
[374559227, 437159430, 372410275, 784516257, 280661123, 935683871, 675547486]
I am reading a csv file from pandas where I have a column of (3,3) shaped lists.
An example list is as follows.
[[45.70345721, -0.00014686, -1.679e-05], [-0.00012219, 45.70271889, 0.00012527], [-1.161e-05, 0.00013083, 45.70306778]]
I tried to convert this list to a numpy float array with np.array(arr).astype(np.float). But it gives the following error.
ValueError: could not convert string to float:
When I searched for the root cause I observed that this list is in fully string format. print [i for i in arr] gives the following where everything is a string.
['[', '[', '4', '5', '.', '7', '0', '3', '4', '5', '7', '2', '1', ',', ' ', '-', '0', '.', '0', '0', '0', '1', '4', '6', '8', '6', ',', ' ', '-', '1', '.', '6', '7', '9', 'e', '-', '0', '5', ']', ',', ' ', '[', '-', '0', '.', '0', '0', '0', '1', '2', '2', '1', '9', ',', ' ', '4', '5', '.', '7', '0', '2', '7', '1', '8', '8', '9', ',', ' ', '0', '.', '0', '0', '0', '1', '2', '5', '2', '7', ']', ',', ' ', '[', '-', '1', '.', '1', '6', '1', 'e', '-', '0', '5', ',', ' ', '0', '.', '0', '0', '0', '1', '3', '0', '8', '3', ',', ' ', '4', '5', '.', '7', '0', '3', '0', '6', '7', '7', '8', ']', ']']
How do I convert this list to a numpy float array?
EDIT
Here is a snap of a part of my data frame.
When loaded, the data frame is in the below format. df here is a small example data frame.
df = pd.DataFrame(columns=["e_total"], data=[[['[', '[', '4', '5', '.', '7', '0', '3', '4', '5', '7', '2', '1', ',', ' ', '-', '0', '.', '0', '0', '0', '1', '4', '6', '8', '6', ',', ' ', '-', '1', '.', '6', '7', '9', 'e', '-', '0', '5', ']', ',', ' ', '[', '-', '0', '.', '0', '0', '0', '1', '2', '2', '1', '9', ',', ' ', '4', '5', '.', '7', '0', '2', '7', '1', '8', '8', '9', ',', ' ', '0', '.', '0', '0', '0', '1', '2', '5', '2', '7', ']', ',', ' ', '[', '-', '1', '.', '1', '6', '1', 'e', '-', '0', '5', ',', ' ', '0', '.', '0', '0', '0', '1', '3', '0', '8', '3', ',', ' ', '4', '5', '.', '7', '0', '3', '0', '6', '7', '7', '8', ']', ']']]])
Could someone give it a try and help me to convert this to a float array.
You can probably use eval() to turn the entire string into an actual list. eval() is generally not good to use, but in this case it might be your best bet.
What you listed as your "example" is not correct. You are listing the result of your print statement and list comprehension. What is being stored as an entry for that column is a string.
you should be able to simply take each item and wrap it in eval
# NOTE: eval() executes whatever expression the string contains. When the
# string holds only a literal nested list, ast.literal_eval(arr) parses it
# without running arbitrary code.
eval(arr)
that should return you a shape (3,3) python list. From there you can convert it to a numpy array as necessary and change the types.
Aren't the numbers in the lists already floats? If that is the case just making the list an np.array will do what you are asking. You only need to do
np.array(list)
If the numbers are actually strings, as shown in the second part of your question, you will have to go through the list and convert each number individually, using either a nested loop or a nested list comprehension.
the loop looks like this
# `list` stands for your own nested list here (the name shadows the builtin,
# so pick a different one in real code).
# Each converted value is written back by index: merely rebinding the loop
# variable would leave the nested list unchanged.
# The builtin float is used because the np.float alias was removed from NumPy.
for row in list:
    for j, value in enumerate(row):
        row[j] = float(value)
the list comprehension looks like
# `list` stands for your own nested list. The builtin float replaces
# np.float, an alias for the same type that was removed from NumPy.
new_list = [[float(j) for j in i] for i in list]
I'm using the following script to grab all the files in a directory, then filtering them based on their modified date.
dir = '/tmp/whatever'
dir_files = os.listdir(dir)
dir_files.sort(key=lambda x: os.stat(os.path.join(dir, x)).st_mtime)
files = []
for f in dir_files:
t = os.path.getmtime(dir + '/' + f)
c = os.path.getctime(dir + '/' + f)
mod_time = datetime.datetime.fromtimestamp(t)
created_time = datetime.datetime.fromtimestamp(c)
if mod_time >= form.cleaned_data['start'].replace(tzinfo=None) and mod_time <= form.cleaned_data['end'].replace(tzinfo=None):
files.append(f)
return by_hour
I need to go one step further and group the files by the hour in which they were modified. Does anyone know how to do this off the top of their head?
UPDATE: I'd like to have them in a dictionary ({date,hour,files})
UPDATED:
Thanks for all your replies!. I tried using the response from david, but when I output the result it looks like below (ie. it's breaking up the filename):
defaultdict(<type 'list'>, {datetime.datetime(2013, 1, 9, 15, 0): ['2', '8', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '5', '1', '8', '4', '3', '.', 'a', 'v', 'i', '2', '9', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '5', '2', '0', '2', '4', '.', 'a', 'v', 'i', '3', '0', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '5', '3', '8', '5', '9', '.', 'a', 'v', 'i', '3', '1', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '5', '4', '1', '2', '4', '.', 'a', 'v', 'i', '3', '2', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '5', '5', '3', '1', '0', '.', 'a', 'v', 'i', '3', '3', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '5', '5', '5', '5', '8', '.', 'a', 'v', 'i'], datetime.datetime(2013, 1, 9, 19, 0): ['6', '1', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '9', '0', '1', '1', '8', '.', 'a', 'v', 'i', '6', '2', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '9', '0', '6', '3', '1', '.', 'a', 'v', 'i', '6', '3', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '9', '1', '4', '1', '5', '.', 'a', 'v', 'i', '6', '4', '-', '2', '0', '1', '3', '0', '1', '0', '9', '1', '9', '2', '2', '3', '3', '.', 'a', 'v', 'i']})
I was hoping to get it to store the complete file names. Also how would I loop over it and grab the files in each hour and the hour they belong to?
I managed to sort the above out by just changing it to append. However it's not sorted from the oldest hour to the most recent.
Many thanks,
Ben
You can truncate a datetime object down to the start of its hour with the line:
mod_hour = datetime.datetime(*mod_time.timetuple()[:4])
This works because mod_time.timetuple()[:4] returns a tuple like (2013, 1, 8, 21). You can then use a collections.defaultdict to keep a dictionary of lists:
import collections
# Bucket every file name under the start of the hour in which the file was
# last modified. `dir_files` and `dir` come from the question's code.
by_hour = collections.defaultdict(list)
for f in dir_files:
    modified = datetime.datetime.fromtimestamp(os.path.getmtime(dir + '/' + f))
    # The first four timetuple fields are year, month, day and hour,
    # for example (2013, 1, 8, 21).
    year, month, day, hour = modified.timetuple()[:4]
    # append keeps each file name whole as one list entry.
    by_hour[datetime.datetime(year, month, day, hour)].append(f)
import os, datetime, operator
dir = "Your_dir_path"

# Pair every directory entry with its modification time as a datetime.
stamped = []
for name in os.listdir(dir):
    modified = datetime.datetime.fromtimestamp(
        os.path.getmtime(os.path.join(dir, name)))
    stamped.append((name, modified))

# Order the (name, datetime) pairs by the datetime, newest first.
by_hour = sorted(stamped, key=operator.itemgetter(1), reverse=True)
The code above sorts the files by their full modification timestamp (year, month, day, hour, minute, second), newest first.
Building on David's excellent answer, you can use itertools.groupby to simplify the work a little bit:
import os, itertools, datetime
dir = '/tmp/whatever'


def mtime(f):
    """Return the modification time of entry *f* in `dir` as a datetime."""
    return datetime.datetime.fromtimestamp(os.path.getmtime(dir + '/' + f))


def mtime_hour(f):
    """Return mtime(f) reduced to its year, month, day and hour."""
    return datetime.datetime(*mtime(f).timetuple()[:4])


def _in_window(f):
    """Tell whether *f* was modified strictly inside the example window."""
    return datetime.datetime(2012, 1, 2, 4) < mtime(f) < datetime.datetime(2012, 12, 1, 4)


# groupby only merges neighbouring items, so order the names by modification
# time before grouping them.
dir_files = sorted(os.listdir(dir), key=mtime)
dir_files = filter(_in_window, dir_files)

# One dictionary entry per hour, holding the names modified in that hour.
# On Python 2.7+ the same mapping can be written as a dict comprehension.
by_hour = {}
for hour, names in itertools.groupby(dir_files, key=mtime_hour):
    by_hour[hour] = list(names)
Build the entries lazily, use the UTC timezone, and read each modification time only once:
#!/usr/bin/env python
import os
from collections import defaultdict
from datetime import datetime
HOUR = 60 * 60  # seconds in an hour
dirpath = "/path/to/dir"
# The `...` arguments are presumably placeholders: supply the real bounds of
# the [start, end) window before running.
start = datetime(...)
end = datetime(...)
# Full path of every entry in dirpath; each path is joined only when the
# generator is consumed.
entries = (os.path.join(dirpath, entry_name) for entry_name in os.listdir(dirpath))
# add modification time truncated to hour
def date_and_hour(path):
    """Return the modification time of *path*, floored to a whole hour.

    The timestamp is floored to a multiple of HOUR seconds and converted
    with datetime.utcfromtimestamp.
    """
    modified = os.path.getmtime(path)
    whole_hours = modified // HOUR
    return datetime.utcfromtimestamp(whole_hours * HOUR)
# Walk the paths once: compute each path's hour bucket, keep the path when
# the bucket falls inside [start, end), and collect the kept paths per bucket.
result = defaultdict(list)
for path in entries:
    bucket = date_and_hour(path)
    if start <= bucket < end:
        result[bucket].append(path)

from pprint import pprint
# Convert to a plain dict before printing.
pprint(dict(result))