I want to generate and keep a set of tuples for a certain amount of time. Yet I found that the program seemed to consume all available memory if given enough time.
I have tried two methods: one is deleting the newly generated variables, the other is calling gc.collect(). Neither of them worked. If I just generate the tuples and do not keep them, the program consumes only a limited amount of memory.
generate and keep: gk.py
import gc
import time
from memory_profiler import profile
from random import sample
from sys import getsizeof

@profile
def loop(limit):
    t = time.time()
    i = 0
    A = set()
    while True:
        i += 1
        duration = time.time() - t
        a = tuple(sorted(sample(range(200), 100)))
        A.add(a)
        if not i % int(1e4):
            print('step {:.2e}...'.format(i))
        if duration > limit:
            print('done')
            break
        # method 1: delete the variables
        # del duration, a
        # method 2: use gc
        # gc.collect()
    memory = getsizeof(t) + getsizeof(i) + getsizeof(duration) + \
        getsizeof(a) + getsizeof(limit) + getsizeof(A)
    print('memory consumed: {:.2e}MB'.format(memory/2**20))
    pass

def main():
    limit = 300
    loop(limit)
    pass

if __name__ == '__main__':
    print('running...')
    main()
generate and not keep: gnk.py
import time
from memory_profiler import profile
from random import sample
from sys import getsizeof

@profile
def loop(limit):
    t = time.time()
    i = 0
    while True:
        i += 1
        duration = time.time() - t
        a = tuple(sorted(sample(range(200), 100)))
        if not i % int(1e4):
            print('step {:.2e}...'.format(i))
        if duration > limit:
            print('done')
            break
    memory = getsizeof(t) + getsizeof(i) + getsizeof(duration) + \
        getsizeof(a) + getsizeof(limit)
    print('memory consumed: {:.2e}MB'.format(memory/2**20))
    pass

def main():
    limit = 300
    loop(limit)
    pass

if __name__ == '__main__':
    print('running...')
    main()
Use mprof (provided by the memory_profiler module) in a cmd/shell to check memory usage:
mprof run my_file.py
mprof plot
Result of gk.py:
memory consumed: 4.00e+00MB
Filename: gk.py
Line # Mem usage Increment Line Contents
================================================
12 32.9 MiB 32.9 MiB @profile
13 def loop(limit):
14 32.9 MiB 0.0 MiB t = time.time()
15 32.9 MiB 0.0 MiB i = 0
16 32.9 MiB 0.0 MiB A = set()
17 32.9 MiB 0.0 MiB while True:
18 115.8 MiB 0.0 MiB i += 1
19 115.8 MiB 0.0 MiB duration = time.time() - t
20 115.8 MiB 0.3 MiB a = tuple(sorted(sample(range(200), 100)))
21 115.8 MiB 2.0 MiB A.add(a)
22 115.8 MiB 0.0 MiB if not i % int(1e4):
23 111.8 MiB 0.0 MiB print('step {:.2e}...'.format(i))
24 115.8 MiB 0.0 MiB if duration > limit:
25 115.8 MiB 0.0 MiB print('done')
26 115.8 MiB 0.0 MiB break
27 # method 1: delete the variables
28 # del duration, a
29 # method 2: use gc
30 # gc.collect()
31 memory = getsizeof(t) + getsizeof(i) + getsizeof(duration) + \
32 115.8 MiB 0.0 MiB getsizeof(a) + getsizeof(limit) + getsizeof(A)
33 115.8 MiB 0.0 MiB print('memory consumed: {:.2e}MB'.format(memory/2**20))
34 115.8 MiB 0.0 MiB pass
Result of gnk.py:
memory consumed: 9.08e-04MB
Filename: gnk.py
Line # Mem usage Increment Line Contents
================================================
11 33.0 MiB 33.0 MiB @profile
12 def loop(limit):
13 33.0 MiB 0.0 MiB t = time.time()
14 33.0 MiB 0.0 MiB i = 0
15 33.0 MiB 0.0 MiB while True:
16 33.0 MiB 0.0 MiB i += 1
17 33.0 MiB 0.0 MiB duration = time.time() - t
18 33.0 MiB 0.1 MiB a = tuple(sorted(sample(range(200), 100)))
19 33.0 MiB 0.0 MiB if not i % int(1e4):
20 33.0 MiB 0.0 MiB print('step {:.2e}...'.format(i))
21 33.0 MiB 0.0 MiB if duration > limit:
22 33.0 MiB 0.0 MiB print('done')
23 33.0 MiB 0.0 MiB break
24 memory = getsizeof(t) + getsizeof(i) + getsizeof(duration) + \
25 33.0 MiB 0.0 MiB getsizeof(a) + getsizeof(limit)
26 33.0 MiB 0.0 MiB print('memory consumed: {:.2e}MB'.format(memory/2**20))
27 33.0 MiB 0.0 MiB pass
I have two questions:
1. Both programs consumed more memory than their variables occupied: gk.py consumed 115.8 MiB while its variables occupied 4.00 MB, and gnk.py consumed 33.0 MiB while its variables occupied 9.08e-04 MB. Why do the programs consume more memory than the corresponding variables occupy?
2. The memory consumed by gk.py increases linearly with time, while the memory consumed by gnk.py remains constant over time. Why does this happen?
Any help would be appreciated.
Given that the size of the set constantly increases, there will come a time when it eventually consumes all memory.
An estimate (from my computer):
10 seconds of code running ~ 5e4 tuples saved to the set
300 seconds of code running ~ 1.5e6 tuples saved to the set
1 tuple = 100 integers ~ 400 bytes
total:
1.5e6 * 400 bytes = 6e8 bytes = 600 MB filled in 300 s
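Note also that getsizeof reports only the shallow size of a container: getsizeof(A) measures the set's internal hash table, not the tuples it references, so the reported 4.00 MB understates the set's real footprint (and the ~33 MiB baseline in both runs is essentially the interpreter plus imports, not your variables). A minimal sketch of a deeper measurement:

from sys import getsizeof

A = {tuple(range(i, i + 100)) for i in range(10)}
shallow = getsizeof(A)                         # the set object alone
deep = shallow + sum(getsizeof(t) for t in A)  # add the tuples it holds
print(shallow, deep)

(The integers themselves are still not counted, but small ints in the range used here are cached and shared in CPython.)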
Related
I'm using concurrent.futures.ThreadPoolExecutor to run multithreaded tasks. The memory usage is very high and does not get released after the job is finished. I use memory_profiler to track the memory usage. Here is my test code and output.
import gc
import numpy as np
from memory_profiler import profile
import sys
import time
from concurrent.futures import ThreadPoolExecutor

def do():
    a = [1] * (4096*1023)
    return sum(a)

@profile
def main():
    threadpool = ThreadPoolExecutor(max_workers=60)
    tt = time.time()
    jobs = []
    for x in range(1000):
        jobs.append(threadpool.submit(do,))
    rst = [j.result() for j in jobs]
    print(time.time()-tt)
    return None

if __name__ == '__main__':
    main()
Line # Mem usage Increment Occurrences Line Contents
=============================================================
18 51.2 MiB 51.2 MiB 1 @profile
19 def main():
20 51.2 MiB 0.0 MiB 1 threadpool = ThreadPoolExecutor(max_workers=60)
21
22 51.2 MiB 0.0 MiB 1 tt = time.time()
23 51.2 MiB 0.0 MiB 1 jobs = []
24 404.2 MiB 0.0 MiB 1001 for x in range(1000):
25 404.2 MiB 353.0 MiB 1000 jobs.append(threadpool.submit(do,))
26 404.2 MiB 0.0 MiB 1003 rst = [j.result() for j in jobs]
27 404.2 MiB 0.0 MiB 1 print(time.time()-tt)
28 404.2 MiB 0.0 MiB 1 return None
As shown in the stats, the memory is not released after the jobs finish. However, if we change the do function to

def do():
    a = [1] * (4096*1024)  # <- increase the size of list a
    return sum(a)
The memory will be released correctly. The stats are:
Line # Mem usage Increment Occurrences Line Contents
=============================================================
18 51.4 MiB 51.4 MiB 1 @profile
19 def main():
20 51.4 MiB 0.0 MiB 1 threadpool = ThreadPoolExecutor(max_workers=60)
21
22 51.4 MiB 0.0 MiB 1 tt = time.time()
23 51.4 MiB 0.0 MiB 1 jobs = []
24 116.4 MiB -116311.3 MiB 1001 for x in range(1000):
25 244.3 MiB -48019.8 MiB 1000 jobs.append(threadpool.submit(do,))
26 53.5 MiB -62.9 MiB 1003 rst = [j.result() for j in jobs]
27 53.5 MiB 0.0 MiB 1 print(time.time()-tt)
28 53.5 MiB 0.0 MiB 1 return None
It seems there is an object-size threshold that controls whether the memory gets released. I wonder what the rule behind this is? Also, if I want the memory to be released in all cases, what should I do?
Edit:
About my environment: I'm using an Intel(R) Xeon(R) Platinum 8260 CPU with 16 GB RAM, the OS is Debian 9 (Linux 4.14), the Python version is 3.8.12, and I'm using Anaconda to manage my Python environment.
How can you profile a Python module that uses multiprocessing (multiprocessing.Pool.map) so that each spawned process is also profiled line by line?
Currently I use line_profiler for profiling, but it doesn't support multiprocessing.
Is there a way to do it manually? Or maybe use some other tool?
The normal way of using line_profiler, adding @profile to the function being profiled and running kernprof -v -l script.py, leads to the following error for multiprocessing:
Can't pickle <class '__main__.Worker'>: attribute lookup Worker on __main__ failed.
To fix this, we have to set up the line_profiler ourselves in the sub-process we want to profile, rather than doing it globally via kernprof:
import multiprocessing as mp
import os
import line_profiler

class Worker(mp.Process):
    def run(self):
        prof = line_profiler.LineProfiler()
        # Wrap all functions that you want profiled in this process.
        # These can be global functions or any class methods.
        # Make sure to replace instance methods on the class level,
        # not the bound method self.run2.
        Worker.run2 = prof(Worker.run2)
        ...
        # run the real work
        self.run2()
        # store stats in a separate file for each process
        prof.dump_stats('worker-{}.lprof'.format(os.getpid()))

    def run2(self):
        # real run method, renamed
        ...
Running the script now generates one profile file per process, which we can then visualize with:
python -m line_profiler worker-<pid>.lprof
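For completeness, a minimal way to drive the Worker above might look like this (a sketch assuming run2 holds the real work; each process dumps its own stats file):

if __name__ == '__main__':
    workers = [Worker() for _ in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()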
You could use memory_profiler like this:
from memory_profiler import profile
import multiprocessing as mp
import time, psutil, gc, os

@profile(precision=4)
def array_ops(num):
    gc.collect()
    size1 = 10 ** num
    size2 = 20 ** (num+1)
    x = [1] * size1
    y = [2] * size2
    y *= 2
    del y
    gc.collect()
    z = x * 2
    gc.collect()
    return x

if __name__ == '__main__':
    num_workers = 3
    pool = mp.Pool(num_workers)
    pool.map(array_ops, [4,5,6])
    pool.close()
    pool.join()
This is a sample output:
Filename: array_ops.py
Line # Mem usage Increment Line Contents
================================================
6 34.4258 MiB 34.4258 MiB @profile(precision=4)
7 def array_ops(num):
8 34.4258 MiB 0.0000 MiB gc.collect()
9 34.4258 MiB 0.0000 MiB size1 = 10 ** num
10 34.4258 MiB 0.0000 MiB size2 = 20 ** (num+1)
11 34.5586 MiB 0.1328 MiB x = [1] * size1
12 58.7852 MiB 24.2266 MiB y = [2] * size2
13 83.2539 MiB 24.4688 MiB y *= 2
14 34.6055 MiB 0.0000 MiB del y
15 34.6055 MiB 0.0000 MiB gc.collect()
16 34.6055 MiB 0.0000 MiB z = x * 2
17 34.6055 MiB 0.0000 MiB gc.collect()
18 34.6055 MiB 0.0000 MiB return x
Filename: array_ops.py
Line # Mem usage Increment Line Contents
================================================
6 34.4258 MiB 34.4258 MiB @profile(precision=4)
7 def array_ops(num):
8 34.4258 MiB 0.0000 MiB gc.collect()
9 34.4258 MiB 0.0000 MiB size1 = 10 ** num
10 34.4258 MiB 0.0000 MiB size2 = 20 ** (num+1)
11 35.0820 MiB 0.6562 MiB x = [1] * size1
12 523.3711 MiB 488.2891 MiB y = [2] * size2
13 1011.6172 MiB 488.2461 MiB y *= 2
14 35.2969 MiB 0.0000 MiB del y
15 35.2969 MiB 0.0000 MiB gc.collect()
16 36.5703 MiB 1.2734 MiB z = x * 2
17 36.5703 MiB 0.0000 MiB gc.collect()
18 36.8242 MiB 0.2539 MiB return x
Filename: array_ops.py
Line # Mem usage Increment Line Contents
================================================
6 34.4258 MiB 34.4258 MiB @profile(precision=4)
7 def array_ops(num):
8 34.4258 MiB 0.0000 MiB gc.collect()
9 34.4258 MiB 0.0000 MiB size1 = 10 ** num
10 34.4258 MiB 0.0000 MiB size2 = 20 ** (num+1)
11 42.0391 MiB 7.6133 MiB x = [1] * size1
12 9807.7109 MiB 9765.6719 MiB y = [2] * size2
13 19573.2109 MiB 9765.5000 MiB y *= 2
14 42.1641 MiB 0.0000 MiB del y
15 42.1641 MiB 0.0000 MiB gc.collect()
16 57.3594 MiB 15.1953 MiB z = x * 2
17 57.3594 MiB 0.0000 MiB gc.collect()
18 57.3594 MiB 0.0000 MiB return x
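Since all three workers write their tables to the same stdout, the output can interleave on longer runs. The profile decorator accepts a stream argument, so one option (a sketch; the per-pid file name is my own choice, not part of the original answer) is to send each worker's table to its own file:

from memory_profiler import profile
import multiprocessing as mp
import gc, os

def array_ops(num):
    # bind the profiler to a per-process log file at call time
    with open('array_ops_{}.log'.format(os.getpid()), 'a') as log:

        @profile(stream=log, precision=4)
        def _work(n):
            gc.collect()
            x = [1] * (10 ** n)
            y = [2] * (20 ** (n + 1))
            del y
            gc.collect()
            return x

        return len(_work(num))

if __name__ == '__main__':
    pool = mp.Pool(3)
    pool.map(array_ops, [4, 5, 6])
    pool.close()
    pool.join()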
I am writing a simple application which splits a large text file into smaller files, and I have written two versions of it, one using lists and one using generators. I profiled both versions using the memory_profiler module, and it clearly showed the better memory efficiency of the generator version. Strangely enough, though, when the generator version was profiled, the execution time increased. The demonstration below explains what I mean.
Version using Lists
from memory_profiler import profile

@profile()
def main():
    file_name = input("Enter the full path of file you want to split into smaller inputFiles: ")
    input_file = open(file_name).readlines()
    num_lines_orig = len(input_file)
    parts = int(input("Enter the number of parts you want to split in: "))
    output_files = [(file_name + str(i)) for i in range(1, parts + 1)]
    st = 0
    p = int(num_lines_orig / parts)
    ed = p
    for i in range(parts-1):
        with open(output_files[i], "w") as OF:
            OF.writelines(input_file[st:ed])
        st = ed
        ed = st + p

    with open(output_files[-1], "w") as OF:
        OF.writelines(input_file[st:])

if __name__ == "__main__":
    main()
When run with the profiler:
$ time py36 Splitting\ text\ files_BAD_usingLists.py
Enter the full path of file you want to split into smaller inputFiles: /apps/nttech/rbhanot/Downloads/test.txt
Enter the number of parts you want to split in: 3
Filename: Splitting text files_BAD_usingLists.py
Line # Mem usage Increment Line Contents
================================================
6 47.8 MiB 0.0 MiB @profile()
7 def main():
8 47.8 MiB 0.0 MiB file_name = input("Enter the full path of file you want to split into smaller inputFiles: ")
9 107.3 MiB 59.5 MiB input_file = open(file_name).readlines()
10 107.3 MiB 0.0 MiB num_lines_orig = len(input_file)
11 107.3 MiB 0.0 MiB parts = int(input("Enter the number of parts you want to split in: "))
12 107.3 MiB 0.0 MiB output_files = [(file_name + str(i)) for i in range(1, parts + 1)]
13 107.3 MiB 0.0 MiB st = 0
14 107.3 MiB 0.0 MiB p = int(num_lines_orig / parts)
15 107.3 MiB 0.0 MiB ed = p
16 108.1 MiB 0.7 MiB for i in range(parts-1):
17 107.6 MiB -0.5 MiB with open(output_files[i], "w") as OF:
18 108.1 MiB 0.5 MiB OF.writelines(input_file[st:ed])
19 108.1 MiB 0.0 MiB st = ed
20 108.1 MiB 0.0 MiB ed = st + p
21
22 108.1 MiB 0.0 MiB with open(output_files[-1], "w") as OF:
23 108.1 MiB 0.0 MiB OF.writelines(input_file[st:])
real 0m6.115s
user 0m0.764s
sys 0m0.052s
When run without the profiler:
$ time py36 Splitting\ text\ files_BAD_usingLists.py
Enter the full path of file you want to split into smaller inputFiles: /apps/nttech/rbhanot/Downloads/test.txt
Enter the number of parts you want to split in: 3
real 0m5.916s
user 0m0.696s
sys 0m0.080s
Now the one using generators
@profile()
def main():
    file_name = input("Enter the full path of file you want to split into smaller inputFiles: ")
    input_file = open(file_name)
    num_lines_orig = sum(1 for _ in input_file)
    input_file.seek(0)
    parts = int(input("Enter the number of parts you want to split in: "))
    output_files = ((file_name + str(i)) for i in range(1, parts + 1))
    st = 0
    p = int(num_lines_orig / parts)
    ed = p
    for i in range(parts-1):
        file = next(output_files)
        with open(file, "w") as OF:
            for _ in range(st, ed):
                OF.writelines(input_file.readline())

        st = ed
        ed = st + p
        if num_lines_orig - ed < p:
            ed = st + (num_lines_orig - ed) + p
        else:
            ed = st + p

    file = next(output_files)
    with open(file, "w") as OF:
        for _ in range(st, ed):
            OF.writelines(input_file.readline())

if __name__ == "__main__":
    main()
When run with the profiler option:
$ time py36 -m memory_profiler Splitting\ text\ files_GOOD_usingGenerators.py
Enter the full path of file you want to split into smaller inputFiles: /apps/nttech/rbhanot/Downloads/test.txt
Enter the number of parts you want to split in: 3
Filename: Splitting text files_GOOD_usingGenerators.py
Line # Mem usage Increment Line Contents
================================================
4 47.988 MiB 0.000 MiB @profile()
5 def main():
6 47.988 MiB 0.000 MiB file_name = input("Enter the full path of file you want to split into smaller inputFiles: ")
7 47.988 MiB 0.000 MiB input_file = open(file_name)
8 47.988 MiB 0.000 MiB num_lines_orig = sum(1 for _ in input_file)
9 47.988 MiB 0.000 MiB input_file.seek(0)
10 47.988 MiB 0.000 MiB parts = int(input("Enter the number of parts you want to split in: "))
11 48.703 MiB 0.715 MiB output_files = ((file_name + str(i)) for i in range(1, parts + 1))
12 47.988 MiB -0.715 MiB st = 0
13 47.988 MiB 0.000 MiB p = int(num_lines_orig / parts)
14 47.988 MiB 0.000 MiB ed = p
15 48.703 MiB 0.715 MiB for i in range(parts-1):
16 48.703 MiB 0.000 MiB file = next(output_files)
17 48.703 MiB 0.000 MiB with open(file, "w") as OF:
18 48.703 MiB 0.000 MiB for _ in range(st, ed):
19 48.703 MiB 0.000 MiB OF.writelines(input_file.readline())
20
21 48.703 MiB 0.000 MiB st = ed
22 48.703 MiB 0.000 MiB ed = st + p
23 48.703 MiB 0.000 MiB if num_lines_orig - ed < p:
24 48.703 MiB 0.000 MiB ed = st + (num_lines_orig - ed) + p
25 else:
26 48.703 MiB 0.000 MiB ed = st + p
27
28 48.703 MiB 0.000 MiB file = next(output_files)
29 48.703 MiB 0.000 MiB with open(file, "w") as OF:
30 48.703 MiB 0.000 MiB for _ in range(st, ed):
31 48.703 MiB 0.000 MiB OF.writelines(input_file.readline())
real 1m48.071s
user 1m13.144s
sys 0m19.652s
When run without the profiler:
$ time py36 Splitting\ text\ files_GOOD_usingGenerators.py
Enter the full path of file you want to split into smaller inputFiles: /apps/nttech/rbhanot/Downloads/test.txt
Enter the number of parts you want to split in: 3
real 0m10.429s
user 0m3.160s
sys 0m0.016s
So, first of all, why does profiling make my code slow? Secondly, if profiling impacts execution speed, why does this effect not show up in the version of the code using lists?
I CPU-profiled the code using line_profiler, and I got the answer this time. The reason the generator version takes more time is these lines:
19 2 11126.0 5563.0 0.2 with open(file, "w") as OF:
20 379886 200418.0 0.5 3.0 for _ in range(st, ed):
21 379884 2348653.0 6.2 35.1 OF.writelines(input_file.readline())
And the reason it does not slow down the list version is:
19 2 9419.0 4709.5 0.4 with open(output_files[i], "w") as OF:
20 2 1654165.0 827082.5 65.1 OF.writelines(input_file[st:ed])
For lists, the new file is written by simply taking a copy of the list via slicing, and that is in fact a single statement. For the generator version, however, the new file is populated by reading the input file line by line, which makes the memory profiler fire on every single line; that amounts to increased CPU time.
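If you want to keep the generator version's flat memory profile while giving the profiler only one statement per part, one option (a sketch, not the original code; split_file is my own helper name) is to let writelines consume a bounded slice of the file iterator via itertools.islice:

from itertools import islice

def split_file(file_name, parts):
    with open(file_name) as input_file:
        num_lines = sum(1 for _ in input_file)   # count lines
        input_file.seek(0)                       # rewind for the real pass
        per_part = num_lines // parts
        for i in range(1, parts + 1):
            # the last part takes whatever lines remain
            n = per_part if i < parts else num_lines - per_part * (parts - 1)
            with open(file_name + str(i), "w") as out:
                # writelines consumes islice lazily, so memory stays flat
                # while the profiled loop body is a single statement
                out.writelines(islice(input_file, n))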
I am using a Python memory profiler, putting @profile on top of every function to analyze its memory consumption. But every time I refresh the same page, the reported size of my function increases, and I don't know why.
I tried using the Python garbage collector, but that had no impact. I am pasting an example here.
Line # Mem usage Increment Line Contents
================================================
27 83.2 MiB 83.2 MiB @login_required
28 @profile
29 def app_user_detail(request, slug=None):
30 83.3 MiB 0.1 MiB university_obj = Universities.objects.using('cms').filter(deleted=0, status=1, verified=1)
31 83.3 MiB 0.0 MiB ids = [4, 5]
32 83.3 MiB 0.0 MiB master_user_types = MasterUserTypes.objects.using("cms").filter(~Q(id__in=ids)).all()
33 83.3 MiB 0.0 MiB gc.isenabled()
34 83.3 MiB 0.0 MiB gc.collect()
35 83.3 MiB 0.0 MiB return render(request, 'templates/news_managment/news_dashboard_detail.html',
36 89.0 MiB 5.7 MiB {'slug': slug, 'university_obj': university_obj, 'master_user_types': master_user_types})
Suppose the function is at 89.0 MiB right now; when I refresh the page, the size will increase. I am running the Django project on localhost.
I have to load a very large data file which is bigger than my RAM. I tried to do that both with pickle and HDF5, but the data get loaded into memory.
Is there a way to access the data without loading them into memory, accessing them directly on disk instead?
from memory_profiler import profile
import numpy as np
import pandas as pd
import cPickle
import gc
import time

basepath = '/Users/toto/Desktop/'

@profile
def test_write():
    dim = 10000000
    df = pd.DataFrame({'test': range(dim)}, index=range(dim))
    for i in range(30):
        df[str(i)] = df['test'] * np.random.normal(0, 1)

    print 'df created'
    cPickle.dump(df, open(basepath + 'df_pickle', 'wb'))
    gc.collect()
    store = pd.HDFStore(basepath + 'df_HDFpd')
    store['df'] = df
    store.close()
    gc.collect()
    del df
    gc.collect()

@profile
def test_read(method):
    print method
    if method == 'pickle':
        df = cPickle.load(open(basepath + 'df_pickle', 'rb'))
    if method == 'HDF':
        store = pd.HDFStore(basepath + 'df_HDFpd')
        df = store['df']
    print df.head(5)

    try:
        store.close()
    except:
        pass

# test_write()
timer = time.time()
test_read('HDF')
print 'Execution time: ', time.time() - timer
Result for test_write():
Line # Mem usage Increment Line Contents
================================================
12 42.5 MiB 0.0 MiB @profile
13 def test_write():
14 42.5 MiB 0.0 MiB dim = 10000000
15 969.4 MiB 926.8 MiB df = pd.DataFrame({'test':range(dim)}, index=range(dim))
16 3029.7 MiB 2060.3 MiB for i in range(30):
17 3029.7 MiB 0.0 MiB df[str(i)]=df['test'] * np.random.normal(0,1)
18
19 3029.7 MiB 0.0 MiB print 'df created'
20 3029.7 MiB 0.1 MiB cPickle.dump(df, open(basepath + 'df_pickle', 'wb'))
21 2616.7 MiB -413.0 MiB gc.collect()
22 2619.7 MiB 3.0 MiB store = pd.HDFStore(basepath + 'df_HDFpd')
23 2695.3 MiB 75.5 MiB store['df'] = df
24 2695.4 MiB 0.1 MiB store.close()
25 2696.1 MiB 0.7 MiB gc.collect()
26 1319.8 MiB -1376.3 MiB del df
27 1319.8 MiB 0.0 MiB gc.collect()
Result for test_read('HDF'):
Line # Mem usage Increment Line Contents
================================================
29 42.5 MiB 0.0 MiB @profile
30 def test_read(method):
31 42.5 MiB 0.0 MiB print method
32 42.5 MiB 0.0 MiB if method == 'pickle':
33 df = cPickle.load(open(basepath + 'df_pickle', 'rb'))
34 42.5 MiB 0.0 MiB if method == 'HDF':
35 46.7 MiB 4.2 MiB store = pd.HDFStore(basepath + 'df_HDFpd')
36 2488.7 MiB 2442.0 MiB df = store['df']
37 2489.2 MiB 0.5 MiB print df.head(5)
38
39 2489.2 MiB 0.0 MiB try:
40 2489.2 MiB 0.0 MiB store.close()
41 except:
42 pass
Result for test_read('pickle'):
to come in a few minutes
If you use h5py, when you index into an H5File it gives you something which is not a NumPy array, but is convertible to one. So you should slice it, or operate on it directly in some way, which avoids reading the entire thing into memory at once.
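A minimal sketch of that pattern (the file name big.h5 and dataset name data are my own examples, not from the question):

import h5py
import numpy as np

# write a demo dataset once
with h5py.File('big.h5', 'w') as f:
    f.create_dataset('data', data=np.arange(10000000, dtype=np.float64))

with h5py.File('big.h5', 'r') as f:
    dset = f['data']          # an h5py Dataset: nothing is read yet
    chunk = dset[0:1000000]   # only this slice is read into memory
    print(chunk.mean())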
I haven't used HDF yet, but it looks like you can read an HDF file incrementally with pandas.read_hdf(), either using the start/stop arguments or by getting it to return an iterator.
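A sketch of both options in recent pandas (chunked reading requires the HDF file to have been written in table format, e.g. store.put('df', df, format='table'); the path and key mirror the question's names, and the chunk size is my own choice):

import pandas as pd

# option 1: read only an explicit row range
part = pd.read_hdf('df_HDFpd', key='df', start=0, stop=100000)

# option 2: iterate over fixed-size chunks
for chunk in pd.read_hdf('df_HDFpd', key='df', chunksize=100000):
    print(chunk.shape)  # process one bounded chunk at a time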