Use with statement to close GDAL datasets - python

Consider this basic example for the use of a with statement from Jeff Knupp's blog:
class File():
    """Minimal context manager: opens the file on entry, closes it on exit."""

    def __init__(self, filename, mode):
        # Only remember the arguments; nothing is opened until __enter__.
        self.mode = mode
        self.filename = filename

    def __enter__(self):
        handle = open(self.filename, self.mode)
        self.open_file = handle
        return handle

    def __exit__(self, *args):
        # Exception details passed in *args are ignored; the file is closed
        # and the method returns None.
        self.open_file.close()
I have a testfile which contains two lines, 'ABC' and 'DEF'. Everything works as expected:
with File('/tmp/testfile','r') as f:
txt = [x.strip() for x in f.readlines()]
print(txt)
# ['ABC', 'DEF']
Calling a class method outside the with block gives me the expected error:
f.readlines()
ValueError Traceback (most recent call last)
in ()
----> 1 f.readlines()
ValueError: I/O operation on closed file.
Now to my question:
How can I achieve the same behavior with a gdal object instead of a file?
I have a class method, which should read data from disk and put it into a gdal raster for further processing. Once this is done, I would like to close the generated gdal raster appropriately. Normally this is done by setting it to None and calling gdal.Unlink.
However, when I put everything into a context structure as in the previous example, I can still interact with the dataset outside of the with block.
Here's a reproducible example:
class Raster:
    '''Context manager that builds a random raster with sidelength s.'''

    def __init__(self, s):
        self.sidelength = s

    def __enter__(self):
        # Create a one-band Float32 GTiff at the /vsimem/ path.
        size = self.sidelength
        gtiff = gdal.GetDriverByName('GTiff')
        self.raster = gtiff.Create('/vsimem/inmem.tif', size, size, 1, gdal.GDT_Float32)
        band = self.raster.GetRasterBand(1)
        band.WriteArray(np.random.rand(size, size))
        # The dataset object itself becomes the `with ... as` target.
        return self.raster

    def __exit__(self, *args):
        # Drop this instance's reference, then unlink the file.
        self.raster = None
        gdal.Unlink('/vsimem/inmem.tif')
The with block works as expected:
with Raster(5) as r:
print(r)
# <osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x7f8078862ae0> >
But after the block the object is still there and I can still read the values:
print(r)
#<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x7f8078044a20> >
print(r.ReadAsArray())
#[[0.2549882 0.80292517 0.23358545 0.6284887 0.7294142 ]
# [0.9310723 0.21535267 0.9054575 0.60967094 0.9937953 ]
# [0.69144976 0.01727938 0.16800325 0.61249655 0.1785022 ]
# [0.16179436 0.43245795 0.7042811 0.4809799 0.85534436]
# [0.67751276 0.7560658 0.9594516 0.6294476 0.3539126 ]]

You cannot really close a gdal dataset while keeping a reference to it. You can keep a reference to the Raster instance instead and use r.raster to access the dataset inside the with block.
class Raster:
    '''Raster class with sidelength s.

    Use as a context manager. Inside the ``with`` block the GDAL dataset is
    reachable as ``self.raster``; on exit that attribute is reset to ``None``
    and the file at the vsimem path is unlinked.
    '''

    # Single definition of the path used by both __enter__ and __exit__.
    _VSIMEM_PATH = '/vsimem/inmem.tif'

    def __init__(self, s):
        self.sidelength = s
        # Defined up front so `raster` is a valid attribute (None) even
        # before the context has been entered.
        self.raster = None

    def __enter__(self):
        # create raster in memory
        driver = gdal.GetDriverByName('GTiff')
        self.raster = driver.Create(self._VSIMEM_PATH, self.sidelength, self.sidelength, 1, gdal.GDT_Float32)
        self.raster.GetRasterBand(1).WriteArray(np.random.rand(self.sidelength, self.sidelength))
        # Return the wrapper rather than the dataset, so the `with` target
        # does not hold a direct reference to the dataset.
        return self

    def __exit__(self, *args):
        # close file and unlink
        self.raster = None
        gdal.Unlink(self._VSIMEM_PATH)
with Raster(5) as r:
print(r.raster)
Output:
<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x04564728> >
Outside the with block the dataset is unreachable:
r.raster is None
Output:
True
There will be problems again if you bind another variable to r.raster so you might want to completely encapsulate it inside the Raster instance, along with any functionality you need. At this point you would be more or less reinventing Rasterio but if your needs are simple that might be better than depending on it.

As suggested in the comment, Rasterio via rasterio.open implements the behaviour you are looking for.

Related

Pydrake: Creating a Trajectory Source for RigidTransformations

I've been trying to create a TrajectorySource for RigidTransforms to pass into a DifferentialInverseKinematicsIntegrator which only takes in RigidTransforms in its input port.
def createTraj(time, pose):
    """Return PiecewisePose.MakeLinear built from the entries of `time` and `pose`.

    Each item produced by iterating `time` is used as the lookup key into
    both `time` and `pose`.
    """
    times = [time[step] for step in time]
    poses = [pose[step] for step in time]
    return PiecewisePose.MakeLinear(times, poses)
Initially, I tried to directly pass in the output from createTraj above into TrajectorySource but ran into the issue of my trajectory having more than one column: Failure at systems/primitives/trajectory_source.cc:21 in TrajectorySource(): condition 'trajectory.cols() == 1' failed.
import matplotlib.pyplot as plt, mpld3
# Builds a diagram containing a DexterPPStation, a
# DifferentialInverseKinematicsIntegrator and a PoseSystem fed by two
# TrajectorySource systems, then wraps the built diagram in a Simulator.
class DexterTest():
# Output from createTraj is passed as parameter: traj into constructor
def __init__(self, traj):
builder = DiagramBuilder()
# Station setup: bins, random picking objects and the Dexter arm are added
# before the station is put into the builder and finalized.
self.station = DexterPPStation(1e-4, "/opt/drake/share/drake/manipulation/models/final_dexter_description/urdf/dexter.urdf")
self.station.CreateBins("/opt/drake/share/drake/examples/manipulation_station/models/bin.sdf", RigidTransform(np.array([0.5,0,0])), RigidTransform(np.array([0,0.5,0])))
self.station.CreateRandomPickingObjects(3)
self.station.AddDexter()
builder.AddSystem(self.station)
self.station.Finalize()
# NOTE(review): the frame is looked up on station.plant while the integrator
# is constructed with station.controller_plant. If these are different plants,
# this may be what raises "This multibody element does not belong to the
# supplied MultibodyTree" -- confirm.
self.diff_ik = DifferentialInverseKinematicsIntegrator(self.station.controller_plant, self.station.plant.GetFrameByName("link6", self.station.dexter["instance"]), self.station.time_step, DifferentialInverseKinematicsParameters(7,7))
builder.AddSystem(self.diff_ik)
#=========================================== Likely Source of Error ===========================================
# The trajectory is split into its position and orientation parts, each
# driving one input port of PoseSystem; PoseSystem's output feeds diff_ik.
pose = builder.AddSystem(PoseSystem())
p_G_source = builder.AddSystem(TrajectorySource(traj.get_position_trajectory()))
w_G_source = builder.AddSystem(TrajectorySource(traj.get_orientation_trajectory()))
builder.Connect(p_G_source.get_output_port(), pose.GetInputPort("p_G"))
builder.Connect(w_G_source.get_output_port(), pose.GetInputPort("r_G"))
builder.Connect(pose.get_output_port(), self.diff_ik.get_input_port())
#======================================================================================
# `meshcat` is not defined in this snippet; presumably a module-level object.
MeshcatVisualizerCpp.AddToBuilder(builder, self.station.GetOutputPort("query_object"), meshcat)
self.diagram = builder.Build()
self.simulator = Simulator(self.diagram)
# Contexts are fetched from the simulator's root context down to the plant.
self.diagram_context = self.simulator.get_mutable_context()
self.station_context = self.station.GetMyMutableContextFromRoot(self.diagram_context)
self.plant_context = self.station.GetSubsystemContext(self.station.plant, self.station_context)
self.station.SetRandomPoses(self.plant_context)
# NOTE(review): this Connect is issued after builder.Build() above --
# presumably it has to happen before Build(); confirm.
builder.Connect(self.diff_ik.get_output_port(), self.station.GetInputPort("dexter_position"))
# Advance the simulation to t = 1 with a target realtime rate of 2.0.
def run(self):
self.simulator.set_target_realtime_rate(2.0)
self.simulator.AdvanceTo(1)
class PoseSystem(LeafSystem):
    """Leaf system that outputs a RigidTransform built from its "p_G" and "r_G" inputs."""

    def __init__(self):
        LeafSystem.__init__(self)
        # Vector inputs: "p_G" has 3 elements, "r_G" has 4 (passed to Quaternion).
        self.p_G = self.DeclareVectorInputPort("p_G", BasicVector(3))
        self.r_G = self.DeclareVectorInputPort("r_G", BasicVector(4))
        # Abstract output "X_G" carrying a RigidTransform.
        self.DeclareAbstractOutputPort("X_G", Value[RigidTransform], self.CalcOutput)

    def CalcOutput(self, context, output):
        """Combine the current input values into one RigidTransform."""
        rotation = Quaternion(self.r_G.Eval(context))
        translation = self.p_G.Eval(context)
        output.set_value(RigidTransform(rotation, translation))
Instead, I tried to break up my trajectory into its orientation and position parts, add them to the input ports of a custom system, and then reconstruct them together in the output port. However, this gives me the following RuntimeError once the run method is called: RuntimeError: This multibody element does not belong to the supplied MultibodyTree.
Any help would be greatly appreciated!
I think you are very close. The PoseSystem looks like it should be a solution to the problem you've articulated in your post. (The error about MultibodyTree must be coming from the other part of your code.)
You don't actually need to break the RigidTransform up into orientation / translation to create your PoseSystem, your CalcOutput could just call output.set_value(poses.Eval(t)) if poses is a PiecewisePose trajectory.
I have an example of doing this in the PickAndPlaceTrajectory class in this notebook: https://github.com/RussTedrake/manipulation/blob/008cec6343dd39063705287e6664a3fee71a43b8/pose.ipynb

Issues publishing to device shadow using the aws-iot-device-sdk-python-v2

In a python application that uses the aws iot device sdk for python v2 (v1.7.1) I am running into an issue where I cannot update the device shadow.
After starting the program, the DeviceShadowManager will attempt to get the latest shadow state and set it locally.
If a delta state is present the DeviceShadowManager will merge the last reported state and delta state and publish it.
That works. However, when the manager subscribes for updates, after the initial setup, I am running into an error,
where when the desired state changes, the manager cannot update the reported state. Here is the error:
Exception ignored in: <class 'TypeError'>
Traceback (most recent call last):
File "/Users/tom/.../lib/python3.9/site-packages/awscrt/mqtt.py", line 506, in callback_wrapper
callback(topic=topic, payload=payload)
TypeError: callback_wrapper() missing 3 required positional arguments: 'dup', 'qos', and 'retain'
I looked at the source, but just do not understand why a TypeError is raised,
especially because this exact scenario seems to be handled by the try and except block or am I getting it all wrong?
The source of the error:
if callback:
def callback_wrapper(topic, payload, dup, qos, retain):
try:
callback(topic=topic, payload=payload, dup=dup, qos=QoS(qos), retain=retain)
except TypeError:
# This callback used to have fewer args.
# Try again, passing only those those args, to cover case where
# user function failed to take forward-compatibility **kwargs.
callback(topic=topic, payload=payload) # this is line 506
Below you can find my code and the log of the program.
This dataclass represents the shadow:
from dataclasses import dataclass
@dataclass
class DeviceShadow:
    """Values mirrored in the device shadow, with their defaults.

    The decorator is required: the manager calls ``dataclasses.asdict`` on
    instances and compares them with ``==``.
    """

    score_threshold: float = 0.6
    minimum_distance: int = 150
The shadow is managed by the DeviceShadowManager. Most of this is based on the shadow sample from the aforementioned repository.
from dataclasses import asdict
from queue import Queue
from threading import Lock
from awscrt import mqtt
from awsiot import iotshadow
from awsiot.iotshadow import IotShadowClient
from app.device_shadow.device_shadow import DeviceShadow, from_json as device_shadow_from_json
from app.models import log
SHADOW_VALUE_DEFAULT = DeviceShadow()
# Keeps a local DeviceShadow in sync with the AWS IoT device shadow:
# subscribes to update/get/delta topics, requests the current shadow, and
# publishes the merged state back.
class DeviceShadowManager:
_shadow_client: IotShadowClient
# Class-level default; instances rebind it in set_local() / set_desired().
shadow_value: DeviceShadow = DeviceShadow()
# Defined in the class body, so the same Lock object is shared by all instances.
_lock = Lock()
_thing_name: str
# Creates the shadow client, subscribes to all topics (blocking until each
# subscription future resolves) and finally publishes a get-shadow request.
def __init__(self, thing_name: str, mqtt_connection: mqtt.Connection):
self._thing_name = thing_name
self._shadow_client = iotshadow.IotShadowClient(mqtt_connection)
update_accepted_subscribed_future, _ = self._shadow_client.subscribe_to_update_shadow_accepted(
request=iotshadow.UpdateShadowSubscriptionRequest(thing_name=self._thing_name),
qos=mqtt.QoS.AT_LEAST_ONCE,
callback=self.on_update_shadow_accepted # omitted
)
update_rejected_subscribed_future, _ = self._shadow_client.subscribe_to_update_shadow_rejected(
request=iotshadow.UpdateShadowSubscriptionRequest(thing_name=self._thing_name),
qos=mqtt.QoS.AT_LEAST_ONCE,
callback=self.on_update_shadow_rejected # omitted
)
# Wait for subscriptions to succeed
# (these two waits pass 60 to result(); the later waits pass no timeout)
update_accepted_subscribed_future.result(60)
update_rejected_subscribed_future.result(60)
log.info("Subscribing to Get responses...")
get_accepted_subscribed_future, _ = self._shadow_client.subscribe_to_get_shadow_accepted(
request=iotshadow.GetShadowSubscriptionRequest(thing_name=self._thing_name),
qos=mqtt.QoS.AT_LEAST_ONCE,
callback=self.on_get_shadow_accepted)
get_rejected_subscribed_future, _ = self._shadow_client.subscribe_to_get_shadow_rejected(
request=iotshadow.GetShadowSubscriptionRequest(thing_name=self._thing_name),
qos=mqtt.QoS.AT_LEAST_ONCE,
callback=self.on_get_shadow_rejected) # omitted
# Wait for subscriptions to succeed
get_accepted_subscribed_future.result()
get_rejected_subscribed_future.result()
log.info("Subscribing to Delta events...")
delta_subscribed_future, _ = self._shadow_client.subscribe_to_shadow_delta_updated_events(
request=iotshadow.ShadowDeltaUpdatedSubscriptionRequest(
thing_name=self._thing_name
),
qos=mqtt.QoS.AT_LEAST_ONCE,
callback=self.on_shadow_delta_updated)
# Wait for subscription to succeed
delta_subscribed_future.result()
# From here on out the rest runs asynchronously.
# Issue request for shadow's current value.
# The response will be received by the on_get_accepted() callback
with self._lock:
publish_get_future = self._shadow_client.publish_get_shadow(
request=iotshadow.GetShadowRequest(
thing_name=self._thing_name,
),
qos=mqtt.QoS.AT_LEAST_ONCE
)
# Ensure that publish succeeds
publish_get_future.result()
# Callback for an accepted get-shadow request: applies delta + desired when a
# delta exists, otherwise the reported state, otherwise the defaults.
def on_get_shadow_accepted(self, response: iotshadow.GetShadowResponse) -> None:
log.info("Finished getting initial shadow value.")
if response.state and response.state.delta:
# NOTE(review): `reported` is defaulted to {} here, but the merge below uses
# `desired`, not `reported` -- confirm which one is intended.
if not response.state.reported:
response.state.reported = {}
merged_state = self.merge_states(response.state.delta, response.state.desired)
return self.set_desired(device_shadow_from_json(merged_state))
if response.state and response.state.reported:
return self.set_local(device_shadow_from_json(response.state.reported))
self.set_desired(SHADOW_VALUE_DEFAULT)
return
# Callback for delta events: merges the delta into the local shadow value and
# publishes the result.
def on_shadow_delta_updated(self, delta: iotshadow.ShadowDeltaUpdatedEvent) -> None:
if delta.state:
# NOTE(review): this inner branch sits inside `if delta.state:`, so
# `delta.state is None` can never be true here.
if delta.state is None:
log.info("Delta reports that nothing is set. Setting defaults...")
self.set_desired(SHADOW_VALUE_DEFAULT)
return
log.info("Delta reports that desired shadow is '{}'. Changing local shadow...".format(delta.state))
# Here merge_states receives self.shadow_value (a DeviceShadow) as `reported`.
self.set_desired(self.merge_states(delta.state, self.shadow_value))
else:
log.info("Delta did not report a change")
# NOTE(review): the next line is a comment; presumably it was the decorator
# `@staticmethod` (the function takes no `self`) -- confirm against the
# original file.
#staticmethod
# Copies every key/value of `delta` into `reported` and returns `reported`.
# NOTE(review): this uses item assignment; `reported` is annotated as
# DeviceShadow, and the answer below says a dataclass instance does not support
# it and raises TypeError, suggesting setattr(reported, key, value) instead.
def merge_states(delta: dict, reported: DeviceShadow):
for key, value in delta.items():
reported[key] = value
return reported
# Replaces the local shadow value under the lock, without publishing.
def set_local(self, value: DeviceShadow) -> None:
with self._lock:
self.shadow_value = value
# Stores new_value locally and publishes it as both desired and reported state;
# returns early when the local value already equals new_value.
def set_desired(self, new_value: DeviceShadow) -> None:
with self._lock:
if self.shadow_value == new_value:
log.debug("Local shadow is already '{}'.".format(new_value))
return
log.debug("Changing local shadow to '{}'.".format(new_value))
self.shadow_value = new_value
log.debug("Updating reported shadow to '{}'...".format(new_value))
request = iotshadow.UpdateShadowRequest(
thing_name=self._thing_name,
state=iotshadow.ShadowState(
desired=asdict(new_value),
reported=asdict(new_value),
),
)
# The value returned by publish_update_shadow is discarded here, so a failed
# publish is not observed at this point.
self._shadow_client.publish_update_shadow(request, mqtt.QoS.AT_LEAST_ONCE)
Below you will find the log:
DEBUG:app.mqtt:Connecting to xxxxxxxxxxxxxx-ats.iot.eu-central-1.amazonaws.com with client ID '80d8bc54-971e-0e65-a537-37d14a3cb630'...
INFO:app.models:Subscribing to Get responses...
INFO:app.models:Subscribing to Delta events...
INFO:app.models:Finished getting initial shadow value.
DEBUG:app.models:Changed local shadow to 'DeviceShadow(score_threshold=0.7, minimum_distance=1503)'.
DEBUG:app.models:Updating reported shadow to 'DeviceShadow(score_threshold=0.7, minimum_distance=1503)'...
INFO:app.models:Update request published.
DEBUG:app.models:Finished updating reported shadow to '{'score_threshold': 0.7, 'minimum_distance': 1503}'.
INFO:app.models:Delta reports that desired shadow is '{'minimum_distance': 15035}'. Changing local shadow...
Exception ignored in: <class 'TypeError'>
Traceback (most recent call last):
File "/Users/tom/.../lib/python3.9/site-packages/awscrt/mqtt.py", line 506, in callback_wrapper
callback(topic=topic, payload=payload)
TypeError: callback_wrapper() missing 3 required positional arguments: 'dup', 'qos', and 'retain'
DEBUG:app.models:Finished updating reported shadow to '{'score_threshold': 0.7, 'minimum_distance': 1503}'.
As you can see the stacktrace is pretty short, is there a way to debug this better?
Any ideas to why it is giving me this particular error and maybe how to solve it?
All help is appreciated!
I am pretty sure the problem lies within
#staticmethod
def merge_states(delta: dict, reported: DeviceShadow):
for key, value in delta.items():
reported[key] = value
return reported
where the __setitem__ call on the reported argument raises a TypeError because the reported argument is a DeviceShadow dataclass object that doesn't support item assignment.
If you want to set fields of a dataclass where you have a string of the field name, you can use setattr(reported, key, value).

add chartobject to excel via Python

So I have been trying to add a chart object to an Excel file using IronPython and I keep getting an error whenever I call ws.ChartObjects. For some reason it tells me that it's a DispCallable and that it has no Add property.
clr.AddReferenceByName('Microsoft.Office.Interop.Excel, Version=11.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c')
from Microsoft.Office.Interop import Excel
System.Threading.Thread.CurrentThread.CurrentCulture = System.Globalization.CultureInfo("en-US")
from System.Runtime.InteropServices import Marshal
def SetUp(xlApp):
    """Switch off Visible, DisplayAlerts and ScreenUpdating on xlApp and return it."""
    # Suppress updates and warning pop-ups.
    for flag in ('Visible', 'DisplayAlerts', 'ScreenUpdating'):
        setattr(xlApp, flag, False)
    return xlApp
def ExitExcel(filePath, xlApp, wb, ws):
    """Save wb to filePath, close the active workbook and release the COM objects.

    Returns None.
    """
    wb.SaveAs(str(filePath))
    xlApp.ActiveWorkbook.Close(False)
    xlApp.ScreenUpdating = True
    # Clean up before exiting Excel: if any COM object remains unreleased,
    # Excel crashes the next time it is opened. Release order is ws, wb, xlApp.
    for com_object in (ws, wb, xlApp):
        Marshal.ReleaseComObject(com_object)
def GetWidthHeight(origin, extent, ws):
    """Return [left, top, width, height] for the range between two cells.

    origin -- cell whose Left and Top are reported as the position.
    extent -- second corner cell of the range.
    ws     -- worksheet; ws.Range[origin, extent] supplies Width and Height.
    """
    # Read the position from the `origin` cell that was passed in, rather
    # than rebuilding the same cell from the module-level `bb` / `cellRange`
    # names, which made the function depend on hidden globals.
    left = origin.Left
    top = origin.Top
    span = ws.Range[origin, extent]
    return [left, top, span.Width, span.Height]
# Main script: opens the workbook, computes the rectangle covered by cellRange
# and tries to add a chart object of that size to the sheet.
# `runMe`, `filePath`, `sheetName`, `cellRange` and `bb` are not defined in
# this snippet; presumably they are supplied by the host environment.
if runMe:
message = None
try:
xlApp = SetUp(Excel.ApplicationClass())
errorReport = None
# NOTE(review): lower-case `open` -- the Excel interop method is presumably
# `Workbooks.Open`; confirm.
xlApp.Workbooks.open(str(filePath))
wb = xlApp.ActiveWorkbook
ws = xlApp.Sheets(sheetName)
# i have no clue why ws.ChartObjects.Count throws an error all the time
# origin / extent are the two corner cells taken from bb.xlRange(cellRange).
origin = ws.Cells(bb.xlRange(cellRange)[1], bb.xlRange(cellRange)[0])
extent = ws.Cells(bb.xlRange(cellRange)[3], bb.xlRange(cellRange)[2])
# GetWidthHeight is called four times with identical arguments, once per
# element of its returned list.
left = GetWidthHeight(origin, extent, ws)[0]
top = GetWidthHeight(origin, extent, ws)[1]
width = GetWidthHeight(origin, extent, ws)[2]
height = GetWidthHeight(origin, extent, ws)[3]
# NOTE(review): this is the call reported to fail with "'DispCallable' object
# has no attribute 'Add'"; ChartObjects is accessed without call parentheses.
# The answer below suggests ws.ChartObjects().Add(...).
xlChartObject = ws.ChartObjects.Add(int(left), int(top), int(width), int(height))
Marshal.ReleaseComObject(extent)
Marshal.ReleaseComObject(origin)
ExitExcel(filePath, xlApp, wb, ws)
except:
# if an error occurs anywhere in the process, catch it
# (bare except: the exception is not re-raised, only its traceback text is kept)
import traceback
errorReport = traceback.format_exc()
My problem is with calling ws.ChartObjects.Add() which throws an exception 'DispCallable' object has no attribute 'Add'. How do i go around this? What is wrong?
Based on a similar issue indicating that ChartObjects is a function you should use
ChartObjects().Add(...)
As per official documentation, the arguments should be double. If this is not the issue, you can split
xlChartObject = ws.ChartObjects.Add(...
into
xlChartObjects = ws.ChartObjects
xlChartObject = xlChartObjects.Add(...
to start debugging. It is a good idea to:
Check available methods (for the class of xlChartObjects, e.g.) with How do I get list of methods in a Python class?, or Finding what methods an object has.
Check type with What's the canonical way to check for type in python?.
You will likely learn how to fix these lines.
PS: In the code you posted sheetName and bb are not defined, although you probably define them earlier.

Create UV Set with openmaya API 2.0

I'm writing a custom reader node for Maya (in Python with the OpenMaya API 2.0) and I would like to send my UV sets to a regular Maya mesh node.
I'm wondering what would be the best way to push the UV sets into a mesh node. I wasn't able to find what data I have to create and how to send it to the mesh node.
My reader is an OpenMaya.MPxNode that pushes custom data to an OpenMaya.MPxSurfaceShape. The shape is linked by out mesh / in mesh plugs to a regular Maya mesh. I attempted to fill the uvSet plug of this shape using compute, but without success. I expect the UV sets to be sent to the mesh.
The following code sample is a limited test where my only goal is to create a new UVSet and attach it to the mesh.
Any ideas or documentation which could help?
I tried several things but I always get the error below.
The error:
// Error: (kFailure): Object does not exist
# Traceback (most recent call last):
# File "/path/to/maya/plug-ins/test_create_uv_set.py", line 60, in compute
# mesh.createUVSet("toto")
# RuntimeError: (kFailure): Object does not exist //
The running code:
"""
usage
import maya.cmds as cmds
import pymel.core as pm
cmds.loadPlugin("/path/to/maya/plug-ins/test_create_uv_set.py")
transform_node = pm.polySphere(n='transform1', ch=1, o=1, r=4)[0]
mesh1_node = transform_node.getShape()
pm.setAttr(mesh1_node + ".visibility", False)
uv_set_mod_node = pm.createNode("uvSetModifier", name="uvsetmodifier1")
mesh2_node = pm.createNode("mesh", name="mesh2", parent=transform_node)
pm.hyperShade(assign="initialShadingGroup")
pm.Attribute.connect(mesh1_node.attr("outMesh"), uv_set_mod_node.attr("inMesh"))
pm.Attribute.connect(uv_set_mod_node.attr("outMesh"), mesh2_node.attr("inMesh"))
"""
import sys
import maya.api.OpenMaya as OpenMaya
def maya_useNewAPI():
    """Do nothing; presumably its presence marks the plug-in as using Maya Python API 2.0 (confirm)."""
    return None
class uvSetModifier(OpenMaya.MPxNode):
    """Node that passes inMesh through to outMesh and tries to add a UV set named "toto"."""

    typeName = "uvSetModifier"
    id = OpenMaya.MTypeId(0xCCCCC)
    # Attribute handles; assigned in initialize().
    inMesh = None
    outMesh = None

    @staticmethod
    def creator():
        """Return a new node instance (passed to registerNode as the creator)."""
        return uvSetModifier()

    @staticmethod
    def initialize():
        """Create the inMesh / outMesh typed attributes and add them to the node."""
        typedAttr = OpenMaya.MFnTypedAttribute()
        uvSetModifier.inMesh = typedAttr.create("inMesh", "im", OpenMaya.MFnData.kMesh)
        typedAttr.writable = True
        OpenMaya.MPxNode.addAttribute(uvSetModifier.inMesh)
        uvSetModifier.outMesh = typedAttr.create("outMesh", "om", OpenMaya.MFnData.kMesh)
        typedAttr.writable = True
        OpenMaya.MPxNode.addAttribute(uvSetModifier.outMesh)
        # NOTE(review): no attributeAffects(inMesh, outMesh) is declared here --
        # confirm whether outMesh is meant to depend on inMesh.

    def __init__(self):
        OpenMaya.MPxNode.__init__(self)

    def compute(self, plug, datablock):
        """When outMesh is requested, copy the input mesh to it and call createUVSet."""
        if plug == uvSetModifier.outMesh:
            inputData = datablock.inputValue(uvSetModifier.inMesh)
            outputData = datablock.outputValue(uvSetModifier.outMesh)
            outputData.setMObject(inputData.asMesh())
            # The function set is attached to the object returned by
            # inputData.asMesh(); the createUVSet call below is the line that
            # raises "(kFailure): Object does not exist" in the reported traceback.
            mesh = OpenMaya.MFnMesh(inputData.asMesh())
            mesh.createUVSet("toto")
            datablock.setClean(plug)
def initializePlugin(obj):
    """Register the uvSetModifier node; on failure write to stderr and re-raise."""
    plugin = OpenMaya.MFnPlugin(obj, "Autodesk", "3.0", "Any")
    try:
        plugin.registerNode(
            uvSetModifier.typeName,
            uvSetModifier.id,
            uvSetModifier.creator,
            uvSetModifier.initialize,
        )
    except:
        # Report, then propagate the original exception.
        sys.stderr.write("Failed to register node\n")
        raise
def uninitializePlugin(obj):
    """Deregister the uvSetModifier node; a failure is only reported on stderr."""
    plugin = OpenMaya.MFnPlugin(obj)
    try:
        plugin.deregisterNode(uvSetModifier.id)
    except:
        # Unlike initializePlugin, the exception is not re-raised here.
        sys.stderr.write("Failed to deregister node\n")
UPDATE 1: THEODOX input
I added the following line (59)
if inputData.asMesh() is not None:
print "test"
mesh = OpenMaya.MFnMesh(inputData.asMesh())
mesh.createUVSet("toto")
results: I still get the same error message

memory overflow when using numpy load in a loop

Looping over npz files load causes memory overflow (depending on the file
list length).
None of the following seems to help
Deleting the variable which stores the data in the file.
Using mmap.
calling gc.collect() (garbage collection).
The following code should reproduce the phenomenon:
import numpy as np
# generate a file for the demo
X = np.random.randn(1000,1000)
np.savez('tmp.npz',X=X)
# here come the overflow:
for i in xrange(1000000):
data = np.load('tmp.npz')
data.close() # avoid the "too many files are open" error
in my real application the loop is over a list of files and the overflow exceeds 24GB of RAM!
please note that this was tried on ubuntu 11.10, and for both numpy v
1.5.1 as well as 1.6.0
I have filed a report in numpy ticket 2048 but this may be of a wider interest and so I am posting it here as well (moreover, I am not sure that this is a bug but may result of my bad programming).
SOLUTION (by HYRY):
the command
del data.f
should precede the command
data.close()
for more information and a method to find the solution, please read HYRY's kind answer below
I think this is a bug, and maybe I found the solution: call "del data.f".
for i in xrange(10000000):
data = np.load('tmp.npz')
del data.f
data.close() # avoid the "too many files are open" error
To find this kind of memory leak, you can use the following code:
import numpy as np
import gc
# here come the overflow:
for i in xrange(10000):
data = np.load('tmp.npz')
data.close() # avoid the "too many files are open" error
d = dict()
for o in gc.get_objects():
name = type(o).__name__
if name not in d:
d[name] = 1
else:
d[name] += 1
items = d.items()
items.sort(key=lambda x:x[1])
for key, value in items:
print key, value
After the test program, I created a dict and count objects in gc.get_objects(). Here is the output:
...
wrapper_descriptor 1382
function 2330
tuple 9117
BagObj 10000
NpzFile 10000
list 20288
dict 21001
From the result we know that there is something wrong with BagObj and NpzFile. Find the code:
class NpzFile(object):
def __init__(self, fid, own_fid=False):
...
self.zip = _zip
self.f = BagObj(self)
if own_fid:
self.fid = fid
else:
self.fid = None
def close(self):
"""
Close the file.
"""
if self.zip is not None:
self.zip.close()
self.zip = None
if self.fid is not None:
self.fid.close()
self.fid = None
def __del__(self):
self.close()
class BagObj(object):
def __init__(self, obj):
self._obj = obj
def __getattribute__(self, key):
try:
return object.__getattribute__(self, '_obj')[key]
except KeyError:
raise AttributeError, key
NpzFile has a __del__() method, NpzFile.f is a BagObj, and BagObj._obj is the NpzFile. This is a reference cycle, and because one of the objects in it defines __del__(), the garbage collector treats both the NpzFile and the BagObj as uncollectable. Here is some explanation in the Python documentation: http://docs.python.org/library/gc.html#gc.garbage
So, to break the reference cycle, you need to call "del data.f".
What I found as the solution: (python==3.8 and numpy==1.18.5)
import gc # import garbage collector interface
for i in range(1000):
data = np.load('tmp.npy')
# process data
del data
gc.collect()

Categories