pydicom 'Dataset' object has no attribute 'TransferSyntaxUID' - python

I'm using pydicom 1.0.0a1, downloaded from here. When I run the following code:
ds = pydicom.read_file('./DR/abnormal/abc.dcm', force=True)
ds.pixel_array
this error occurs:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-6-d4e81d303439> in <module>()
7 ds=pydicom.read_file('./DR/abnormal/abc.dcm',force=True)
8
----> 9 ds.pixel_array
10
/Applications/anaconda/lib/python2.7/site-packages/pydicom-1.0.0a1-py2.7.egg/pydicom/dataset.pyc in __getattr__(self, name)
501 if tag is None: # `name` isn't a DICOM element keyword
502 # Try the base class attribute getter (fix for issue 332)
--> 503 return super(Dataset, self).__getattribute__(name)
504 tag = Tag(tag)
505 if tag not in self: # DICOM DataElement not in the Dataset
/Applications/anaconda/lib/python2.7/site-packages/pydicom-1.0.0a1-py2.7.egg/pydicom/dataset.pyc in pixel_array(self)
1064 The Pixel Data (7FE0,0010) as a NumPy ndarray.
1065 """
-> 1066 return self._get_pixel_array()
1067
1068 # Format strings spec'd according to python string formatting options
/Applications/anaconda/lib/python2.7/site-packages/pydicom-1.0.0a1-py2.7.egg/pydicom/dataset.pyc in _get_pixel_array(self)
1042 elif self._pixel_id != id(self.PixelData):
1043 already_have = False
-> 1044 if not already_have and not self._is_uncompressed_transfer_syntax():
1045 try:
1046 # print("Pixel Data is compressed")
/Applications/anaconda/lib/python2.7/site-packages/pydicom-1.0.0a1-py2.7.egg/pydicom/dataset.pyc in _is_uncompressed_transfer_syntax(self)
662 """Return True if the TransferSyntaxUID is a compressed syntax."""
663 # FIXME uses file_meta here, should really only be thus for FileDataset
--> 664 return self.file_meta.TransferSyntaxUID in NotCompressedPixelTransferSyntaxes
665
666 def __ne__(self, other):
/Applications/anaconda/lib/python2.7/site-packages/pydicom-1.0.0a1-py2.7.egg/pydicom/dataset.pyc in __getattr__(self, name)
505 if tag not in self: # DICOM DataElement not in the Dataset
506 # Try the base class attribute getter (fix for issue 332)
--> 507 return super(Dataset, self).__getattribute__(name)
508 else:
509 return self[tag].value
AttributeError: 'Dataset' object has no attribute 'TransferSyntaxUID'
I read the Google group post, changed the filereader.py file to the posted version, and then got this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Applications/anaconda/lib/python2.7/site-packages/pydicom-1.0.0a1-py2.7.egg/pydicom/__init__.py", line 41, in read_file
from pydicom.dicomio import read_file
File "/Applications/anaconda/lib/python2.7/site-packages/pydicom-1.0.0a1-py2.7.egg/pydicom/dicomio.py", line 3, in <module>
from pydicom.filereader import read_file, read_dicomdir
File "/Applications/anaconda/lib/python2.7/site-packages/pydicom-1.0.0a1-py2.7.egg/pydicom/filereader.py", line 35, in <module>
from pydicom.datadict import dictionaryVR
ImportError: cannot import name dictionaryVR
Does anybody know how to solve this problem?

You should set the TransferSyntaxUID after reading the file and before trying to get the pixel_array.
import pydicom.uid

ds = pydicom.read_file('./DR/abnormal/abc.dcm', force=True)
ds.file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian  # or whatever the correct transfer syntax for the file is
ds.pixel_array
The correction from the post you referenced predates some renaming done to harmonize naming in the code, so the error is thrown because the current master uses dictionary_VR rather than dictionaryVR. Setting the transfer syntax in user code as above avoids that problem.
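If the dataset was read with force=True from a file with no file meta information at all, file_meta itself may be missing too. A minimal defensive sketch, assuming the pydicom 1.x API and guessing Implicit VR Little Endian (check what your file actually uses):
import pydicom
import pydicom.uid
from pydicom.dataset import Dataset

ds = pydicom.read_file('./DR/abnormal/abc.dcm', force=True)
if not hasattr(ds, 'file_meta'):
    ds.file_meta = Dataset()  # no meta header was found in the file
if not hasattr(ds.file_meta, 'TransferSyntaxUID'):
    # ImplicitVRLittleEndian is only a guess; use the syntax that matches
    # how the pixel data is actually encoded
    ds.file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
arr = ds.pixel_array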

Related

Python lzma unable to load joblib

I have a scikit-learn pipeline that I serialize using:
import lzma
import dill

with lzma.open('outputs/baseModel_LR.joblib', "wb") as f:
    dill.dump(pipeline, f)
When I try to open the file and load the pipeline using:
with lzma.open('outputs/baseModel_LR.joblib', "rb") as f:
    model = dill.load(f)
it gives this error:
---------------------------------------------------------------------------
EOFError Traceback (most recent call last)
somePath/notebooks/test.ipynb Cell 5 in <cell line: 1>()
1 with lzma.open('outputs/baseModel_LR.joblib',"rb") as f:
----> 2 model = dill.load(f)
3 model
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dill/_dill.py:373, in load(file, ignore, **kwds)
367 def load(file, ignore=None, **kwds):
368 """
369 Unpickle an object from a file.
370
371 See :func:`loads` for keyword arguments.
372 """
--> 373 return Unpickler(file, ignore=ignore, **kwds).load()
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dill/_dill.py:646, in Unpickler.load(self)
645 def load(self): #NOTE: if settings change, need to update attributes
--> 646 obj = StockUnpickler.load(self)
647 if type(obj).__module__ == getattr(_main_module, '__name__', '__main__'):
648 if not self._ignore:
649 # point obj class to main
File /anaconda/envs/azureml_py38/lib/python3.8/lzma.py:200, in LZMAFile.read(self, size)
194 """Read up to size uncompressed bytes from the file.
...
100 "end-of-stream marker was reached")
101 else:
102 rawblock = b""
EOFError: Compressed file ended before the end-of-stream marker was reached
Has anyone faced this problem and solved it? I use lzma because otherwise the joblib file is 27 GB, and with lzma it's just 20 MB.
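For reference, a minimal self-contained round-trip sketch (the pipeline is a stand-in; the path assumes the outputs/ directory exists). This EOFError usually means the .xz stream on disk is truncated, e.g. the writing process died before the file was finalized, so re-serializing and loading back immediately is a quick way to test whether the file itself is damaged:
import lzma
import dill
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

pipeline = Pipeline([("clf", LogisticRegression())])  # stand-in pipeline

# Leaving the with-block closes the LZMAFile and writes the end-of-stream
# marker; a dump interrupted before this point produces a truncated file.
with lzma.open('outputs/baseModel_LR.joblib', "wb") as f:
    dill.dump(pipeline, f)

with lzma.open('outputs/baseModel_LR.joblib', "rb") as f:
    model = dill.load(f)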

Problem loading xlsx file with pandas in python

I installed a new pandas version and now cannot import xlsx files as I used to. I have looked at similar issues, and most seem to be resolved by adding engine="openpyxl"; however, in my case, when I run the following code:
df = pd.read_excel("IPO_10.xlsx", engine="openpyxl")
df.head()
I get the following error:
AttributeError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in _repr_html_(self)
732 GH3541, GH3573
733 """
--> 734 width, height = console.get_console_size()
735 max_columns = get_option("display.max_columns")
736 nb_columns = len(self.columns)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/formats/format.py in to_html(self, buf, encoding, classes, notebook, border)
980 Buffer to write to. If None, the output is returned as a string.
981 encoding : str, default “utf-8”
--> 982 Set character encoding.
983 classes : str or list-like
984 classes to include in the `class` attribute of the opening
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/formats/html.py in __init__(self, formatter, classes, border, table_id, render_links)
54 self.col_space = {
55 column: f"{value}px" if isinstance(value, int) else value
---> 56 for column, value in self.fmt.col_space.items()
57 }
58
AttributeError: 'NoneType' object has no attribute 'items'
My data frame is then still printed as output. How can I solve this? Thank you.
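Note that the traceback comes from the notebook's HTML repr (_repr_html_), not from read_excel itself, which points at a broken or mixed pandas install. A small diagnostic sketch (not a fix) that bypasses the HTML formatter and shows which versions are actually loaded:
import pandas as pd
import openpyxl

print(pd.__version__, openpyxl.__version__)  # look for a stale or mixed install

df = pd.read_excel("IPO_10.xlsx", engine="openpyxl")
print(df.head())  # print() uses the plain-text repr and skips _repr_html_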

How to run model trained on GPU on CPU in spaCy

I'm using spaCy 2.0.18. I have trained a model on a GPU, but now I want to load this model for predictions and run it on the CPU only.
I am able to load the model into memory but once I try to use it I get the following error:
import spacy
nlp = spacy.load("path_to_my_model")
# works fine up to this moment
result = nlp("Test") # throws the exception below:
Exception ignored in: <bound method Stream.__del__ of <cupy.cuda.stream.Stream object at 0x7fd288621be0>>
Traceback (most recent call last):
File "cupy/cuda/stream.pyx", line 161, in cupy.cuda.stream.Stream.__del__
AttributeError: 'Stream' object has no attribute 'ptr'
---------------------------------------------------------------------------
CUDARuntimeError Traceback (most recent call last)
<ipython-input-4-306c96b208c5> in <module>
----> 1 nlp("Yolo")
/opt/anaconda3/lib/python3.7/site-packages/spacy/language.py in __call__(self, text, disable)
344 if not hasattr(proc, '__call__'):
345 raise ValueError(Errors.E003.format(component=type(proc), name=name))
--> 346 doc = proc(doc)
347 if doc is None:
348 raise ValueError(Errors.E005.format(name=name))
nn_parser.pyx in spacy.syntax.nn_parser.Parser.__call__()
nn_parser.pyx in spacy.syntax.nn_parser.Parser.parse_batch()
/opt/anaconda3/lib/python3.7/site-packages/spacy/util.py in get_cuda_stream(require)
236
237 def get_cuda_stream(require=False):
--> 238 return CudaStream() if CudaStream is not None else None
239
240
cupy/cuda/stream.pyx in cupy.cuda.stream.Stream.__init__()
cupy/cuda/runtime.pyx in cupy.cuda.runtime.streamCreate()
cupy/cuda/runtime.pyx in cupy.cuda.runtime.streamCreate()
cupy/cuda/runtime.pyx in cupy.cuda.runtime.check_status()
CUDARuntimeError: cudaErrorNoDevice: no CUDA-capable device is detected
How to force spaCy to use CPU instead of GPU?
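One observation from the traceback above: get_cuda_stream() returns CudaStream() whenever CudaStream is not None, i.e. whenever cupy imported successfully, so a CPU-only machine that still has cupy installed gets sent down the CUDA path. A hedged workaround (not an official spaCy switch) is to remove cupy from the inference environment with pip uninstall cupy. A quick check of which path will be taken:
try:
    import cupy  # noqa: F401
    print("cupy importable: spaCy 2.x will try to create CUDA streams")
except ImportError:
    print("cupy not importable: spaCy falls back to numpy on the CPU")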

Python: SQLAlchemy batch insert with on_conflict_do_update

I have to insert approx. 30,000 rows daily into my Postgres database.
I have 4 columns, namely:
id (pkey), category, createddate, updatedon.
My requirement is to update the updatedon and category columns with today's date and the new category if the id is already present, else insert a new row with createddate and updatedon being the same.
I found Ilja Everilä's answer (https://stackoverflow.com/a/44865375/5665430) for a batch upsert:
insert_statement = sqlalchemy.dialects.postgresql.insert(id_tag)
upsert_statement = insert_statement.on_conflict_do_update(
    constraint='id',
    set_={"createddate": insert_statement.excluded.createddate}
)
insert_values = df.to_dict(orient='records')
conn.execute(upsert_statement, insert_values)
It's throwing an AttributeError:
Traceback (most recent call last):
File "<ipython-input-60-4c5e5e0daf14>", line 5, in <module>
set_= dict(createddate = insert_statement.excluded.createddate)
File "/home/bluepi/anaconda2/lib/python2.7/site-packages/sqlalchemy/util/langhelpers.py", line 764, in __get__
obj.__dict__[self.__name__] = result = self.fget(obj)
File "/home/bluepi/anaconda2/lib/python2.7/site-packages/sqlalchemy/dialects/postgresql/dml.py", line 43, in excluded
return alias(self.table, name='excluded').columns
File "/home/bluepi/anaconda2/lib/python2.7/site-packages/sqlalchemy/sql/selectable.py", line 161, in alias
return _interpret_as_from(selectable).alias(name=name, flat=flat)
AttributeError: 'TextClause' object has no attribute 'alias'
I have tried the one-by-one update shown at http://docs.sqlalchemy.org/en/latest/dialects/postgresql.html#postgresql-insert-on-conflict, but I am getting the same error.
Please help me understand where I am going wrong; thanks in advance.
From your comment
id_tag is nothing but the name of my table in postgres
one could deduce that id_tag is bound to a string. If you'd provided a Minimal, Complete, and Verifiable example, there'd have been a lot less guesswork. As it turns out, postgresql.dml.insert() automatically wraps passed strings in a text() construct, and the result when trying to use Insert.excluded is:
In [2]: postgresql.insert('fail').excluded
~/sqlalchemy/lib/sqlalchemy/sql/selectable.py:43: SAWarning: Textual SQL FROM expression 'fail' should be explicitly declared as text('fail'), or use table('fail') for more specificity (this warning may be suppressed after 10 occurrences)
{"expr": util.ellipses_string(element)})
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-2-f176aac8b913> in <module>()
----> 1 postgresql.insert('fail').excluded
~/sqlalchemy/lib/sqlalchemy/util/langhelpers.py in __get__(self, obj, cls)
765 if obj is None:
766 return self
--> 767 obj.__dict__[self.__name__] = result = self.fget(obj)
768 return result
769
~/sqlalchemy/lib/sqlalchemy/dialects/postgresql/dml.py in excluded(self)
41
42 """
---> 43 return alias(self.table, name='excluded').columns
44
45 #_generative
~/sqlalchemy/lib/sqlalchemy/sql/selectable.py in alias(selectable, name, flat)
159
160 """
--> 161 return _interpret_as_from(selectable).alias(name=name, flat=flat)
162
163
AttributeError: 'TextClause' object has no attribute 'alias'
So, instead of passing a string containing the name of your table to postgresql.dml.insert(), pass it an actual Table object, or a lightweight table() construct populated with column() objects.
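A minimal sketch of that fix, reusing the column names from the question (conn and df are assumed from the original code; index_elements is used instead of constraint because a lightweight table() construct carries no constraint metadata):
from sqlalchemy import table, column
from sqlalchemy.dialects.postgresql import insert

id_tag = table(
    'id_tag',  # assumed table name
    column('id'),
    column('category'),
    column('createddate'),
    column('updatedon'),
)

insert_statement = insert(id_tag)
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['id'],
    set_={
        'category': insert_statement.excluded.category,
        'updatedon': insert_statement.excluded.updatedon,
    },
)
conn.execute(upsert_statement, df.to_dict(orient='records'))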

Error when calling Pkg_resources.resource_string

I'm a little confused. I'm working on a Python project where I load a resource file, read it as TSV, and transform it into a dictionary with pattern objects as keys for later use. To load the file, so far I've used the pkg_resources package from setuptools. It basically looks like this:
import re
from csv import DictReader
from pkg_resources import resource_string

def make_dict():
    """Make global dictionary."""
    global event_dict
    dictlines = [l.decode('utf8') for l in resource_string(
        'pkgname.resources.tsv', 'event_dict.tsv').splitlines()]
    reader = DictReader(dictlines, dialect='excel-tab')
    for row in reader:
        event = re.compile(r'\b{}\b'.format(re.escape(row['word'])))
        classes = string_to_list(row['id'])
        event_dict[event] = classes
So far, it worked well. However, once I started calling the module from another module, the following error appeared:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\Python\Python36\lib\site-packages\pkg_resources\__init__.py in get_provider(moduleOrReq)
430 try:
--> 431 module = sys.modules[moduleOrReq]
432 except KeyError:
KeyError: 'pkgname.resources.tsv'
During handling of the above exception, another exception occurred:
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-22-efa35954c76f> in <module>()
----> 1 make_event_dict()
<ipython-input-21-b318bc78e8fd> in make_event_dict()
4 global event_dict
5 dictlines = [l.decode('utf8') for l in resource_string(
----> 6 'pkgname.resources.tsv', 'event_classes_dict.tsv').splitlines()]
7 reader = DictReader(dictlines, dialect='excel-tab')
8 for row in reader:
C:\Python\Python36\lib\site-packages\pkg_resources\__init__.py in resource_string(self, package_or_requirement, resource_name)
1215 def resource_string(self, package_or_requirement, resource_name):
1216 """Return specified resource as a string"""
-> 1217 return get_provider(package_or_requirement).get_resource_string(
1218 self, resource_name
1219 )
C:\Python\Python36\lib\site-packages\pkg_resources\__init__.py in get_provider(moduleOrReq)
431 module = sys.modules[moduleOrReq]
432 except KeyError:
--> 433 __import__(moduleOrReq)
434 module = sys.modules[moduleOrReq]
435 loader = getattr(module, '__loader__', None)
ModuleNotFoundError: No module named 'pkgname'
Now I'm guessing something's wrong with my project setup, so this is how it's structured:
|Pkg\
|----setup.py
|----pkg\
|--------__init__.py
|--------events.py
|--------resources\
|------------__init__.py
|------------tsv\
|----------------__init__.py
|----------------event_dict.tsv
What could be wrong? I'm not exactly sure if the __init__.py files in the subfolders are needed, by the way.
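One thing worth checking against the tree above: the first argument to resource_string() has to be an importable module path, and the tree names the package pkg while the code asks for pkgname, which would explain the ModuleNotFoundError. A minimal sketch under the assumption that setup.py installs the package as pkg:
from pkg_resources import resource_string

# 'pkg.resources.tsv' matches the directory tree shown; the __init__.py
# files in resources/ and tsv/ are needed here so those directories
# import as regular packages
data = resource_string('pkg.resources.tsv', 'event_dict.tsv')
lines = [l.decode('utf8') for l in data.splitlines()]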
