Text to XML in python - python

I'm trying to convert text into xml format. And I'm using LXML Library. But I'm getting error message. Please help me. Thank you
import re
from lxml import etree
import urllib,urllib2
def get_movie_info(movie_id):
URL = "http://www.raaga.com/a/rss.asp?%s"%(movie_id)
f = urllib.urlopen(URL)
movie_info = f.read()
rss = "".join([ line.strip() for line in movie_info ])
mi_tree = etree.fromstring(rss)
#mi_title = self._parse_movie_title(mi_tree.xpath("/rss/channel/title/text()")[0])
#mi_tracks = mi_tree.xpath("/rss/channel/item")
return mi_tree
get_movie_info('A0000102')
Here is my traceback
Traceback (most recent call last):
File "py1.py", line 14, in <module>
get_movie_info('A0000102')
File "py1.py", line 9, in get_movie_info
mi_tree = etree.fromstring(rss)
File "lxml.etree.pyx", line 2743, in lxml.etree.fromstring (src/lxml\lxml.etre
e.c:52665)
File "parser.pxi", line 1573, in lxml.etree._parseMemoryDocument (src/lxml\lxm
l.etree.c:79932)
File "parser.pxi", line 1452, in lxml.etree._parseDoc (src/lxml\lxml.etree.c:7
8774)
File "parser.pxi", line 960, in lxml.etree._BaseParser._parseDoc (src/lxml\lxm
l.etree.c:75389)
File "parser.pxi", line 564, in lxml.etree._ParserContext._handleParseResultDo
c (src/lxml\lxml.etree.c:71739)
File "parser.pxi", line 645, in lxml.etree._handleParseResult (src/lxml\lxml.e
tree.c:72614)
File "parser.pxi", line 585, in lxml.etree._raiseParseError (src/lxml\lxml.etr
ee.c:71955)
lxml.etree.XMLSyntaxError: xmlParsePITarget: invalid name prefix 'xml', line 1,
column 13

It works fine for me without this string:
rss = "".join([ line.strip() for line in movie_info ])
Something like this:
mi_tree = etree.fromstring(movie_info)

Related

lxml.etree.XMLSyntaxError: error parsing attribute name, line 1, column 8

So basically, I keep getting and no matter what I do to try to fix it, it doesn't work.enter code here File "C:\Users\Administrator\Desktop\Py test\unblacklister.py", line 9, in doc = etree.parse(file) File "src\lxml\etree.pyx", line 3536, in lxml.etree.parse File "src\lxml\parser.pxi", line 1876, in lxml.etree._parseDocument File "src\lxml\parser.pxi", line 1902, in lxml.etree._parseDocumentFromURL File "src\lxml\parser.pxi", line 1805, in lxml.etree._parseDocFromFile File "src\lxml\parser.pxi", line 1177, in lxml.etree._BaseParser._parseDocFromFile File "src\lxml\parser.pxi", line 615, in lxml.etree._ParserContext._handleParseResultDoc File "src\lxml\parser.pxi", line 725, in lxml.etree._handleParseResult File "src\lxml\parser.pxi", line 654, in lxml.etree._raiseParseError File "condo.rbxl", line 1 lxml.etree.XMLSyntaxError: error parsing attribute name, line 1, column 8 and no matter what I do it doesn't fix it. Anyone know the problem?
the code: ```
import random import secrets from lxml import etree = 'condo.rbxl' doc = etree.parse(file) def uniqueId(): print('UniqueId Unpatched') for el in doc.xpath("//UniqueId[#name='UniqueId']"): el.text = f'ILOVEHOTDOGShjhjhjhjhhjjhjhjhjLOLlmao1O1L{secrets.token_hex(110)}' doc.write(file) def referentt(): print('Referent Unpatched') for el in doc.xpath("//Item[#referent]"): string = ''.join(random.choice('losmamis123456abcdoglolbannable') for i in range(70)) el.attrib['referent'] = f'DASDJKADSKJKLDFLjhhjhjhjFDKJSLJAFDSKAFDKJADFSJLADFSLJKLKJDSAFLKJADF3132132123132SLKJADFSKJLADFSKJLLKJASDFLKJLFSKJADKJLAFSDJKLADFSLKJASKJLDF{string}' doc.write(file) def assetId(): print('AssetId Unpatched') for el in doc.xpath("//SourceAssetId[#name='SourceAssetId']"): el.text = f'-{secrets.token_hex(20)}' doc.write(file)```
sorry its not organized

Nuitka Dependancy Error

I'm trying to compile a simple standalone python application using Nuitka. I was able to do this using just the standard library.
I'm now looking to bring in some extra dependancies but I am now receiving errors from Nuitka I'm unable to interpret.
I've tried to import numpy into the project which looks like:
Structure:
npg
__init__.py
__main__.py
Pipfile
Pipfile.lock
Here's the __main__.py
import numpy as np
def main():
print('hello Nuitka')
print('hello numpy', np.__version__)
if __name__ == '__main__':
main()
I then run python -m nuitka npg/__main__.py --standalone and get the error:
Nuitka:WARNING:/usr/local/lib/python3.6/site-packages/numpy/testing/_private/pytesttester.py:125: Cannot find 'pytest' in package 'numpy.testing._private' as absolute import (tried pytest).
Nuitka:WARNING:/usr/local/lib/python3.6/site-packages/setuptools/command/egg_info.py:20: Cannot find 'setuptools.extern.six.moves' in package 'setuptools.command' as absolute import (tried setuptools.extern.six.moves).
Nuitka:WARNING:/usr/local/lib/python3.6/site-packages/setuptools/command/egg_info.py:20: Cannot find 'setuptools.extern.six' in package 'setuptools.command' as absolute import (tried setuptools.extern.six).
Nuitka:WARNING:/usr/local/lib/python3.6/site-packages/setuptools/glob.py:13: Cannot find 'setuptools.extern.six' in package 'setuptools' as absolute import (tried setuptools.extern.six).
Nuitka:WARNING:Problem at '<SourceCodeReference to /usr/src/app/npg/__main__.py:4>' with FunctionDef(name='main', args=arguments(args=[], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=[Expr(value=Call(func=Name(id='print', ctx=Load()), args=[Str(s='hello Nuitka')], keywords=[])), Expr(value=Call(func=Name(id='print', ctx=Load()), args=[Str(s='hello numpy'), Attribute(value=Name(id='np', ctx=Load()), attr='__version__', ctx=Load())], keywords=[]))], decorator_list=[], returns=None).
Problem with statement at /usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py:566:
-> from __main__ import __requires__
Problem with statement at /usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py:565:
-> try:
Problem with statement at /usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py:559:
-> #classmethod
Problem with statement at /usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py:559:
-> #classmethod
Problem with statement at /usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py:543:
-> class WorkingSet:
Problem with statement at /usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py:543:
-> class WorkingSet:
Problem with statement at /usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py:543:
-> class WorkingSet:
Problem with statement at /usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py:543:
-> class WorkingSet:
Nuitka:INFO:Interrupted while working on '<Node 'COMPILED_PYTHON_PACKAGE' with {'filename': '/usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py', 'package': None, 'name': 'pkg_resources'}>'.
Traceback (most recent call last):
File "/usr/local/lib/python3.6/site-packages/nuitka/__main__.py", line 218, in <module>
main()
File "/usr/local/lib/python3.6/site-packages/nuitka/__main__.py", line 212, in main
MainControl.main()
File "/usr/local/lib/python3.6/site-packages/nuitka/MainControl.py", line 800, in main
filename = filename
File "/usr/local/lib/python3.6/site-packages/nuitka/MainControl.py", line 152, in createNodeTree
Optimization.optimize(main_module.getOutputFilename())
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/Optimization.py", line 533, in optimize
makeOptimizationPass(initial_pass = True)
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/Optimization.py", line 446, in makeOptimizationPass
changed = optimizeModule(current_module)
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/Optimization.py", line 173, in optimizeModule
changed = optimizeCompiledPythonModule(module)
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/Optimization.py", line 97, in optimizeCompiledPythonModule
module.computeModule()
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ModuleNodes.py", line 461, in computeModule
trace_collection = self.trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/StatementNodes.py", line 165, in computeStatementsSequence
trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/FrameNodes.py", line 195, in computeStatementsSequence
statement = statement
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 575, in onStatement
statement.computeStatement(self)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/TryNodes.py", line 129, in computeStatement
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/StatementNodes.py", line 169, in computeStatementsSequence
statement = statement
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 575, in onStatement
statement.computeStatement(self)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/AssignNodes.py", line 297, in computeStatement
trace_collection.onExpression(self.getAssignSource())
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 550, in onExpression
trace_collection = self
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/OutlineNodes.py", line 247, in computeExpressionRaw
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/StatementNodes.py", line 169, in computeStatementsSequence
statement = statement
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 575, in onStatement
statement.computeStatement(self)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/TryNodes.py", line 129, in computeStatement
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/StatementNodes.py", line 169, in computeStatementsSequence
statement = statement
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 575, in onStatement
statement.computeStatement(self)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/TryNodes.py", line 129, in computeStatement
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/StatementNodes.py", line 165, in computeStatementsSequence
trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/FrameNodes.py", line 195, in computeStatementsSequence
statement = statement
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 575, in onStatement
statement.computeStatement(self)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/LocalsDictNodes.py", line 406, in computeStatement
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/NodeBases.py", line 933, in computeStatementSubExpressions
expression = expression
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 550, in onExpression
trace_collection = self
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ExpressionBases.py", line 976, in computeExpressionRaw
expression = trace_collection.onExpression(sub_expression)
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 550, in onExpression
trace_collection = self
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ExpressionBases.py", line 976, in computeExpressionRaw
expression = trace_collection.onExpression(sub_expression)
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 550, in onExpression
trace_collection = self
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ExpressionBases.py", line 976, in computeExpressionRaw
expression = trace_collection.onExpression(sub_expression)
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 550, in onExpression
trace_collection = self
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/FunctionNodes.py", line 901, in computeExpressionRaw
function_body.computeFunctionRaw(trace_collection)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/FunctionNodes.py", line 408, in computeFunctionRaw
self.computeFunction(trace_collection)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/FunctionNodes.py", line 422, in computeFunction
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/StatementNodes.py", line 169, in computeStatementsSequence
statement = statement
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 575, in onStatement
statement.computeStatement(self)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/TryNodes.py", line 129, in computeStatement
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/StatementNodes.py", line 165, in computeStatementsSequence
trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/FrameNodes.py", line 195, in computeStatementsSequence
statement = statement
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 575, in onStatement
statement.computeStatement(self)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/TryNodes.py", line 129, in computeStatement
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/StatementNodes.py", line 169, in computeStatementsSequence
statement = statement
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 575, in onStatement
statement.computeStatement(self)
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/AssignNodes.py", line 297, in computeStatement
trace_collection.onExpression(self.getAssignSource())
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 550, in onExpression
trace_collection = self
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ExpressionBases.py", line 1054, in computeExpressionRaw
expression = sub_expression
File "/usr/local/lib/python3.6/site-packages/nuitka/optimizations/TraceCollections.py", line 550, in onExpression
trace_collection = self
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ExpressionBases.py", line 998, in computeExpressionRaw
trace_collection = trace_collection
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ImportNodes.py", line 400, in computeExpression
module_name = imported_module_name
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ImportNodes.py", line 290, in _attemptRecursion
module_package = module_package
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/ImportNodes.py", line 222, in _consider
reason = reason
File "/usr/local/lib/python3.6/site-packages/nuitka/importing/Recursion.py", line 169, in recurseTo
reason = reason
File "/usr/local/lib/python3.6/site-packages/nuitka/importing/Recursion.py", line 92, in _recurseTo
is_main = False
File "/usr/local/lib/python3.6/site-packages/nuitka/tree/Building.py", line 1055, in createModuleTree
is_main = is_main
File "/usr/local/lib/python3.6/site-packages/nuitka/tree/Building.py", line 760, in buildParseTree
source_ref = source_ref
File "/usr/local/lib/python3.6/site-packages/nuitka/tree/TreeHelpers.py", line 384, in buildStatementsNode
statements = buildNodeList(provider, nodes, source_ref, allow_none = True)
File "/usr/local/lib/python3.6/site-packages/nuitka/tree/TreeHelpers.py", line 338, in buildNodeList
entry = buildNode(provider, node, node_source_ref, allow_none)
File "/usr/local/lib/python3.6/site-packages/nuitka/tree/TreeHelpers.py", line 295, in buildNode
source_ref = source_ref
File "/usr/local/lib/python3.6/site-packages/nuitka/tree/ReformulationFunctionStatements.py", line 153, in buildFunctionNode
source_ref = source_ref
File "/usr/local/lib/python3.6/site-packages/nuitka/tree/ReformulationFunctionStatements.py", line 799, in buildFunctionWithParsing
source_ref = source_ref
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/FunctionNodes.py", line 452, in __init__
source_ref = source_ref
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/FunctionNodes.py", line 386, in __init__
setLocalsDictType(self.locals_dict_name, "python3_function")
File "/usr/local/lib/python3.6/site-packages/nuitka/nodes/LocalsScopes.py", line 30, in setLocalsDictType
assert locals_dict_name not in locals_dict_handles, locals_dict_name
AssertionError: locals___main__$$$function_1_main
I'm use how to interpret that error. Looks like it's having problems with the syntax of the Python's standard libraries import machinery.

S3 the read operation timed out while reading commoncrawl data

In order to read few files from common crawl I have written this script
import warc
import boto
for line in sys.stdin:
line = line.strip()
#Connect to AWS and read a dataset
conn = boto.connect_s3(anon=True, host='s3.amazonaws.com')
pds = conn.get_bucket('commoncrawl')
k = Key(pds)
k.key = line
f = warc.WARCFile(fileobj=GzipStreamFile(k))
skipped_doc = 0
for num, record in enumerate(f):
# analysis code
Where each line is the key of warc files. When I run this script to analyze 5 files, I got this exception
Traceback (most recent call last):
File "./warc_mapper_full.py", line 42, in <module>
for num, record in enumerate(f):
File "/usr/lib/python2.7/site-packages/warc/warc.py", line 393, in __iter__
record = self.read_record()
File "/usr/lib/python2.7/site-packages/warc/warc.py", line 364, in read_record
self.finish_reading_current_record()
File "/usr/lib/python2.7/site-packages/warc/warc.py", line 358, in finish_reading_current_record
self.current_payload.read()
File "/usr/lib/python2.7/site-packages/warc/utils.py", line 59, in read
return self._read(self.length)
File "/usr/lib/python2.7/site-packages/warc/utils.py", line 69, in _read
content = self.buf + self.fileobj.read(size)
File "/home/hpcnl/Documents/kics/current_work/aws/tasks/warc-analysis/src/gzipstream/gzipstream/gzipstreamfile.py", line 67, in read
result = super(GzipStreamFile, self).read(*args, **kwargs)
File "/home/hpcnl/Documents/kics/current_work/aws/tasks/warc-analysis/src/gzipstream/gzipstream/gzipstreamfile.py", line 48, in readinto
data = self.read(len(b))
File "/home/hpcnl/Documents/kics/current_work/aws/tasks/warc-analysis/src/gzipstream/gzipstream/gzipstreamfile.py", line 38, in read
raw = self.stream.read(io.DEFAULT_BUFFER_SIZE)
File "/usr/lib/python2.7/site-packages/boto/s3/key.py", line 400, in read
data = self.resp.read(size)
File "/usr/lib/python2.7/site-packages/boto/connection.py", line 413, in read
return http_client.HTTPResponse.read(self, amt)
File "/usr/lib64/python2.7/httplib.py", line 602, in read
s = self.fp.read(amt)
File "/usr/lib64/python2.7/socket.py", line 380, in read
data = self._sock.recv(left)
File "/usr/lib64/python2.7/ssl.py", line 736, in recv
return self.read(buflen)
File "/usr/lib64/python2.7/ssl.py", line 630, in read
v = self._sslobj.read(len or 1024)
ssl.SSLError: ('The read operation timed out',)
I run it many times. Above exception happened every time. Where is the problem ?

python lxml save not working

I have the following script -
count = 1
for line in temp:
if (str(count) + '=') in line:
job = re.findall(re.escape('=')+"(.*)",line)[0]
fullsrcurl = self.srcjson + '?format=xml&jobname=' + job
srcfile = urllib2.urlopen(fullsrcurl)
srcdoc = etree.parse(srcfile)
srcdata = etree.tostring(srcdoc, pretty_print=True)
srcjobmst_id = srcdoc.xpath('//jobmst_id/text()')[0]
srcxml = 'c:\\temp\\deployments\\%s\\%s.xml' % (source_env, srcjobmst_id)
srcxmlsave = open(srcxml, 'w')
srcxmlsave.write(srcdata)
srcxmlsave.close
fulldsturl = self.targetjson + '?format=xml&jobname=' + job
dstfile = urllib2.urlopen(fulldsturl)
dstdoc = etree.parse(dstfile)
dstdata = etree.tostring(dstdoc, pretty_print=True)
dstjobmst_id = dstdoc.xpath('//jobmst_id/text()')[0]
dstxml = 'c:\\temp\\deployments\\%s\\%s.xml' % (target_env, dstjobmst_id)
dstxmlsave = open(dstxml, 'w')
dstxmlsave.write(dstdata)
dstxmlsave.close
print "Job = " + job
count += 1
It's hitting 2 separate APIs in 2 environments but the data is almost identical. The source works fine, as soon as it tries to do anything with the destination data I get the followign error -
Traceback (most recent call last):
File "S:\Operations\Tidal\deployment\deployv2.py", line 213, in <module>
main()
File "S:\Operations\Tidal\deployment\deployv2.py", line 209, in main
auto_deploy.deploy()
File "S:\Operations\Tidal\deployment\deployv2.py", line 173, in deploy
dstdoc = etree.parse(dstfile)
File "lxml.etree.pyx", line 3239, in lxml.etree.parse (src\lxml\lxml.etree.c:6
9970)
File "parser.pxi", line 1770, in lxml.etree._parseDocument (src\lxml\lxml.etre
e.c:102272)
File "parser.pxi", line 1790, in lxml.etree._parseFilelikeDocument (src\lxml\l
xml.etree.c:102531)
File "parser.pxi", line 1685, in lxml.etree._parseDocFromFilelike (src\lxml\lx
ml.etree.c:101457)
File "parser.pxi", line 1134, in lxml.etree._BaseParser._parseDocFromFilelike
(src\lxml\lxml.etree.c:97084)
File "parser.pxi", line 582, in lxml.etree._ParserContext._handleParseResultDo
c (src\lxml\lxml.etree.c:91290)
File "parser.pxi", line 683, in lxml.etree._handleParseResult (src\lxml\lxml.e
tree.c:92476)
File "parser.pxi", line 622, in lxml.etree._raiseParseError (src\lxml\lxml.etr
ee.c:91772)
lxml.etree.XMLSyntaxError: Extra content at the end of the document, line 4, col
umn 1
So there has to be something different about the destination/target xml but I'm having a hard time understanding what. When I look at both of the values in a browser they're identical except for a few values (jobmst_id)
You aren't closing the files. Change srcxmlsave.close to srcxmlsave.close() or use a context manager as in
with open(srcxml, 'w') as srcxmlsave:
srcxmlsave.write(srcdata)
If anyone experiences an issue like this in the future I found the problem and it's nothing related to lxml or the xml I'm generating. My source environment has been productionalized using mod_wsgi but the target environment is still using runserver.
I guess something in the encoding is breaking with the target. I just productionalized the target environment and it works fine.

How to fix such ClientForm bug?

from mechanize import Browser
br = Browser()
page = br.open('http://wow.interzet.ru/news.php?readmore=23')
br.form = br.forms().next()
print br.form
gives me the following error:
Traceback (most recent call last):
File "C:\Users\roddik\Desktop\mech.py", line 6, in <module>
br.form = br.forms().next()
File "build\bdist.win32\egg\mechanize\_mechanize.py", line 426, in forms
File "D:\py26\lib\site-package\mechanize-0.1.11-py2.6.egg\mechanize\_html.py", line 559, in forms
File "D:\py26\lib\site-packages\mechanize-0.1.11-py2.6.egg\mechanize\_html.py", line 225, in forms
File "D:\py26\lib\site-packages\clientform-0.2.10-py2.6.egg\ClientForm.py", line 967, in ParseResponseEx
File "D:\py26\lib\site-packages\clientform-0.2.10-py2.6.egg\ClientForm.py", line 1100, in _ParseFileEx
File "D:\py26\lib\site-packages\clientform-0.2.10-py2.6.egg\ClientForm.py", line 870, in feed
File "D:\py26\lib\sgmllib.py", line 104, in feed
self.goahead(0)
File "D:\py26\lib\sgmllib.py", line 138, in goahead
k = self.parse_starttag(i)
File "D:\py26\lib\sgmllib.py", line 290, in parse_starttag
self._convert_ref, attrvalue)
File "D:\py26\lib\sgmllib.py", line 302, in _convert_ref
return self.convert_charref(match.group(2)) or \
File "D:\py26\lib\site-packages\clientform-0.2.10-py2.6.egg\ClientForm.py", line 850, in convert_charref
File "D:\py26\lib\site-packages\clientform-0.2.10-py2.6.egg\ClientForm.py", line 244, in unescape_charref
ValueError: invalid literal for int() with base 10: 'e'
How can I fix it?
Edit:
I've fixed it this way. Is it ok? If not, how instead?
import ClientForm
from mechanize import Browser
def myunescape_charref(data, encoding):
if not str(data).isdigit(): return 0
name, base = data, 10
if name.startswith("x"):
name, base= name[1:], 16
uc = unichr(int(name, base))
if encoding is None:
return uc
else:
try:
repl = uc.encode(encoding)
except UnicodeError:
repl = "&#%s;" % data
return repl
ClientForm.unescape_charref = myunescape_charref
The problem is caused by urls like this
http://wow.zet/forum/index.php?showtopic=1197&pid=30419&st=0&#entry30419
ClientForm is looking for an integer after the &#
It is ok to have the # in the url, but it should be escaped in the html
as &# means a character encoding

Categories