<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.xiaomi.oga">
...
<meta-data
android:name="xxx"
android:value="xxx" >
</meta-data>
<meta-data
android:name="OTA_TYPE"
android:value="daily_build_test" />
</application>
</manifest>
I want get the daily_build_test above by python
So far, I tried:
import lxml.etree as ET
import os
ns='{http://schemas.android.com/apk/res/android}'
nametag=ns+'name'
categorytag='category'
packagetag='package'
class XmlParser:
def __init__(self, file):
self.file = file
self.tree = None
self.root = None
def __parse(self):
if self.tree is None or self.root is None:
parser = ET.XMLParser(strip_cdata=False)
self.tree = ET.parse(self.file, parser)
self.root = self.tree.getroot()
def gettree(self):
self.__parse()
return self.tree
def getroot(self):
self.__parse()
return self.root
def get_meta_data(parser):
tree = parser.gettree()
#value = tree.xpath('/manifest/application//meta-data[#*=\'OTA_TYPE\']/#*[2]')
NS = {'android' : 'http://schemas.android.com/apk/res/android'}
value = tree.xpath('/manifest/application/meta-data[#a:name=\'OTA_TYPE\']/#a:value', namespace=NS)
print '%s' %(value)
return value
if __name__ == '__main__':
file = os.environ['PYTHON_ARG']
parser = XmlParser(file)
meta_data = get_meta_data(parser)
print '%s' %(meta_data)
and I got :
Traceback (most recent call last):
File "<stdin>", line 44, in <module>
File "<stdin>", line 37, in get_meta_data
File "src/lxml/lxml.etree.pyx", line 2272, in lxml.etree._ElementTree.xpath (src/lxml/lxml.etree.c:70786)
File "src/lxml/xpath.pxi", line 352, in lxml.etree.XPathDocumentEvaluator.__call__ (src/lxml/lxml.etree.c:179055)
File "src/lxml/xpath.pxi", line 95, in lxml.etree._XPathContext.registerVariables (src/lxml/lxml.etree.c:175335)
File "src/lxml/extensions.pxi", line 614, in lxml.etree._wrapXPathObject (src/lxml/lxml.etree.c:171074)
lxml.etree.XPathResultError: Unknown return type: dict
or ['daily_build_test']
when the commentted line :
value = tree.xpath('/manifest/application//meta-data[#*=\'OTA_TYPE\']/#*[2]')
is applied
I don't want the brackets the quotes.
The argument to tree.xpath is namespaces, not namespace. So:
NS = {'a' : 'http://schemas.android.com/apk/res/android'}
value = tree.xpath('/manifest/application/'
'meta-data[#a:name=\'OTA_TYPE\']/#a:value',
namespaces=NS)
Also note that you were using the key android in your NS dictionary, but a: in your query. The example above corrects the dictionary so that it matches the prefix you're using.
Thanks to larsks, I finally get I want by:*
def get_meta_data(parser):
tree = parser.gettree()
NS = {'a' : 'http://schemas.android.com/apk/res/android'}
value = tree.xpath('/manifest/application/meta-data[#a:name=\'OTA_TYPE\']/#a:value', namespaces=NS)
print '%s' %(value[0])
# NOTE value is a list of length 0.
return value[0]
Related
I have a project that searches PDFs for URLs and in the process extracts the PDF Metadata. It works perfectly around 99.6% of the time without any errors. But every once in a while, a file throws the old "invalid token error. Traceback Below:
Traceback (most recent call last):
File "c:\python38\lib\runpy.py", line 193, in _run_module_as_main
return run_code(code, main_globals, None,
File "c:\python38\lib\runpy.py", line 86, in run_code
exec(code, run_globals)
File "C:\Python38\Scripts\linkrot.exe_main.py", line 7, in
File "c:\python38\lib\site-packages\linkrot\cli.py", line 182, in main
pdf = linkrot.linkrot(args.pdf)
File "c:\python38\lib\site-packages\linkrot_init.py", line 131, in init
self.reader = PDFMinerBackend(self.stream)
File "c:\python38\lib\site-packages\linkrot\backends.py", line 213, in init
self.metadata.update(xmp_to_dict(metadata))
File "c:\python38\lib\site-packages\linkrot\libs\xmp.py", line 92, in xmp_to_dict
return XmpParser(xmp).meta
File "c:\python38\lib\site-packages\linkrot\libs\xmp.py", line 41, in init
self.tree = ET.XML(xmp)
File "c:\python38\lib\xml\etree\ElementTree.py", line 1320, in XML
parser.feed(text)
xml.etree.ElementTree.ParseError: not well-formed (invalid token): line 55, column 10
My assumption is that there is some sort of issue with the XML extracted from the PDF, but I can't be sure. Is there a workaround? Some way the rest of the program could run when this error throws? The metadata is valuable to the process so I'd like to keep it if possible. I don't know etree that well, so I'd appreciate some help. The Code itself is below:
class XmpParser(object):
"""
Parses an XMP string into a dictionary.
Usage:
parser = XmpParser(xmpstring)
meta = parser.meta
"""
def __init__(self, xmp):
self.tree = ET.XML(xmp)
self.rdftree = self.tree.find(RDF_NS + "RDF")
#property
def meta(self):
""" A dictionary of all the parsed metadata. """
meta = defaultdict(dict)
if self.rdftree:
for desc in self.rdftree.findall(RDF_NS + "Description"):
for (
el
) in (
desc.iter()
):
ns, tag = self._parse_tag(el)
value = self._parse_value(el)
meta[ns][tag] = value
return dict(meta)
def _parse_tag(self, el):
""" Extract the namespace and tag from an element. """
ns = None
tag = el.tag
if tag[0] == "{":
ns, tag = tag[1:].split("}", 1)
if ns in NS_MAP:
ns = NS_MAP[ns]
return ns, tag
def _parse_value(self, el): # noqa: C901
""" Extract the metadata value from an element. """
if el.find(RDF_NS + "Bag") is not None:
value = []
for li in el.findall(RDF_NS + "Bag/" + RDF_NS + "li"):
value.append(li.text)
elif el.find(RDF_NS + "Seq") is not None:
value = []
for li in el.findall(RDF_NS + "Seq/" + RDF_NS + "li"):
value.append(li.text)
elif el.find(RDF_NS + "Alt") is not None:
value = {}
for li in el.findall(RDF_NS + "Alt/" + RDF_NS + "li"):
value[li.get(XML_NS + "lang")] = li.text
else:
value = el.text
return value
Any help or advice would be appreciated.
I have the following code for reading in files from a folder:
from pyspark.sql.types import *
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
sc = SparkContext.getOrCreate()
spark = SparkSession(sc)
class MicrosoftAcademicGraph:
def __init__(self):
self.version = '2021-12-06'
def getBasepath(self):
basepath = '/work/ScienceOfScience/Data/ScienceOfScience/mag/mag/'
if (self.version != ''):
basepath = self.version + '/'
return basepath
# return stream path
def getFullpath(self, streamName):
path = self.getBasepath() + self.streams[streamName][0]
return self
# return stream header
def getHeader(self, streamName):
return self.streams[streamName][1]
# return stream schema
def getSchema(self, streamName):
schema = StructType()
for field in self.streams[streamName][1]:
fieldname, fieldtype = field.split(':')
nullable = fieldtype.endswith('?')
if nullable:
fieldtype = fieldtype[:-1]
schema.add(StructField(fieldname, self.datatypedict[fieldtype], nullable))
return schema
# return stream dataframe
def getDataframe(self, streamName):
return spark.read.format('csv').options(header='false', delimiter='\t').schema(self.getSchema(streamName)).load(self.getFullpath(streamName))
# define stream dictionary
streams = {
'Affiliations' : ('mag/Affiliations.txt', ['AffiliationId:long', 'Rank:uint', 'NormalizedName:string', 'DisplayName:string', 'GridId:string', 'OfficialPage:string', 'WikiPage:string', 'PaperCount:long', 'PaperFamilyCount:long', 'CitationCount:long', 'Iso3166Code:string', 'Latitude:float?', 'Longitude:float?', 'CreatedDate:DateTime']),
'AuthorExtendedAttributes' : ('mag/AuthorExtendedAttributes.txt', ['AuthorId:long', 'AttributeType:int', 'AttributeValue:string'])}
I'm tring to retrieve one of the files called 'Authors' in the following way:
e = MicrosoftAcademicGraph()
e.getDataframe('Authors')
I get a long list of errors that look like this:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<string>", line 51, in getDataframe
File "/home/ucloud/.local/lib/python3.6/site-packages/pyspark/sql/readwriter.py", line 162, in load
return self._df(self._jreader.load(self._spark._sc._jvm.PythonUtils.toSeq(path)))
File "/home/ucloud/.local/lib/python3.6/site-packages/py4j/java_gateway.py", line 1313, in __call__
args_command, temp_args = self._build_args(*args)
File "/home/ucloud/.local/lib/python3.6/site-packages/py4j/java_gateway.py", line 1277, in _build_args
(new_args, temp_args) = self._get_args(args)
command_part = REFERENCE_TYPE + parameter._get_object_id()
AttributeError: 'MicrosoftAcademicGraph' object has no attribute '_get_object_id'
is there something wrong in the code or does this maybe have to do with version mismatch between python and pyspark?
I have a YAML file: ./YAML/simpleData.yml
- name: 'Somu'
age: 26
content:
- name: 'Neo'
age: 27
content: []
- name: 'Ari'
age: 26
content: []
And I'm trying to parse it using PyYAML via:
import yaml
# Creating objects directly with the YAML module:
print("Attempting Direct Object Load: ")
class Person:
def __init__(self, name, age, con):
self.name = name
self.age = hp
self.content = con
def __repr__(self):
return "%s(name=%r, hp=%r, sp=%r)" % (
self.__class__.__name__, self.name, self.age, self.content)
def printData(self):
print(self.name)
print(self.age)
if self.content:
for per in self.content:
print("-->", end="")
per.printData()
# Data load:
person_obj = None
data = ""
try:
with open('YAML/simpleData.yml') as source:
for line in source:
data += line
except Exception as err:
print("An exception occurred: " + str(err))
person_obj = yaml.load("""!!python/object:__main__.Person\n""" + data)
if not person_obj:
print("Data Loading Failed..! EXITING!!")
exit(1)
person_obj.printData()
I'm new to Python, and thus can't determine what I'm doing wrong, due to which this exception is being raised:
yaml.constructor.ConstructorError: expected a mapping node, but found sequence
in "<unicode string>", line 1, column 1:
!!python/object:__main__.Person
^
How do I fix this?
Full Output Dump:
Attempting Direct Object Load:
Traceback (most recent call last):
File "/home/somu/Programming/python/HeadFirstPython/yamlIntro.py", line 106, in <module>
person_obj = yaml.load("""!!python/object:__main__.Person\n""" + data)
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/__init__.py", line 72, in load
return loader.get_single_data()
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 37, in get_single_data
return self.construct_document(node)
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 46, in construct_document
for dummy in generator:
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 578, in construct_python_object
state = self.construct_mapping(node, deep=deep)
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 204, in construct_mapping
return super().construct_mapping(node, deep=deep)
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 122, in construct_mapping
node.start_mark)
yaml.constructor.ConstructorError: expected a mapping node, but found sequence
in "<unicode string>", line 1, column 1:
!!python/object:__main__.Person
^
Process finished with exit code 1
At the root, c.q. top level, of your file you have a sequence. The first element of which is a mapping with among others the key-value pair name: Somu.
If you want to load this using PyYAML in the way you described, you should strip of the first two characters of each line:
data += line[2:]
or insert the !!python/object:__main__.Person after the first dash:
data = data.replace('- ', '- !!python/object:__main__.Person\n', 1)
I keep getting this error, I'm not sure why though.
Traceback (most recent call last):
File "/home/cambria/Main.py", line 1, in <module>
from RiotAPI import RiotAPI
File "/home/cambria/RiotAPI.py", line 6
def __init__(self, api_key, region=Consts.REGIONS['north_america'])
^
SyntaxError: invalid syntax
I have not used Python for that long, I am just using it because it facilitates what I'm trying to do well, but I have used various other languages and as far as I can tell you would want to close these ()'s in this statement def __init__(self, api_key, region=Consts.REGIONS['north_america']) however I keep getting a SyntaxError: invalid syntax?
the rest of that definition is as follows, if it helps.
class RiotAPI(object):
def __init__(self, api_key, region=Consts.REGIONS['north_america'])
self.api_key = api_key
self.region = region
EDIT 1: if i add a : at the end of def __init__(self, api_key, region=Consts.REGIONS['north_america']): like so, why? and after doing this i get a new syntax error that i will address after some wisedom
EDIT 2: new syntax error after fixing the first is,
Traceback (most recent call last):
File "/home/cambria/Main.py", line 1, in <module>
from RiotAPI import RiotAPI
File "/home/cambria/RiotAPI.py", line 11
args = ('api_key': self.api_key)
^
SyntaxError: invalid syntax
which is
def _request(self, api_url, params=()):
args = ('api_key': self.api_key)
for key, value in params.items():
if key not in args:
args[key] = value
EDIT 3: This should be the last of it.. no more syntax, just a
Traceback (most recent call last):
File "/home/cambria/Main.py", line 10, in <module>
main()
File "/home/cambria/Main.py", line 5, in main
respons3 = api.get_summoner_by_name('hi im gosan')
File "/home/cambria/RiotAPI.py", line 31, in get_summoner_by_name
return self._request(api_url)
File "/home/cambria/RiotAPI.py", line 12, in _request
for key, value in params.items():
AttributeError: 'tuple' object has no attribute 'items'
in
def _request(self, api_url, params=()):
args = {'api_key': self.api_key}
for key, value in params.items():
if key not in args:
args[key] = value
response = requests.get(
Consts.URL['base'].format(
proxy=self.region,
region=self.region,
url=api_url
),
params=args
)
print response.url
return response.json()
this is the only error i have received that i really don't know much on. Is this a result of there being no .items on my params? or i left it initialized as an empty dictionary?
The problem is just that you're missing a : at the end of the line.
def __init__(self, api_key, region=Consts.REGIONS['north_america']):
self.api_key = api_key
self.region = region
You forgot a ::
class RiotAPI(object):
def __init__(self, api_key, region=Consts.REGIONS['north_america']): # <HERE
self.api_key = api_key
self.region = region
I have a command as follows:
class AddChatMessages(Command):
arguments = [
('messages', AmpList([('message', Unicode()), ('type', Integer())]))]
And I have a responder for it in a controller:
def add_chat_messages(self, messages):
for i, m in enumerate(messages):
messages[i] = (m['message'], m['type'])
self.main.add_chat_messages(messages)
return {}
commands.AddChatMessages.responder(add_chat_messages)
I am writing a unit test for it. This is my code:
class AddChatMessagesTest(ProtocolTestMixin, unittest.TestCase):
command = commands.AddChatMessages
data = {'messages': [{'message': 'hi', 'type': 'None'}]}
def assert_callback(self, unused):
pass
Where ProtocolMixin is as follows:
class ProtocolTestMixin(object):
def setUp(self):
self.protocol = client.CommandProtocol()
def assert_callback(self, unused):
raise NotImplementedError("Has to be implemented!")
def test_responder(self):
responder = self.protocol.lookupFunction(
self.command.commandName)
d = responder(self.data)
d.addCallback(self.assert_callback)
return d
It works if AmpList is not involved, but when it is - I get following error:
======================================================================
ERROR: test_responder
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/defer.py", line 139, in maybeDeferred
result = f(*args, **kw)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/utils.py", line 203, in runWithWarningsSuppressed
reraise(exc_info[1], exc_info[2])
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/utils.py", line 199, in runWithWarningsSuppressed
result = f(*a, **kw)
File "/Users/<username>/Projects/space/tests/client_test.py", line 32, in test_responder
d = responder(self.data)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1016, in doit
kw = command.parseArguments(box, self)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1717, in parseArguments
return _stringsToObjects(box, cls.arguments, protocol)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 2510, in _stringsToObjects
argparser.fromBox(argname, myStrings, objects, proto)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1209, in fromBox
objects[nk] = self.fromStringProto(st, proto)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1465, in fromStringProto
boxes = parseString(inString)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 2485, in parseString
return cls.parse(StringIO(data))
TypeError: must be string or buffer, not list
Which makes sense, but how do I serialize a list in AddChatMessagesTest.data?
The responder expects to be called with a serialized box. It will then deserialize it, dispatch the objects to application code, take the object the application code returns, serialize it, and then return that serialized form.
For a few AMP types. most notably String, the serialized form is the same as the deserialized form, so it's easy to overlook this.
I think that you'll want to pass your data through Command.makeArguments in order to produce an object suitable to pass to a responder.
For example:
>>> from twisted.protocols.amp import Command, Integer
>>> class Foo(Command):
... arguments = [("bar", Integer())]
...
>>> Foo.makeArguments({"bar": 17}, None)
AmpBox({'bar': '17'})
>>>
If you do this with a Command that uses AmpList I think you'll find makeArguments returns an encoded string for the value of that argument and that the responder is happy to accept and parse that kind of string.