to get android manifest meta data value by python xpath

to get android manifest meta data value by python xpath - python

<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.xiaomi.oga">
...
<meta-data
android:name="xxx"
android:value="xxx" >
</meta-data>
<meta-data
android:name="OTA_TYPE"
android:value="daily_build_test" />
</application>
</manifest>
I want get the daily_build_test above by python
So far, I tried:
import lxml.etree as ET
import os
ns='{http://schemas.android.com/apk/res/android}'
nametag=ns+'name'
categorytag='category'
packagetag='package'
class XmlParser:
def __init__(self, file):
self.file = file
self.tree = None
self.root = None
def __parse(self):
if self.tree is None or self.root is None:
parser = ET.XMLParser(strip_cdata=False)
self.tree = ET.parse(self.file, parser)
self.root = self.tree.getroot()
def gettree(self):
self.__parse()
return self.tree
def getroot(self):
self.__parse()
return self.root
def get_meta_data(parser):
tree = parser.gettree()
#value = tree.xpath('/manifest/application//meta-data[#*=\'OTA_TYPE\']/#*[2]')
NS = {'android' : 'http://schemas.android.com/apk/res/android'}
value = tree.xpath('/manifest/application/meta-data[#a:name=\'OTA_TYPE\']/#a:value', namespace=NS)
print '%s' %(value)
return value
if __name__ == '__main__':
file = os.environ['PYTHON_ARG']
parser = XmlParser(file)
meta_data = get_meta_data(parser)
print '%s' %(meta_data)
and I got :
Traceback (most recent call last):
File "<stdin>", line 44, in <module>
File "<stdin>", line 37, in get_meta_data
File "src/lxml/lxml.etree.pyx", line 2272, in lxml.etree._ElementTree.xpath (src/lxml/lxml.etree.c:70786)
File "src/lxml/xpath.pxi", line 352, in lxml.etree.XPathDocumentEvaluator.__call__ (src/lxml/lxml.etree.c:179055)
File "src/lxml/xpath.pxi", line 95, in lxml.etree._XPathContext.registerVariables (src/lxml/lxml.etree.c:175335)
File "src/lxml/extensions.pxi", line 614, in lxml.etree._wrapXPathObject (src/lxml/lxml.etree.c:171074)
lxml.etree.XPathResultError: Unknown return type: dict
or ['daily_build_test']
when the commentted line :
value = tree.xpath('/manifest/application//meta-data[#*=\'OTA_TYPE\']/#*[2]')
is applied
I don't want the brackets the quotes.

The argument to tree.xpath is namespaces, not namespace. So:
NS = {'a' : 'http://schemas.android.com/apk/res/android'}
value = tree.xpath('/manifest/application/'
'meta-data[#a:name=\'OTA_TYPE\']/#a:value',
namespaces=NS)
Also note that you were using the key android in your NS dictionary, but a: in your query. The example above corrects the dictionary so that it matches the prefix you're using.

Thanks to larsks, I finally get I want by:*
def get_meta_data(parser):
tree = parser.gettree()
NS = {'a' : 'http://schemas.android.com/apk/res/android'}
value = tree.xpath('/manifest/application/meta-data[#a:name=\'OTA_TYPE\']/#a:value', namespaces=NS)
print '%s' %(value[0])
# NOTE value is a list of length 0.
return value[0]

Related

xml.etree giving an Invalid Token Error When Parsing PDF Metadata

I have a project that searches PDFs for URLs and in the process extracts the PDF Metadata. It works perfectly around 99.6% of the time without any errors. But every once in a while, a file throws the old "invalid token error. Traceback Below:
Traceback (most recent call last):
File "c:\python38\lib\runpy.py", line 193, in _run_module_as_main
return run_code(code, main_globals, None,
File "c:\python38\lib\runpy.py", line 86, in run_code
exec(code, run_globals)
File "C:\Python38\Scripts\linkrot.exe_main.py", line 7, in
File "c:\python38\lib\site-packages\linkrot\cli.py", line 182, in main
pdf = linkrot.linkrot(args.pdf)
File "c:\python38\lib\site-packages\linkrot_init.py", line 131, in init
self.reader = PDFMinerBackend(self.stream)
File "c:\python38\lib\site-packages\linkrot\backends.py", line 213, in init
self.metadata.update(xmp_to_dict(metadata))
File "c:\python38\lib\site-packages\linkrot\libs\xmp.py", line 92, in xmp_to_dict
return XmpParser(xmp).meta
File "c:\python38\lib\site-packages\linkrot\libs\xmp.py", line 41, in init
self.tree = ET.XML(xmp)
File "c:\python38\lib\xml\etree\ElementTree.py", line 1320, in XML
parser.feed(text)
xml.etree.ElementTree.ParseError: not well-formed (invalid token): line 55, column 10
My assumption is that there is some sort of issue with the XML extracted from the PDF, but I can't be sure. Is there a workaround? Some way the rest of the program could run when this error throws? The metadata is valuable to the process so I'd like to keep it if possible. I don't know etree that well, so I'd appreciate some help. The Code itself is below:
class XmpParser(object):
"""
Parses an XMP string into a dictionary.
Usage:
parser = XmpParser(xmpstring)
meta = parser.meta
"""
def __init__(self, xmp):
self.tree = ET.XML(xmp)
self.rdftree = self.tree.find(RDF_NS + "RDF")
#property
def meta(self):
""" A dictionary of all the parsed metadata. """
meta = defaultdict(dict)
if self.rdftree:
for desc in self.rdftree.findall(RDF_NS + "Description"):
for (
el
) in (
desc.iter()
):
ns, tag = self._parse_tag(el)
value = self._parse_value(el)
meta[ns][tag] = value
return dict(meta)
def _parse_tag(self, el):
""" Extract the namespace and tag from an element. """
ns = None
tag = el.tag
if tag[0] == "{":
ns, tag = tag[1:].split("}", 1)
if ns in NS_MAP:
ns = NS_MAP[ns]
return ns, tag
def _parse_value(self, el): # noqa: C901
""" Extract the metadata value from an element. """
if el.find(RDF_NS + "Bag") is not None:
value = []
for li in el.findall(RDF_NS + "Bag/" + RDF_NS + "li"):
value.append(li.text)
elif el.find(RDF_NS + "Seq") is not None:
value = []
for li in el.findall(RDF_NS + "Seq/" + RDF_NS + "li"):
value.append(li.text)
elif el.find(RDF_NS + "Alt") is not None:
value = {}
for li in el.findall(RDF_NS + "Alt/" + RDF_NS + "li"):
value[li.get(XML_NS + "lang")] = li.text
else:
value = el.text
return value
Any help or advice would be appreciated.

pyspark error: "object has no attribute '_get_object_id' " when trying to read file

I have the following code for reading in files from a folder:
from pyspark.sql.types import *
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
sc = SparkContext.getOrCreate()
spark = SparkSession(sc)
class MicrosoftAcademicGraph:
def __init__(self):
self.version = '2021-12-06'
def getBasepath(self):
basepath = '/work/ScienceOfScience/Data/ScienceOfScience/mag/mag/'
if (self.version != ''):
basepath = self.version + '/'
return basepath
# return stream path
def getFullpath(self, streamName):
path = self.getBasepath() + self.streams[streamName][0]
return self
# return stream header
def getHeader(self, streamName):
return self.streams[streamName][1]
# return stream schema
def getSchema(self, streamName):
schema = StructType()
for field in self.streams[streamName][1]:
fieldname, fieldtype = field.split(':')
nullable = fieldtype.endswith('?')
if nullable:
fieldtype = fieldtype[:-1]
schema.add(StructField(fieldname, self.datatypedict[fieldtype], nullable))
return schema
# return stream dataframe
def getDataframe(self, streamName):
return spark.read.format('csv').options(header='false', delimiter='\t').schema(self.getSchema(streamName)).load(self.getFullpath(streamName))
# define stream dictionary
streams = {
'Affiliations' : ('mag/Affiliations.txt', ['AffiliationId:long', 'Rank:uint', 'NormalizedName:string', 'DisplayName:string', 'GridId:string', 'OfficialPage:string', 'WikiPage:string', 'PaperCount:long', 'PaperFamilyCount:long', 'CitationCount:long', 'Iso3166Code:string', 'Latitude:float?', 'Longitude:float?', 'CreatedDate:DateTime']),
'AuthorExtendedAttributes' : ('mag/AuthorExtendedAttributes.txt', ['AuthorId:long', 'AttributeType:int', 'AttributeValue:string'])}
I'm tring to retrieve one of the files called 'Authors' in the following way:
e = MicrosoftAcademicGraph()
e.getDataframe('Authors')
I get a long list of errors that look like this:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<string>", line 51, in getDataframe
File "/home/ucloud/.local/lib/python3.6/site-packages/pyspark/sql/readwriter.py", line 162, in load
return self._df(self._jreader.load(self._spark._sc._jvm.PythonUtils.toSeq(path)))
File "/home/ucloud/.local/lib/python3.6/site-packages/py4j/java_gateway.py", line 1313, in __call__
args_command, temp_args = self._build_args(*args)
File "/home/ucloud/.local/lib/python3.6/site-packages/py4j/java_gateway.py", line 1277, in _build_args
(new_args, temp_args) = self._get_args(args)
command_part = REFERENCE_TYPE + parameter._get_object_id()
AttributeError: 'MicrosoftAcademicGraph' object has no attribute '_get_object_id'
is there something wrong in the code or does this maybe have to do with version mismatch between python and pyspark?

!!python/object:main.ClassName throwing exception

I have a YAML file: ./YAML/simpleData.yml
- name: 'Somu'
age: 26
content:
- name: 'Neo'
age: 27
content: []
- name: 'Ari'
age: 26
content: []
And I'm trying to parse it using PyYAML via:
import yaml
# Creating objects directly with the YAML module:
print("Attempting Direct Object Load: ")
class Person:
def __init__(self, name, age, con):
self.name = name
self.age = hp
self.content = con
def __repr__(self):
return "%s(name=%r, hp=%r, sp=%r)" % (
self.__class__.__name__, self.name, self.age, self.content)
def printData(self):
print(self.name)
print(self.age)
if self.content:
for per in self.content:
print("-->", end="")
per.printData()
# Data load:
person_obj = None
data = ""
try:
with open('YAML/simpleData.yml') as source:
for line in source:
data += line
except Exception as err:
print("An exception occurred: " + str(err))
person_obj = yaml.load("""!!python/object:__main__.Person\n""" + data)
if not person_obj:
print("Data Loading Failed..! EXITING!!")
exit(1)
person_obj.printData()
I'm new to Python, and thus can't determine what I'm doing wrong, due to which this exception is being raised:
yaml.constructor.ConstructorError: expected a mapping node, but found sequence
in "<unicode string>", line 1, column 1:
!!python/object:__main__.Person
^
How do I fix this?
Full Output Dump:
Attempting Direct Object Load:
Traceback (most recent call last):
File "/home/somu/Programming/python/HeadFirstPython/yamlIntro.py", line 106, in <module>
person_obj = yaml.load("""!!python/object:__main__.Person\n""" + data)
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/__init__.py", line 72, in load
return loader.get_single_data()
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 37, in get_single_data
return self.construct_document(node)
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 46, in construct_document
for dummy in generator:
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 578, in construct_python_object
state = self.construct_mapping(node, deep=deep)
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 204, in construct_mapping
return super().construct_mapping(node, deep=deep)
File "/home/somu/Programming/python/HeadFirstPython/venv/lib/python3.6/site-packages/yaml/constructor.py", line 122, in construct_mapping
node.start_mark)
yaml.constructor.ConstructorError: expected a mapping node, but found sequence
in "<unicode string>", line 1, column 1:
!!python/object:__main__.Person
^
Process finished with exit code 1

At the root, c.q. top level, of your file you have a sequence. The first element of which is a mapping with among others the key-value pair name: Somu.
If you want to load this using PyYAML in the way you described, you should strip of the first two characters of each line:
data += line[2:]
or insert the !!python/object:__main__.Person after the first dash:
data = data.replace('- ', '- !!python/object:__main__.Person\n', 1)

basic python syntax that i don't quite get

I keep getting this error, I'm not sure why though.
Traceback (most recent call last):
File "/home/cambria/Main.py", line 1, in <module>
from RiotAPI import RiotAPI
File "/home/cambria/RiotAPI.py", line 6
def __init__(self, api_key, region=Consts.REGIONS['north_america'])
^
SyntaxError: invalid syntax
I have not used Python for that long, I am just using it because it facilitates what I'm trying to do well, but I have used various other languages and as far as I can tell you would want to close these ()'s in this statement def __init__(self, api_key, region=Consts.REGIONS['north_america']) however I keep getting a SyntaxError: invalid syntax?
the rest of that definition is as follows, if it helps.
class RiotAPI(object):
def __init__(self, api_key, region=Consts.REGIONS['north_america'])
self.api_key = api_key
self.region = region
EDIT 1: if i add a : at the end of def __init__(self, api_key, region=Consts.REGIONS['north_america']): like so, why? and after doing this i get a new syntax error that i will address after some wisedom
EDIT 2: new syntax error after fixing the first is,
Traceback (most recent call last):
File "/home/cambria/Main.py", line 1, in <module>
from RiotAPI import RiotAPI
File "/home/cambria/RiotAPI.py", line 11
args = ('api_key': self.api_key)
^
SyntaxError: invalid syntax
which is
def _request(self, api_url, params=()):
args = ('api_key': self.api_key)
for key, value in params.items():
if key not in args:
args[key] = value
EDIT 3: This should be the last of it.. no more syntax, just a
Traceback (most recent call last):
File "/home/cambria/Main.py", line 10, in <module>
main()
File "/home/cambria/Main.py", line 5, in main
respons3 = api.get_summoner_by_name('hi im gosan')
File "/home/cambria/RiotAPI.py", line 31, in get_summoner_by_name
return self._request(api_url)
File "/home/cambria/RiotAPI.py", line 12, in _request
for key, value in params.items():
AttributeError: 'tuple' object has no attribute 'items'
in
def _request(self, api_url, params=()):
args = {'api_key': self.api_key}
for key, value in params.items():
if key not in args:
args[key] = value
response = requests.get(
Consts.URL['base'].format(
proxy=self.region,
region=self.region,
url=api_url
),
params=args
)
print response.url
return response.json()
this is the only error i have received that i really don't know much on. Is this a result of there being no .items on my params? or i left it initialized as an empty dictionary?

The problem is just that you're missing a : at the end of the line.
def __init__(self, api_key, region=Consts.REGIONS['north_america']):
self.api_key = api_key
self.region = region

You forgot a ::
class RiotAPI(object):
def __init__(self, api_key, region=Consts.REGIONS['north_america']): # <HERE
self.api_key = api_key
self.region = region

Serializing twisted.protocols.amp.AmpList for testing

I have a command as follows:
class AddChatMessages(Command):
arguments = [
('messages', AmpList([('message', Unicode()), ('type', Integer())]))]
And I have a responder for it in a controller:
def add_chat_messages(self, messages):
for i, m in enumerate(messages):
messages[i] = (m['message'], m['type'])
self.main.add_chat_messages(messages)
return {}
commands.AddChatMessages.responder(add_chat_messages)
I am writing a unit test for it. This is my code:
class AddChatMessagesTest(ProtocolTestMixin, unittest.TestCase):
command = commands.AddChatMessages
data = {'messages': [{'message': 'hi', 'type': 'None'}]}
def assert_callback(self, unused):
pass
Where ProtocolMixin is as follows:
class ProtocolTestMixin(object):
def setUp(self):
self.protocol = client.CommandProtocol()
def assert_callback(self, unused):
raise NotImplementedError("Has to be implemented!")
def test_responder(self):
responder = self.protocol.lookupFunction(
self.command.commandName)
d = responder(self.data)
d.addCallback(self.assert_callback)
return d
It works if AmpList is not involved, but when it is - I get following error:
======================================================================
ERROR: test_responder
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/defer.py", line 139, in maybeDeferred
result = f(*args, **kw)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/utils.py", line 203, in runWithWarningsSuppressed
reraise(exc_info[1], exc_info[2])
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/utils.py", line 199, in runWithWarningsSuppressed
result = f(*a, **kw)
File "/Users/<username>/Projects/space/tests/client_test.py", line 32, in test_responder
d = responder(self.data)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1016, in doit
kw = command.parseArguments(box, self)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1717, in parseArguments
return _stringsToObjects(box, cls.arguments, protocol)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 2510, in _stringsToObjects
argparser.fromBox(argname, myStrings, objects, proto)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1209, in fromBox
objects[nk] = self.fromStringProto(st, proto)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1465, in fromStringProto
boxes = parseString(inString)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 2485, in parseString
return cls.parse(StringIO(data))
TypeError: must be string or buffer, not list
Which makes sense, but how do I serialize a list in AddChatMessagesTest.data?

The responder expects to be called with a serialized box. It will then deserialize it, dispatch the objects to application code, take the object the application code returns, serialize it, and then return that serialized form.
For a few AMP types. most notably String, the serialized form is the same as the deserialized form, so it's easy to overlook this.
I think that you'll want to pass your data through Command.makeArguments in order to produce an object suitable to pass to a responder.
For example:
>>> from twisted.protocols.amp import Command, Integer
>>> class Foo(Command):
... arguments = [("bar", Integer())]
...
>>> Foo.makeArguments({"bar": 17}, None)
AmpBox({'bar': '17'})
>>>
If you do this with a Command that uses AmpList I think you'll find makeArguments returns an encoded string for the value of that argument and that the responder is happy to accept and parse that kind of string.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

to get android manifest meta data value by python xpath - python

Related

xml.etree giving an Invalid Token Error When Parsing PDF Metadata

pyspark error: "object has no attribute '_get_object_id' " when trying to read file

!!python/object:main.ClassName throwing exception

basic python syntax that i don't quite get

Serializing twisted.protocols.amp.AmpList for testing

Categories

Resources

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

to get android manifest meta data value by python xpath - python

Related

xml.etree giving an Invalid Token Error When Parsing PDF Metadata

pyspark error: "object has no attribute '_get_object_id' " when trying to read file

!!python/object:__main__.ClassName throwing exception

basic python syntax that i don't quite get

Serializing twisted.protocols.amp.AmpList for testing

Categories

Resources

!!python/object:main.ClassName throwing exception