Generate XML files with Japanese characters using NiFi - python

I am converting a JSON payload to an XML file using a Python ExecuteScript processor in NiFi.
The JSON looks like this :
{
"Header": {
"Att1": 1,
"Att2": "value2",
"Att3": "1",
"Att4": "경기00자123"
}
}
The python script to convert this JSON to XML is as below :
import json
import xml.etree.ElementTree as ET
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
# StreamCallback implementation that reads the flow file's JSON content and
# writes it back out as an XML document. This is the version from the question,
# whose output shows the non-ASCII Att4 value garbled.
class ModJSON(StreamCallback):
def __init__(self):
pass
# inputStream supplies the incoming JSON; outputStream receives the generated XML.
def process(self, inputStream, outputStream):
# Decode the whole stream as UTF-8 text before parsing it as JSON.
text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
data = json.loads(text)
# Build <headerinfo><headerfile>...</headerfile></headerinfo>.
root = ET.Element("headerinfo")
entity = ET.SubElement(root, "headerfile")
# One child element per "Header" attribute, converted to text with str().
ET.SubElement(entity, "Att1").text = str(data["Header"]["Att1"])
ET.SubElement(entity, "Att2").text = str(data["Header"]["Att2"])
ET.SubElement(entity, "Att3").text = str(data["Header"]["Att3"])
# NOTE(review): Att4 is stored as UTF-8 encoded bytes rather than as text;
# this looks like the point where the characters get mangled under Jython -- confirm.
ET.SubElement(entity, "Att4").text = data["Header"]["Att4"].encode("utf8")
# Serialize the tree to a string and write it to the flow file content.
xmlNew = ET.tostring(root)
outputStream.write(bytearray(xmlNew))
# Take the next queued flow file, if there is one, and convert its content.
flowFile = session.get()
if flowFile is not None:
    try:
        # Replace the JSON content with XML, then rename the flow file.
        flowFile = session.write(flowFile, ModJSON())
        flowFile = session.putAttribute(flowFile, "filename", 'headerfile.xml')
        session.transfer(flowFile, REL_SUCCESS)
        session.commit()
    except Exception as err:
        # Record the failure reason on the flow file and route it to failure.
        flowFile = session.putAttribute(flowFile, 'python_error', str(err))
        session.transfer(flowFile, REL_FAILURE)
No matter how I try to encode the Att4 with Japanese characters, it looks like this in the resulting XML :
京都111を3
How can I change the code to fix this?
Tried a lot of different things but nothing seems to work.

There seems to be an issue with byte strings in Jython: they are automatically converted to a str object with the wrong encoding.
However, ElementTree has a write function that can write to a file-like object, and OutputStream (a Java object) implements a write function, so we can make ElementTree write directly to the OutputStream:
import json
import xml.etree.ElementTree as ET
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
class ModJSON(StreamCallback):
    """Stream callback that converts the flow file's JSON payload to XML.

    The ``Header`` object of the incoming JSON becomes
    ``<headerinfo><headerfile><Att1>..</Att1>...</headerfile></headerinfo>``.
    """

    # Keys of the "Header" object copied into the XML, in output order.
    HEADER_FIELDS = ("Att1", "Att2", "Att3", "Att4")

    def process(self, inputStream, outputStream):
        """Read JSON from ``inputStream`` and write UTF-8 XML to ``outputStream``.

        Raises:
            ValueError: if the content is not valid JSON.
            KeyError: if ``Header`` or one of ``HEADER_FIELDS`` is missing.
        """
        text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        header = json.loads(text)["Header"]

        root = ET.Element("headerinfo")
        entity = ET.SubElement(root, "headerfile")
        for field in self.HEADER_FIELDS:
            # u"%s" keeps the value as unicode text for every field; str()
            # raises UnicodeEncodeError on non-ASCII strings under Python 2.
            ET.SubElement(entity, field).text = u"%s" % (header[field],)

        # Let ElementTree do the encoding and write straight to the Java
        # OutputStream instead of going through an intermediate byte string.
        ET.ElementTree(root).write(outputStream, encoding='utf-8')
# Fetch one flow file from the incoming queue; nothing to do when it is empty.
flowFile = session.get()
if flowFile is not None:
    try:
        # Rewrite the content through ModJSON and give the result its new name.
        flowFile = session.write(flowFile, ModJSON())
        flowFile = session.putAttribute(flowFile, "filename", 'headerfile.xml')
        session.transfer(flowFile, REL_SUCCESS)
        session.commit()
    except Exception as error:
        # Attach the error text so it is visible on the failure relationship.
        message = str(error)
        flowFile = session.putAttribute(flowFile, 'python_error', message)
        session.transfer(flowFile, REL_FAILURE)

Related

How to get testfile to outputstream flowfile in nifi executescript processor

I am trying to write a text file that contains a dictionary to the outputStream callback in a NiFi ExecuteScript processor. Here is my code, but it doesn't work as expected:
import json
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
import os
import java.io.FileOutputStream
# Output file opened in append mode when the script is evaluated; the code
# shown never closes it.
ofile=open("/home/nifi/data/outfile.txt", 'a')
# Load the validation rules; ModJSON.process() reads them as a mapping of
# field name -> list of rule dicts.
with open("/home/nifi/data/validation.json",'r') as vfile:
validation = json.loads(vfile.read())
# Module-level dictionaries; finaldict is updated in place by ModJSON.process().
finaldict={}
dict1={}
# StreamCallback that checks each incoming JSON record against the rules in
# `validation` and collects the failures under finaldict['errors'].
class ModJSON(StreamCallback):
def __init__(self):
pass
# inputStream supplies the flow file content; outputStream is meant to receive the result.
def process(self, inputStream, outputStream):
# Read the content as UTF-8 text and parse it; the loop below iterates the
# parsed value as a sequence of record dicts.
text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
invalidrecord = json.loads(text)
for record in invalidrecord:
#print(finaldict)
# Merge the record's fields into the module-level result dict.
finaldict.update(record)
# print(finaldict)
finaldict['errors'] = []
errordict = {}
# loop through each field from validation json
for field in validation:
for element, value in record.items():
if field == element:
dtype = type(value)
# Rules for this field; each rule is read as a dict with
# 'validationType', 'check' and 'errorCode' keys.
dict1 = validation[field]
for validtype in dict1:
if validtype['validationType'] == 'datatype':
# Compares the string form of the Python type with the rule's 'check' value.
if str(dtype) == validtype['check']:
pass
else:
errordict = {"element": field}
errordict.update({"errorCode": validtype['errorCode']})
finaldict["errors"].append(errordict)
if validtype['validationType'] == 'maxlength':
dlen = len(str(value))
# NOTE(review): both sides are converted to str, so this is a lexicographic
# comparison ("10" <= "9" is True), not a numeric length check.
if str(dlen) <= str(validtype['check']):
pass
else:
errordict = {"element": field}
errordict.update({"errorCode": validtype['errorCode']})
finaldict["errors"].append(errordict)
else:
pass
# Serialize the result as JSON into the file opened at module level.
json.dump(finaldict, ofile)
# NOTE(review): ofile is the file object returned by open(); file objects have
# no encode() method, so this call raises before anything reaches outputStream.
# The serialized text (e.g. json.dumps(finaldict)) is probably what should be
# encoded and written here -- confirm.
outputStream.write(bytearray(ofile.encode('utf-8')))
# Process one queued flow file, if any, and rename it after the conversion.
flowFile = session.get()
if flowFile is not None:
    flowFile = session.write(flowFile, ModJSON())
    # Keep the part of the current filename before the first dot.
    base_name = flowFile.getAttribute('filename').split('.')[0]
    flowFile = session.putAttribute(flowFile, "filename", base_name + '_translated.json')
    session.transfer(flowFile, REL_SUCCESS)
    session.commit()
In the code above, outputStream.write(bytearray(ofile.encode('utf-8'))) does not write outfile.txt to the flow file and throws:
NullPointerException at session.transfer(flowFile, REL_SUCCESS)
Can someone please suggest what is wrong with this code?
A related question is linked below, but I couldn't find what I am looking for there, as it does not send the text file to the flow file output of NiFi.
Access Json element and write to a text file using python ExecuteScript processor

getting "No JSON object could be decoded" for json URL

The API returns JSON in the browser, but when parsing it in Python I get this exception: No JSON object could be decoded. I have tried both json.load() and json.loads(), but both failed.
Here is the code.
# Fetches the BTC/INR ticker and shows the buy/sell prices in the indicator
# label; on any error the label is set to "!". Always returns True
# (presumably so a periodic timer keeps calling it -- confirm).
# NOTE(review): the response body is gzip-encoded (see the Content-Encoding
# check further down), so json.load() cannot parse it as-is.
def handler_timeout(self):
try:
data = json.load(
urlopen(
'https://www.zebapi.com/api/v1/market/ticker/btc/inr'
)
)
buy_price = data['buy']
sell_price = data['sell']
# "{:,}" formats the prices with thousands separators.
status_message = "Buy: ₹ " + "{:,}".format(buy_price) + " Sell: ₹ " + "{:,}".format(sell_price)
self.ind.set_label(status_message, "")
except Exception, e:
# Print the error and show a placeholder label instead of the prices.
print str(e)
self.ind.set_label("!", "")
return True
Here is the output for urlopen(URL):
<addinfourl at 140336849031752 whose fp = <socket._fileobject object at 0x7fa2bb6f1cd0>>
Here is the output for urlopen(URL).read() :
��`I�%&/m�{J�J��t�`$ؐ#�������iG#)�*��eVe]f#�흼��{����{����;�N'���?\fdl��J�ɞ!���?~|?"~�o���G��~��=J�vv��;#�x���}��e���?=�N�u�/�h��ًWɧ�U�^���Ã���;���}�'���Q��ct
The content of the url is gzip-encoded.
>>> u = urllib.urlopen('https://www.zebapi.com/api/v1/market/ticker/btc/inr')
>>> info = u.info()
>>> info['Content-Encoding']
'gzip'
Decompress the content.
import gzip
import io
import json
import urllib
# Download the gzip-compressed ticker response, decompress it in memory and
# print the decoded JSON document.
response = urllib.urlopen('https://www.zebapi.com/api/v1/market/ticker/btc/inr')
with io.BytesIO(response.read()) as compressed:
    ticker = json.load(gzip.GzipFile(fileobj=compressed))
    print(ticker)
or use requests which decode gzip automatically:
import requests
# requests decodes gzip-encoded response bodies itself, so .json() can parse the result directly.
print requests.get('https://www.zebapi.com/api/v1/market/ticker/btc/inr').json()

Query mysql from json came from angularjs - Python

Hi guys, I already have the JSON data; this is what I receive from AngularJS. Can someone help me with this? It is the only part I'm stuck on. Thank you.
{u'isChecked': {u'49871': False, u'49870': True, u'113634': False}}
Then, in my Python code, I want to update MySQL when an id is found in the JSON data.
Right now, here is my code for the update, and I want to connect it to my JSON data:
# NOTE(review): the statement has no WHERE clause, so it sets value = '1' on
# every row of table_x rather than only on the ids found in the JSON data.
updatetable = """UPDATE table_x
SET value = '1'
"""
# Run the statement on the current session and commit it.
db.session.execute(updatetable)
db.session.commit()
Here is a solution
#!/usr/bin/env python
import platform
import sys
import urllib2
import simplejson as json
def update_table(id):
    """Print the UPDATE statement that flags the row with the given id.

    Args:
        id: Row identifier, either an int or a string of digits (JSON object
            keys arrive as strings).

    Raises:
        ValueError: if ``id`` is a string that is not an integer; this keeps
            arbitrary text from the JSON document out of the SQL statement.
    """
    # int() validates the value and lets callers pass either an int or a str.
    row_id = int(id)
    sqlUpdateStr = "UPDATE table_x SET value = '1' where id=" + str(row_id)
    print("Executing update: " + sqlUpdateStr)
def test_parse_json():
    """Fetch the example JSON document and update every id flagged as checked.

    Downloads ``http://localhost/example.json``, reads its ``isChecked``
    mapping (id -> flag) and calls ``update_table`` for each id whose flag
    ``str2bool`` reports as true; all other ids are only reported.
    """
    print("Loading json ...")
    req = urllib2.Request("http://localhost/example.json")
    opener = urllib2.build_opener()
    f = opener.open(req)
    try:
        # json.load() will deserialize your JSON document and return a Python object.
        data = json.load(f)
    finally:
        # Release the HTTP response even when the body is not valid JSON.
        f.close()
    print(data['isChecked'])
    print("")
    for record_id, flag in data['isChecked'].items():
        if str2bool(flag):
            print("Found id for update: " + record_id)
            update_table(record_id)
        else:
            print("Ignoring record with id=" + record_id)
def str2bool(v):
    """Interpret a JSON flag as a boolean.

    Args:
        v: A real boolean (returned unchanged) or a string; the strings
            "yes", "true", "t" and "1" count as true in any letter case,
            every other string as false.

    Returns:
        bool: The truth value of ``v``.
    """
    if isinstance(v, bool):
        # JSON true/false is already decoded to a Python bool, which has no
        # lower() method.
        return v
    return v.lower() in ("yes", "true", "t", "1")
def main():
    """Script entry point: run the JSON-driven update once."""
    test_parse_json()


if __name__ == '__main__':
    main()
and the content of example.json is:
{
"isChecked":{
"49870":"true",
"49871":"false",
"113634":"false"
}
}

Loop through to change parameter in Python 2.7

So I have this code that creates an output in Excel.
What I want to do now is make the parameter (lid) in payload loop through a list of other IDs.
This list is stored in a txt file.
Can anyone modify my code to show me how to do that, please?
The text file has values
1654,
3457,
4327,
1234
(can also hard code these somewhere in the script if it is easier)
from __future__ import print_function
import sys
import csv
import collections
import itertools
try:
import requests
from requests import exceptions
import base64
import json
except ImportError as e:
import requests
from requests import exceptions
import base64
import json
print ("Import Error: %s" % e)
# Token sent in the HTTP Basic Authorization header; fill in before running.
API_TOKEN = u''
# Encode explicitly: bytes(<unicode string>) raises TypeError on Python 3 and
# fails on non-ASCII tokens on Python 2, whereas .encode() works on both.
b64token = base64.b64encode(API_TOKEN.encode('utf-8'))
REST_BASE_URL = u'https://visdasa.dsds.com/rest/'
# API URL request examples (choose one)
REST_URL = u'rawdata/'
FULL_URL = REST_BASE_URL + REST_URL
def retrieve_data(api_url):
    """Download raw data from the REST API and write it to a CSV file.

    Sends an authenticated GET request to ``api_url`` and, on success, writes
    a header row followed by every row of ``data.raw_data`` to ``test.csv``.
    Request errors, non-OK responses and I/O errors are printed, not raised.

    The per-location tuples the original built from ``data.locations`` were
    never used, so that loop has been dropped; nothing from ``locations`` is
    written to the file.

    Args:
        api_url: Full URL of the REST endpoint to query.
    """
    try:
        # connect to the API and retrieve data
        bauth_header = {'Authorization': 'Basic ' + b64token.decode('UTF-8')}
        # NOTE(review): the 'stop' value contains a space ('2014- 8-01');
        # this looks like a typo for '2014-08-01' -- confirm before changing.
        payload = {'start': '2014-08-01T00:00:01', 'stop': '2014- 8-01T23:59:59', 'category': 'ots', 'lid': '9263'}
        response = requests.get(api_url, headers=bauth_header, params=payload)

        # check the api response
        if response.status_code != requests.codes.ok:
            json_data = json.loads(response.text)
            # If not successful api call the throw an error
            raise requests.RequestException(
                "Error with the api. Status code : %i \n Json response: %s"
                % (response.status_code, json_data))

        # Convert from json data
        json_data = json.loads(response.text)
        header_row = ("ID", "Site Name", "Network ID", "Network Lablel", "Company Branch ID",
                      "Comapany Label", "Count", "timestamp", "ots_duration", "notsure1", "notsure2")
        try:
            with open('C:\\Users\\teddy\\Desktop\\party\\test.csv', 'w') as wFile:
                writer = csv.writer(wFile, delimiter=',')
                # csv writers have no write() method; writerow() emits one row.
                writer.writerow(header_row)
                writer.writerows(json_data["data"]["raw_data"])
        except IOError as e:
            # The original also called an undefined `logger` here, which would
            # have raised NameError inside the handler.
            print("I/O error({0}): {1}".format(e.errno, e.strerror))
    except (requests.exceptions.ProxyError, requests.RequestException) as e:
        print(e)
def main():
    """Run a single download against FULL_URL, then exit the interpreter."""
    #retrieve_data(FULL_URL, PROXY_SETTINGS)
    retrieve_data(FULL_URL)
    sys.exit()


if __name__ == '__main__':
    main()
Why not just pass each lid value as a parameter to your retrieve_data function?
def retrieve_data(api_url):
would become
def retrieve_data(api_url, lid_value):
You would remove the hardcoded lid section of your payload so the payload would look like this
payload = {'start': '2014-08-01T00:00:01', 'stop': '2014- 8-01T23:59:59','category': 'ots'}
Then on the next line you can add
payload['lid'] = lid_value
In your main function you could then loop through the values in the text file. Here is a simple loop with a list.
def main():
    """Fetch the data once for every location id, then exit the interpreter."""
    location_ids = ['1654', '3457', '4327', '1234']
    for current_lid in location_ids:
        retrieve_data(FULL_URL, current_lid)
    sys.exit()

windows chrome refresh tab 0(or current tab) via command line

I'm trying to do it with Python's webbrowser module, but it does not have Chromium-specific functions. Is there another way, perhaps with a batch script?
I use this myself (I wrote it quickly, as it was only for personal use). With a lot of cleanup you might be able to get what you want. See https://developers.google.com/chrome-developer-tools/docs/remote-debugging
import urllib2
import urllib
import os
import subprocess
import json
from websocket import create_connection
def refresh_page(url):
    """Reload every open Chrome tab whose URL matches ``url``.

    Asks the remote-debugging endpoint on localhost:9222 for the list of open
    pages and sends a ``Page.reload`` command (ignoring the cache) over each
    matching page's WebSocket debugger URL.

    Args:
        url: Page URL to look for; compared case-insensitively.

    Raises:
        Exception: if no open page has the given URL.
    """
    listing = urllib2.urlopen('http://localhost:9222/json')
    try:
        data = json.load(listing)
    finally:
        # Close the HTTP response even when the listing is not valid JSON.
        listing.close()

    wanted = url.lower()
    found_page = False
    for page in data:
        if page['url'].lower() != wanted:
            continue
        found_page = True
        ws = create_connection(page['webSocketDebuggerUrl'])
        try:
            obj = {
                "id": 0,
                "method": "Page.reload",
                "params": {
                    "ignoreCache": True,
                    "scriptToEvaluateOnLoad": "",
                },
            }
            ws.send(json.dumps(obj))
            # Read the reply before closing, as the original did; its content
            # is not used.
            ws.recv()
        finally:
            # Do not leak the socket when send/recv fails.
            ws.close()
    if not found_page:
        raise Exception("No pageFound")
def open_or_refresh(file_name):
    """Refresh the Chrome tab showing a local file, launching Chrome if needed.

    The path is turned into a ``file:///`` URL and passed to ``refresh_page``.
    If that fails for any reason, Chrome is started with remote debugging
    enabled on port 9222 and the URL as its argument.

    Args:
        file_name: Windows path of the file to show, e.g. ``C:\\test.html``.
    """
    # Percent-encode every character except the path punctuation listed here.
    file_name = "".join([f if f in r'\/:*?"<>|' else urllib.quote(f) for f in file_name])
    file_name = 'file:///' + file_name.replace('\\', '/')
    file_name = file_name.encode('ascii', 'ignore')
    try:
        refresh_page(file_name)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; any ordinary failure still falls back to launching Chrome.
        cmd = (r'"%(LOCALAPPDATA)s\Google\Chrome\Application\chrome.exe"' % os.environ
               + r' --remote-debugging-port=9222 "%s"' % file_name)
        subprocess.Popen(cmd)
# Called twice: open_or_refresh() falls back to launching Chrome when the
# refresh attempt fails, so the second call can refresh the tab the first one
# opened (presumably the intent -- confirm).
open_or_refresh(r"C:\test.html")
open_or_refresh(r"C:\test.html")

Categories