How to get testfile to outputstream flowfile in nifi executescript processor - python

I am trying to write a text file that contains dictionary to outputStream callback in Nifi ExcuteScript processor . Here is my code, but it doesn't work as expected -
import json
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
import os
import java.io.FileOutputStream
ofile=open("/home/nifi/data/outfile.txt", 'a')
with open("/home/nifi/data/validation.json",'r') as vfile:
validation = json.loads(vfile.read())
finaldict={}
dict1={}
class ModJSON(StreamCallback):
def __init__(self):
pass
def process(self, inputStream, outputStream):
text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
invalidrecord = json.loads(text)
for record in invalidrecord:
#print(finaldict)
finaldict.update(record)
# print(finaldict)
finaldict['errors'] = []
errordict = {}
# loop through each field from validation json
for field in validation:
for element, value in record.items():
if field == element:
dtype = type(value)
dict1 = validation[field]
for validtype in dict1:
if validtype['validationType'] == 'datatype':
if str(dtype) == validtype['check']:
pass
else:
errordict = {"element": field}
errordict.update({"errorCode": validtype['errorCode']})
finaldict["errors"].append(errordict)
if validtype['validationType'] == 'maxlength':
dlen = len(str(value))
if str(dlen) <= str(validtype['check']):
pass
else:
errordict = {"element": field}
errordict.update({"errorCode": validtype['errorCode']})
finaldict["errors"].append(errordict)
else:
pass
json.dump(finaldict, ofile)
outputStream.write(bytearray(ofile.encode('utf-8')))
flowFile = session.get()
if (flowFile != None):
flowFile = session.write(flowFile, ModJSON())
flowFile = session.putAttribute(flowFile, "filename", flowFile.getAttribute('filename').split('.')[0]+'_translated.json')
session.transfer(flowFile, REL_SUCCESS)
session.commit()
In above code, outputStream.write(bytearray(ofile.encode('utf-8'))) is not writing outfile.txt to flowfile and throwing:
NullPointerException at session.transfer(flowFile, REL_SUCCESS)
Can someone please suggest what is wrong with these code.
Related question is in below link, but couldn't find what i am looking for, as it does not send the text_file to flowfile output of nifi.
Access Json element and write to a text file using python ExecuteScript processor

Related

(Python) Delete file after send to API

I just got to know the world of programming and python was the first thing I learned.. This program already can extract data from .txt file and send it to API..
But the things is I don't know how to delete the file,after the data have been extracted and send to the API... Here is my coding...
from fileinput import close
import os
import requests
from datetime import datetime
import glob
import time
'''List'''
data_send_list = []
'''Path'''
path = "./file"
'''File List'''
file_name = []
URL = 'http://.......'
def main():
#Main
print("Main Def" "\n")
#ScanFile
data_send_list = scan_files(path)
#send_API
for json in data_send_list:
send_api(URL, json)
def read_text_file(file_path):
with open (file_path, 'r') as file:
data_dictionary={}
data = file.readlines()
...............
'''UPDATE THE DICTIONARY'''
data_dictionary.update([(...)(...)])
return data_dictionary
def scan_files(path):
list = []
os.chdir(path)
for file in glob.glob("*.txt"):
list.append(read_text_file(file))
return list
def send_api(url,json,):
requests_session = requests.session()
post_api = requests_session.post(url,data=json)
print("Sending API")
if(post_api.status_code >= 200 and post_api.status_code <300):
print("Successful. Status code: ",post_api.status_code)
print("\n")
#i hope that i can delete the file here
else:
print("Failed to send to API. Status code: ",post_api.status_code)
print("\n")
close()
return post_api.status_code
I was hoping that if the data can be sent to API... and give output "status code: 200" the data file will be deleted... while the data that is not sent, the file will remain
There would be a lot of better ways other than my answer.
import os
...
def send_api(url,json,path): # You need to add function parameter path to use at this function
requests_session = requests.session()
post_api = requests_session.post(url,data=json)
print("Sending API")
if(post_api.status_code >= 200 and post_api.status_code <300):
print("Successful. Status code: ",post_api.status_code)
print("\n")
os.remove(path) # use os.remove function to remove file
else:
print("Failed to send to API. Status code: ",post_api.status_code)
print("\n")
close()
return post_api.status_code

Generate XML files with Japanese characters using NiFi

I am converting a JSON payload to XML file using Python ExecuteScript processor in NiFi.
The JSON looks like this :
{
"Header": {
"Att1": 1,
"Att2": "value2",
"Att3": "1",
"Att4": "경기00자123"
}
}
The python script to convert this JSON to XML is as below :
import json
import xml.etree.ElementTree as ET
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
class ModJSON(StreamCallback):
def __init__(self):
pass
def process(self, inputStream, outputStream):
text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
data = json.loads(text)
root = ET.Element("headerinfo")
entity = ET.SubElement(root, "headerfile")
ET.SubElement(entity, "Att1").text = str(data["Header"]["Att1"])
ET.SubElement(entity, "Att2").text = str(data["Header"]["Att2"])
ET.SubElement(entity, "Att3").text = str(data["Header"]["Att3"])
ET.SubElement(entity, "Att4").text = data["Header"]["Att4"].encode("utf8")
xmlNew = ET.tostring(root)
outputStream.write(bytearray(xmlNew))
flowFile = session.get()
if flowFile != None:
try :
flowFile = session.write(flowFile, ModJSON())
flowFile = session.putAttribute(flowFile, "filename", 'headerfile.xml')
session.transfer(flowFile, REL_SUCCESS)
session.commit()
except Exception as e:
flowFile = session.putAttribute(flowFile,'python_error', str(e))
session.transfer(flowFile, REL_FAILURE)
No matter how I try to encode the Att4 with Japanese characters, it looks like this in the resulting XML :
京都111を3
How can I change the code to fix this?
Tried a lot of different things but nothing seems to work.
seems there is an issue with byte-string in jython - they are automatically converted to str object with incorrect encoding.
however ElementTree has write function that could write to a file-like object and OutputStream (java object) actually implements write function - so, we could make ElementTree write directly to OutputStream
import json
import xml.etree.ElementTree as ET
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
class ModJSON(StreamCallback):
def process(self, inputStream, outputStream):
text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
data = json.loads(text)
root = ET.Element("headerinfo")
entity = ET.SubElement(root, "headerfile")
ET.SubElement(entity, "Att1").text = str(data["Header"]["Att1"])
ET.SubElement(entity, "Att2").text = str(data["Header"]["Att2"])
ET.SubElement(entity, "Att3").text = str(data["Header"]["Att3"])
ET.SubElement(entity, "Att4").text = data["Header"]["Att4"]
ET.ElementTree(root).write(outputStream, encoding='utf-8')
flowFile = session.get()
if flowFile != None:
try :
flowFile = session.write(flowFile, ModJSON())
flowFile = session.putAttribute(flowFile, "filename", 'headerfile.xml')
session.transfer(flowFile, REL_SUCCESS)
session.commit()
except Exception as e:
flowFile = session.putAttribute(flowFile,'python_error', str(e))
session.transfer(flowFile, REL_FAILURE)

Query mysql from json came from angularjs - Python

Hi Guys I already get the json data and this is my json data from angularjs, can someone help me on this?. I'm stuck only on it. Thank you.
{u'isChecked': {u'49871': False, u'49870': True, u'113634': False}}
then in my python I want to update mysql when id is found in the json data
right now here is my code for updating and i want to connect it to my json data
updatetable = """UPDATE table_x
SET value = '1'
"""
db.session.execute(updatetable)
db.session.commit()
Here is a solution
#!/usr/bin/env python
import platform
import sys
import urllib2
import simplejson as json
def update_table(id):
sqlUpdateStr = "UPDATE table_x SET value = '1' where id="+id
print "Executing update: " + sqlUpdateStr
def test_parse_json():
print "Loading json ..."
req = urllib2.Request("http://localhost/example.json")
opener = urllib2.build_opener()
f = opener.open(req)
# json.load() will deserialize your JSON document and return a Python object.
data = json.load(f)
print data['isChecked']
print ""
for id in data['isChecked']:
id_val = str2bool(data['isChecked'][id])
if id_val == True:
print "Found id for update: " + id
update_table(id)
else:
print "Ignoring record with id=" + id
def str2bool(v):
return v.lower() in ("yes", "true", "t", "1")
def main():
test_parse_json()
return
if __name__ == '__main__':
main()
and the content of example.json is:
{
"isChecked":{
"49870":"true",
"49871":"false",
"113634":"false"
}
}

Problems using multipart_encode (poster library)

I am trying to upload a file using multipart_encode to realize the MIME process. However, I met the following error AttributeError: multipart_yielder instance has no attribute '__len__'. Below are is my approach, I really appreciate if anyone can give me some suggestions.
url = "https://pi-user-files.s3-external-1.amazonaws.com/"
post_data = {}
#data is a dict
post_data['AWSAccessKeyId']=(data['ticket']['AWSAccessKeyId'])
post_data['success_action_redirect']=(data['ticket']['success_action_redirect'])
post_data['acl']=(data['ticket']['acl'])
post_data['key']=(data['ticket']['key'])
post_data['signature']=(data['ticket']['signature'])
post_data['policy']=(data['ticket']['policy'])
post_data['Content-Type']=(data['ticket']['Content-Type'])
#I would like to upload a text file "new 2"
post_data['file']=open("new 2.txt", "rb")
datagen, headers = multipart_encode(post_data)
request2 = urllib2.Request(url, datagen, headers)
result = urllib2.urlopen(request2)
If you want to send a file you should wrap other parameters with a MultipartParam object, example code for creating a send file request:
from poster.encode import multipart_encode, MultipartParam
import urllib2
def postFileRequest(url, paramName, fileObj, additionalHeaders={}, additionalParams={}):
items = []
#wrap post parameters
for name, value in additionalParams.items():
items.append(MultipartParam(name, value))
#add file
items.append(MultipartParam.from_file(paramName, fileObj))
datagen, headers = multipart_encode(items)
#add headers
for item, value in additionalHeaders.iteritems():
headers[item] = value
return urllib2.Request(url, datagen, headers)
Also I think you should execute register_openers() once at the beginning. Some details you can find in docs
The problem is that in httplib.py, the generator is not detected as such and is treated instead like a string that holds the full data to be sent (and therefore it tries to find its length):
if hasattr(data,'read') and not isinstance(data, array): # generator
if self.debuglevel > 0: print "sendIng a read()able"
....
A solution is to make the generator act like a read()able:
class GeneratorToReadable():
def __init__(self, datagen):
self.generator = datagen
self._end = False
self.data = ''
def read(self, n_bytes):
while not self._end and len(self.data) < n_bytes:
try:
next_chunk = self.generator.next()
if next_chunk:
self.data += next_chunk
else:
self._end = True
except StopIteration:
self._end = True
result = self.data[0:n_bytes]
self.data = self.data[n_bytes:]
return result
and use like so:
datagen, headers = multipart_encode(post_data)
readable = GeneratorToReadable(datagen)
req = urllib2.Request(url, readable, headers)
result = urllib2.urlopen(req)

python getelementbyid from string

I have the following program, that is trying to upload a file (or files) to an image upload site, however I am struggling to find out how to parse the returned HTML to grab the direct link (contained in a <dd class="download"><input type="text" value="{hereisthelink}"></dd> ).
I have the code below:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pycurl
import urllib
import urlparse
import xml.dom.minidom
import StringIO
import sys
import gtk
import os
import imghdr
import locale
import gettext
try:
import pynotify
except:
print "Install pynotify. It's whoasome!"
APP="Uploadir Uploader"
DIR="locale"
locale.setlocale(locale.LC_ALL, '')
gettext.bindtextdomain(APP, DIR)
gettext.textdomain(APP)
_ = gettext.gettext
##STRINGS
uploading = _("Uploading image to Uploadir.")
oneimage = _("1 image has been successfully uploaded.")
multimages = _("images have been successfully uploaded.")
uploadfailed = _("Unable to upload to Uploadir.")
class Uploadir:
def __init__(self, args):
self.images = []
self.urls = []
self.broadcasts = []
self.username=""
self.password=""
if len(args) == 1:
return
else:
for file in args:
if file == args[0] or file == "":
continue
if file.startswith("-u"):
self.username = file.split("-u")[1]
#print self.username
continue
if file.startswith("-p"):
self.password = file.split("-p")[1]
#print self.password
continue
self.type = imghdr.what(file)
self.images.append(file)
for file in self.images:
self.upload(file)
self.setClipBoard()
self.broadcast(self.broadcasts)
def broadcast(self, l):
try:
str = '\n'.join(l)
n = pynotify.Notification(str)
n.set_urgency(pynotify.URGENCY_LOW)
n.show()
except:
for line in l:
print line
def upload(self, file):
#Try to login
cookie_file_name = "/tmp/uploadircookie"
if ( self.username!="" and self.password!=""):
print "Uploadir authentication in progress"
l=pycurl.Curl()
loginData = [ ("username",self.username),("password", self.password), ("login", "Login") ]
l.setopt(l.URL, "http://uploadir.com/user/login")
l.setopt(l.HTTPPOST, loginData)
l.setopt(l.USERAGENT,"User-Agent: Uploadir (Python Image Uploader)")
l.setopt(l.FOLLOWLOCATION,1)
l.setopt(l.COOKIEFILE,cookie_file_name)
l.setopt(l.COOKIEJAR,cookie_file_name)
l.setopt(l.HEADER,1)
loginDataReturnedBuffer = StringIO.StringIO()
l.setopt( l.WRITEFUNCTION, loginDataReturnedBuffer.write )
if l.perform():
self.broadcasts.append("Login failed. Please check connection.")
l.close()
return
loginDataReturned = loginDataReturnedBuffer.getvalue()
l.close()
#print loginDataReturned
if loginDataReturned.find("<li>Your supplied username or password is invalid.</li>")!=-1:
self.broadcasts.append("Uploadir authentication failed. Username/password invalid.")
return
else:
self.broadcasts.append("Uploadir authentication successful.")
#cookie = loginDataReturned.split("Set-Cookie: ")[1]
#cookie = cookie.split(";",0)
#print cookie
c = pycurl.Curl()
values = [
("file", (c.FORM_FILE, file)),
("terms", "1"),
("submit", "submit")
]
buf = StringIO.StringIO()
c.setopt(c.URL, "http://uploadir.com/file/upload")
c.setopt(c.HTTPPOST, values)
c.setopt(c.COOKIEFILE, cookie_file_name)
c.setopt(c.COOKIEJAR, cookie_file_name)
c.setopt(c.WRITEFUNCTION, buf.write)
if c.perform():
self.broadcasts.append(uploadfailed+" "+file+".")
c.close()
return
self.result = buf.getvalue()
#print self.result
c.close()
doc = urlparse.urlparse(self.result)
print doc
self.urls.append(doc.getElementsByTagName("download")[0].childNodes[0].nodeValue)
def setClipBoard(self):
c = gtk.Clipboard()
c.set_text('\n'.join(self.urls))
c.store()
if len(self.urls) == 1:
self.broadcasts.append(oneimage)
elif len(self.urls) != 0:
self.broadcasts.append(str(len(self.urls))+" "+multimages)
if __name__ == '__main__':
uploadir = Uploadir(sys.argv)
The code that deals with the HTML parsing is here:
doc = urlparse.urlparse(self.result)
self.urls.append(doc.getElementsByTagName("download")[0].childNodes[0].nodeValue)
The urlparse module has nothing to do with parsing HTML. All it does is break a URL up into bits: protocol, network address, path, etc. For example:
>>> urlparse.urlparse("http://www.stackoverflow.com/questions/4699888")
ParseResult(scheme='http', netloc='www.stackoverflow.com', path='/questions/4699888', params='', query='', fragment='')
For parsing HTML, try BeautifulSoup.

Categories