import sys
import os, time
import cognitive_face as CF
import global_variables as global_var
import urllib
import sqlite3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
Key = global_var.key
CF.Key.set(Key)
BASE_URL = global_var.BASE_URL # Replace with your regional Base URL
CF.BaseUrl.set(BASE_URL)
def get_person_id():
    person_id = ''
    extractId = str(sys.argv[1])[-2:]
    connect = sqlite3.connect("Face-DataBase")
    c = connect.cursor()
    cmd = "SELECT * FROM Students WHERE ID = " + extractId
    c.execute(cmd)
    row = c.fetchone()
    person_id = row[3]
    connect.close()
    return person_id

if len(sys.argv) is not 1:
    currentDir = os.path.dirname(os.path.abspath(__file__))
    imageFolder = os.path.join(currentDir, "dataset/" + str(sys.argv[1]))
    person_id = get_person_id()
    for filename in os.listdir(imageFolder):
        if filename.endswith(".jpg"):
            print(filename)
            imgurl = urllib.request.pathname2url(os.path.join(imageFolder, filename))
            imgurl = imgurl[3:]
            print("imageurl = {}".format(imgurl))
            res = CF.face.detect(imgurl)
            if len(res) != 1:
                print("No face detected in image")
            else:
                res = CF.person.add_face(imgurl, global_var.personGroupId, person_id)
                print(res)
                time.sleep(6)
else:
    print("supply attributes please from dataset folder")
A:\microsoft api FaceRecognition-Attendance-Marking-master>python add_person_faces.py user97
User.97.1.jpg
imageurl = A:/microsoft%20api%20FaceRecognition-Attendance-Marking-master/dataset/user97/User.97.1.jpg
Traceback (most recent call last):
File "add_person_faces.py", line 42, in <module>
res = CF.face.detect(imgurl)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\cognitive_face\face.py", line 41, in detect
'POST', url, headers=headers, params=params, json=json, data=data)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\cognitive_face\util.py", line 105, in request
error_msg.get('message'))
cognitive_face.util.CognitiveFaceException: Error when calling Cognitive Face API:
status_code: 400
code: InvalidURL
message: Invalid image URL.
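For what it's worth, the traceback shows the percent-encoded drive path (A:/microsoft%20api.../User.97.1.jpg) is being sent to the Face API as if it were a web URL, which the service rejects with InvalidURL. A minimal, hedged sketch of the loop body, assuming the cognitive_face SDK also accepts a plain local file path for the image argument (so it uploads the file bytes instead of passing a URL):

for filename in os.listdir(imageFolder):
    if filename.endswith(".jpg"):
        img_path = os.path.join(imageFolder, filename)   # plain on-disk path, no pathname2url
        res = CF.face.detect(img_path)
        if len(res) != 1:
            print("No face detected in image")
        else:
            res = CF.person.add_face(img_path, global_var.personGroupId, person_id)
            print(res)
            time.sleep(6)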
I am using this much-shared code to try to upload a file to SharePoint using Shareplum, into the Shared Documents folder.
import requests
from shareplum import Office365
# Set Login Info
username = 'my.email#address.com'
password = 'myverifiedapppassword'
site_name = 'mysite'
base_path = 'https://xxxxxxxx.sharepoint.com'
doc_library = 'Shared%20Documents'
file_name = "hellotest.txt" #when your file in the same directory
# Obtain auth cookie
authcookie = Office365(base_path, username=username, password=password).GetCookies()
session = requests.Session()
session.cookies = authcookie
session.headers.update({'user-agent': 'python_bite/v1'})
session.headers.update({'accept': 'application/json;odata=verbose'})
session.headers.update({'X-RequestDigest': 'FormDigestValue'})
response = session.post(url=base_path + "/sites/" + site_name + "/_api/web/GetFolderByServerRelativeUrl('" + doc_library + "')/Files/add(url='a.txt',overwrite=true)",
                        data="")
session.headers.update({'X-RequestDigest': response.headers['X-RequestDigest']})
# Upload file
with open(file_name, 'rb') as file_input:
    try:
        response = session.post(
            url=base_path + "/sites/" + site_name + f"/_api/web/GetFolderByServerRelativeUrl('" + doc_library + "')/Files/add(url='"
                + file_name + "',overwrite=true)",
            data=file_input)
        print("response: ", response.status_code) #it returns 200
        if response.status_code == '200':
            print("File uploaded successfully")
    except Exception as err:
        print("Something went wrong: " + str(err))

print('File Uploaded Successfully')
The problem occurs when running the code: I always get a traceback and a KeyError, as follows:
Traceback (most recent call last):
File "S:\upload.py", line 22, in
session.headers.update({'X-RequestDigest': response.headers['X-RequestDigest']})
File "C:\Python39\lib\site-packages\requests\structures.py", line 54, in getitem
return self._store[key.lower()][1]
KeyError: 'x-requestdigest'
Something to do with X-RequestDigest isn't working properly, in line 22, but I cannot figure out what.
Any tips would be greatly appreciated!!!
thanks
I have tried the below code and it is working.
from shareplum import Office365
from shareplum import Site
from shareplum.site import Version
#Logging info
server_url = "https://example.sharepoint.com/"
site_url = server_url + "sites/my_site_name"
Username = 'myusername'
Password = 'mypassword'
Sharepoint_folder = 'Shared Documents'
fileName = 'myfilename'
def file_upload_to_sharepoint(**context):
    authcookie = Office365(server_url, username=Username, password=Password).GetCookies()
    site = Site(site_url, version=Version.v365, authcookie=authcookie)
    folder = site.Folder(Sharepoint_folder)
    with open(fileName, mode='rb') as file:
        fileContent = file.read()
    folder.upload_file(fileContent, "filename.bin")

file_upload_to_sharepoint()
Let me know if this works for you as well.
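Alternatively, if you want to keep the raw REST calls from the question: the KeyError appears because that first POST does not return an X-RequestDigest header. The digest is normally requested from the contextinfo endpoint instead; a hedged sketch reusing the session, base_path and site_name variables from the question (the field names assume the odata=verbose accept header already set above):

# Ask SharePoint for a form digest explicitly; with odata=verbose it comes
# back in the JSON body, not in a response header.
digest_response = session.post(url=base_path + "/sites/" + site_name + "/_api/contextinfo", data="")
form_digest = digest_response.json()['d']['GetContextWebInformation']['FormDigestValue']
session.headers.update({'X-RequestDigest': form_digest})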
What I am trying to do is perform a search on Splunk's API using Python. I am able to get a session key, but that's it. I'm new to both Python and Splunk, so I'm a bit out of my depth, and any help would be really appreciated.
The error:
Traceback (most recent call last):
File "splunkAPI.py", line 31, in <module>
sid = minidom.parseString(r.text).getElementsByTagName('sid')[0].firstChild.nodeValue
IndexError: list index out of range
python:
import time # need for sleep
from xml.dom import minidom
import json, pprint
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
base_url = 'https://___________:8089'
username = '______'
password = '______'
search_query = "____________"
#-------------------------get session token------------------------
r = requests.get(base_url + "/servicesNS/admin/search/auth/login",
                 data={'username': username, 'password': password}, verify=False)
session_key = minidom.parseString(r.text).getElementsByTagName('sessionKey')[0].firstChild.nodeValue
print("Session Key:", session_key)

#-------------------- perform search -------------------------
r = requests.post(base_url + '/services/search/jobs/', data=search_query,
                  headers={'Authorization': ('Splunk %s' % session_key)},
                  verify=False)
sid = minidom.parseString(r.text).getElementsByTagName('sid')[0].firstChild.nodeValue

done = False
while not done:
    r = requests.get(base_url + '/services/search/jobs/' + sid,
                     headers={'Authorization': ('Splunk %s' % session_key)},
                     verify=False)
    response = minidom.parseString(r.text)
    for node in response.getElementsByTagName("s:key"):
        if node.hasAttribute("name") and node.getAttribute("name") == "dispatchState":
            dispatchState = node.firstChild.nodeValue
            print("Search Status: ", dispatchState)
            if dispatchState == "DONE":
                done = True
            else:
                time.sleep(1)

r = requests.get(base_url + '/services/search/jobs/' + sid + '/results/',
                 headers={'Authorization': ('Splunk %s' % session_key)},
                 data={'output_mode': 'json'},
                 verify=False)
pprint.pprint(json.loads(r.text))
Hmm... that code looks awfully familiar :P Unfortunately, error checking wasn't that important when I wrote it.
The issue you see occurs if the search_query is not defined properly. It must start with search=. Also note that you need to include an initial search command when doing a standard Splunk search.
For example, search=search index=* will work, but search=index=* will not.
If you need to include quotes in your search string, I suggest you use something like the following format.
search_query = """search=search index=* "a search expression" | stats count"""
I tried this, but it did not give the needed result; I am not sure what is missing.
import urllib
import httplib2 #import library
import json
import pprint
import time
import re
from xml.dom import minidom
searchquery = 'search index="movable_in" sourcetype="movable:in:assets" | stats avg(exposure_score)'
myhttp = httplib2.Http()
baseurl = 'https://xxxx.splunkxxx.com:8089'
usernamesp = 'xxxx'
passwordsp = 'xxxx'
def get_splunk_result(searchquery):
    # Step 1: Get a session key
    servercontent = myhttp.request(f'{baseurl}/services/auth/login', 'POST', headers={},
                                   body=urllib.parse.urlencode({'username': usernamesp, 'password': passwordsp}))[1]
    sessionkey = minidom.parseString(servercontent).getElementsByTagName('sessionKey')[0].childNodes[0].nodeValue
    # print("====>sessionkey: %s <====" % sessionkey)
    sid = ''
    # ------------------
    if not searchquery.startswith('search'):
        searchquery = f'search {searchquery}'
    # Step 2: Get a sid with the search query
    i = 0
    while True:
        time.sleep(1)
        try:
            searchjob = myhttp.request(f'{baseurl}/services/search/jobs', 'POST',
                                       headers={F'Authorization': F'Splunk %s' % sessionkey},
                                       body=urllib.parse.urlencode({'search': searchquery}))[1]
            sid = minidom.parseString(searchjob).getElementsByTagName('sid')[0].childNodes[0].nodeValue
            break
        except:
            i = i + 1
            # print(i)
            if (i > 30): break
    # print("====>SID: %s <====" % sid)
    # Step 3: Get search status
    myhttp.add_credentials(usernamesp, passwordsp)
    servicessearchstatusstr = '/services/search/jobs/%s/' % sid
    isnotdone = True
    while isnotdone:
        searchstatus = myhttp.request(f'{baseurl}{servicessearchstatusstr}', 'GET')[1]
        isdonestatus = re.compile('isDone">(0|1)')
        strstatus = str(searchstatus)
        isdonestatus = isdonestatus.search(strstatus).groups()[0]
        if (isdonestatus == '1'):
            isnotdone = False
    # Step 4: Get the search result
    services_search_results_str = '/services/search/jobs/%s/results?output_mode=json_rows&count=0' % sid
    searchresults = myhttp.request(f'{baseurl}{services_search_results_str}', 'GET')[1]
    searchresults = json.loads(searchresults)
    # searchresults = splunk_result(searchresults)
    return searchresults

output = get_splunk_result(searchquery)
print(output)
This is the error I am getting from the logs:
[ERROR] KeyError: 'Text'
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 51, in lambda_handler
pdfText += item["Text"] + '\n'
I am trying to run a form analysis via Textract to extract data from the form and save it to S3 as a .csv file.
My code is below:
import boto3
import os
import json

def getJobResults(jobId):
    pages = []
    textract = boto3.client('textract')
    response = textract.get_document_analysis(JobId=jobId)
    pages.append(response)
    nextToken = None
    if('NextToken' in response):
        nextToken = response['NextToken']
    while(nextToken):
        response = textract.get_document_analysis(JobId=jobId, NextToken=nextToken)
        pages.append(response)
        nextToken = None
        if('NextToken' in response):
            nextToken = response['NextToken']
    return pages

def lambda_handler(event, context):
    notificationMessage = json.loads(json.dumps(event))['Records'][0]['Sns']['Message']
    pdfTextExtractionStatus = json.loads(notificationMessage)['Status']
    pdfTextExtractionJobTag = json.loads(notificationMessage)['JobTag']
    pdfTextExtractionJobId = json.loads(notificationMessage)['JobId']
    pdfTextExtractionDocumentLocation = json.loads(notificationMessage)['DocumentLocation']
    pdfTextExtractionS3ObjectName = json.loads(json.dumps(pdfTextExtractionDocumentLocation))['S3ObjectName']
    pdfTextExtractionS3Bucket = json.loads(json.dumps(pdfTextExtractionDocumentLocation))['S3Bucket']
    print(pdfTextExtractionJobTag + ' : ' + pdfTextExtractionStatus)
    pdfText = ''
    if(pdfTextExtractionStatus == 'SUCCEEDED'):
        response = getJobResults(pdfTextExtractionJobId)
        for resultPage in response:
            for item in resultPage["Blocks"]:
                if item["BlockType"] == "KEY_VALUE_SET" :
                    EntityTypes : ['KEY'|'VALUE']
                    pdfText += item["Text"] + '\n'
    s3 = boto3.client('s3')
    outputTextFileName = os.path.splitext(pdfTextExtractionS3ObjectName)[0] + '.csv'
    s3.put_object(Body=pdfText, Bucket=pdfTextExtractionS3Bucket, Key=outputTextFileName)
The documentation I am following is: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/textract.html#Textract.Client.get_document_analysis
Any advice would be greatly appreciated! Thank you
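For illustration, the KeyError comes from the fact that only blocks such as LINE and WORD carry a top-level "Text" field in the get_document_analysis response; KEY_VALUE_SET blocks reference their text only through child WORD blocks listed under Relationships. A minimal, hedged sketch of a guarded inner loop using the question's variable names (it collects line text rather than true key/value pairs, which would require walking the Relationships):

for resultPage in response:
    for item in resultPage["Blocks"]:
        # LINE and WORD blocks expose "Text"; KEY_VALUE_SET blocks do not.
        if item["BlockType"] in ("LINE", "WORD"):
            pdfText += item.get("Text", "") + '\n'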
When I run the following code, I keep getting this error:
Traceback (most recent call last):
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 94, in <module>
main()
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 71, in main
for final_url in pool.imap(handle_listing, listings):
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenpool.py", line 232, in next
val = self.waiters.get().wait()
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 166, in wait
return self._exit_event.wait()
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\event.py", line 120, in wait
current.throw(*self._exc)
File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 192, in main
result = function(*args, **kwargs)
File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 35, in handle_listing
title, = TITLE_MATCH.match(listing_title).groups()
AttributeError: 'NoneType' object has no attribute 'groups'
What is wrong?
It has something to do with the Title match but I don't know how to fix it!
If you could help me I would really appreciate it!
Thanks!
from gzip import GzipFile
from cStringIO import StringIO
import re
import webbrowser
import time
from difflib import SequenceMatcher
import os
import sys
from BeautifulSoup import BeautifulSoup
import eventlet
from eventlet.green import urllib2
import urllib2
import urllib
def download(url):
    print "Downloading:", url
    s = urllib2.urlopen(url).read()
    if s[:2] == '\x1f\x8b':
        ifh = GzipFile(mode='rb', fileobj=StringIO(s))
        s = ifh.read()
    print "Downloaded: ", url
    return s

def replace_chars(text, replacements):
    return ''.join(replacements.get(x, x) for x in text)

def handle_listing(listing_url):
    listing_document = BeautifulSoup(download(listing_url))
    # ignore pages that link to yellowpages
    if not listing_document.find("a", href=re.compile(re.escape("http://www.yellowpages.com/") + ".*")):
        listing_title = listing_document.title.text
        reps = {' ': '-', ',': '', '\'': '', '[': '', ']': ''}
        title, = TITLE_MATCH.match(listing_title).groups()
        address, = ADDRESS_MATCH.match(listing_title).groups()
        yellow_page_url = "http://www.yellowpages.com/%s/%s?order=distance" % (
            replace_chars(address, reps),
            replace_chars(title, reps),
        )
        yellow_page = BeautifulSoup(download(yellow_page_url))
        page_url = yellow_page.find("h3", {"class": "business-name fn org"})
        if page_url:
            page_url = page_url.a["href"]
            business_name = title[:title.index(",")]
            page = BeautifulSoup(download(page_url))
            yellow_page_address = page.find("span", {"class": "street-address"})
            if yellow_page_address:
                if SequenceMatcher(None, address, yellow_page_address.text).ratio() >= 0.5:
                    pid, = re.search(r'p(\d{5,20})\.jsp', listing_url).groups(0)
                    page_escaped = replace_chars(page_url, {':': '%3A', '/': '%2F', '?': '%3F', '=': '%3D'})
                    final_url = "http://www.locationary.com/access/proxy.jsp?ACTION_TOKEN=proxy_jsp$JspView$SaveAction&inPlaceID=%s&xxx_c_1_f_987=%s" % (
                        pid, page_escaped)
                    return final_url

def main():
    pool = eventlet.GreenPool()
    listings_document = BeautifulSoup(download(START_URL))
    listings = listings_document.findAll("a", href=LOCATION_LISTING)
    listings = [listing['href'] for listing in listings]
    for final_url in pool.imap(handle_listing, listings):
        print final_url
        if str(final_url) is not None:
            url = str(final_url)
            req = urllib2.Request(url)
            response = urllib2.urlopen(req)
            page = response.read()
            time.sleep(2)

for a in range(2, 3):
    START_URL = 'http://www.locationary.com/place/en/US/New_Jersey/Randolph-page' + str(a) + '/?ACTION_TOKEN=NumericAction'
    TITLE_MATCH = re.compile(r'(.*) \(\d{1,10}.{1,100}\)$')
    ADDRESS_MATCH = re.compile(r'.{1,100}\((.*), .{4,14}, United States\)$')
    LOCATION_LISTING = re.compile(r'http://www\.locationary\.com/place/en/US/.{1,50}/.{1,50}/.{1,100}\.jsp')

if __name__ == '__main__':
    main()
Quoting from your error:
title, = TITLE_MATCH.match(listing_title).groups()
AttributeError: 'NoneType' object has no attribute 'groups'
TITLE_MATCH.match(listing_title) returns None, so you can't call .groups().
When a re .match does not find anything to match, it returns None. Since you cannot call .groups() on None, you have to check for a match first. To do that:
Change this:
title, = TITLE_MATCH.match(listing_title).groups()
address, = ADDRESS_MATCH.match(listing_title).groups()
To this:
titleMatch = TITLE_MATCH.match(listing_title)
if titleMatch:
    title, = titleMatch.groups()
else:
    pass  # handle it

addressMatch = ADDRESS_MATCH.match(listing_title)
if addressMatch:
    address, = addressMatch.groups()
else:
    pass  # handle it
I have the following program that tries to upload a file (or files) to an image upload site; however, I am struggling to find out how to parse the returned HTML to grab the direct link (contained in a <dd class="download"><input type="text" value="{hereisthelink}"></dd>).
I have the code below:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pycurl
import urllib
import urlparse
import xml.dom.minidom
import StringIO
import sys
import gtk
import os
import imghdr
import locale
import gettext
try:
    import pynotify
except:
    print "Install pynotify. It's whoasome!"
APP="Uploadir Uploader"
DIR="locale"
locale.setlocale(locale.LC_ALL, '')
gettext.bindtextdomain(APP, DIR)
gettext.textdomain(APP)
_ = gettext.gettext
##STRINGS
uploading = _("Uploading image to Uploadir.")
oneimage = _("1 image has been successfully uploaded.")
multimages = _("images have been successfully uploaded.")
uploadfailed = _("Unable to upload to Uploadir.")
class Uploadir:
    def __init__(self, args):
        self.images = []
        self.urls = []
        self.broadcasts = []
        self.username = ""
        self.password = ""
        if len(args) == 1:
            return
        else:
            for file in args:
                if file == args[0] or file == "":
                    continue
                if file.startswith("-u"):
                    self.username = file.split("-u")[1]
                    #print self.username
                    continue
                if file.startswith("-p"):
                    self.password = file.split("-p")[1]
                    #print self.password
                    continue
                self.type = imghdr.what(file)
                self.images.append(file)
            for file in self.images:
                self.upload(file)
            self.setClipBoard()
            self.broadcast(self.broadcasts)

    def broadcast(self, l):
        try:
            str = '\n'.join(l)
            n = pynotify.Notification(str)
            n.set_urgency(pynotify.URGENCY_LOW)
            n.show()
        except:
            for line in l:
                print line

    def upload(self, file):
        #Try to login
        cookie_file_name = "/tmp/uploadircookie"
        if (self.username != "" and self.password != ""):
            print "Uploadir authentication in progress"
            l = pycurl.Curl()
            loginData = [("username", self.username), ("password", self.password), ("login", "Login")]
            l.setopt(l.URL, "http://uploadir.com/user/login")
            l.setopt(l.HTTPPOST, loginData)
            l.setopt(l.USERAGENT, "User-Agent: Uploadir (Python Image Uploader)")
            l.setopt(l.FOLLOWLOCATION, 1)
            l.setopt(l.COOKIEFILE, cookie_file_name)
            l.setopt(l.COOKIEJAR, cookie_file_name)
            l.setopt(l.HEADER, 1)
            loginDataReturnedBuffer = StringIO.StringIO()
            l.setopt(l.WRITEFUNCTION, loginDataReturnedBuffer.write)
            if l.perform():
                self.broadcasts.append("Login failed. Please check connection.")
                l.close()
                return
            loginDataReturned = loginDataReturnedBuffer.getvalue()
            l.close()
            #print loginDataReturned
            if loginDataReturned.find("<li>Your supplied username or password is invalid.</li>") != -1:
                self.broadcasts.append("Uploadir authentication failed. Username/password invalid.")
                return
            else:
                self.broadcasts.append("Uploadir authentication successful.")
            #cookie = loginDataReturned.split("Set-Cookie: ")[1]
            #cookie = cookie.split(";",0)
            #print cookie
        c = pycurl.Curl()
        values = [
            ("file", (c.FORM_FILE, file)),
            ("terms", "1"),
            ("submit", "submit")
        ]
        buf = StringIO.StringIO()
        c.setopt(c.URL, "http://uploadir.com/file/upload")
        c.setopt(c.HTTPPOST, values)
        c.setopt(c.COOKIEFILE, cookie_file_name)
        c.setopt(c.COOKIEJAR, cookie_file_name)
        c.setopt(c.WRITEFUNCTION, buf.write)
        if c.perform():
            self.broadcasts.append(uploadfailed + " " + file + ".")
            c.close()
            return
        self.result = buf.getvalue()
        #print self.result
        c.close()
        doc = urlparse.urlparse(self.result)
        print doc
        self.urls.append(doc.getElementsByTagName("download")[0].childNodes[0].nodeValue)

    def setClipBoard(self):
        c = gtk.Clipboard()
        c.set_text('\n'.join(self.urls))
        c.store()
        if len(self.urls) == 1:
            self.broadcasts.append(oneimage)
        elif len(self.urls) != 0:
            self.broadcasts.append(str(len(self.urls)) + " " + multimages)

if __name__ == '__main__':
    uploadir = Uploadir(sys.argv)
The code that deals with the HTML parsing is here:
doc = urlparse.urlparse(self.result)
self.urls.append(doc.getElementsByTagName("download")[0].childNodes[0].nodeValue)
The urlparse module has nothing to do with parsing HTML. All it does is break a URL up into bits: protocol, network address, path, etc. For example:
>>> urlparse.urlparse("http://www.stackoverflow.com/questions/4699888")
ParseResult(scheme='http', netloc='www.stackoverflow.com', path='/questions/4699888', params='', query='', fragment='')
For parsing HTML, try BeautifulSoup.
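For example, a minimal sketch that pulls the value out of the <dd class="download"><input ...> markup described in the question, written against the BeautifulSoup 3 import the script already uses:

from BeautifulSoup import BeautifulSoup

def extract_download_link(html):
    # Find <dd class="download"> and return the value of its <input>, if present.
    soup = BeautifulSoup(html)
    dd = soup.find("dd", {"class": "download"})
    if dd is not None and dd.find("input") is not None:
        return dd.find("input")["value"]
    return None

In upload(), that would replace the two urlparse lines, e.g. link = extract_download_link(self.result) and then self.urls.append(link) if link is not None.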