Pyuno indexing issue that I would like an explanation for - python

The following python libreoffice Uno macro works but only with the try..except statement.
The macro allows you to select text in a writer document and send it to a search engine in your default browser.
The issue is that if you select a single piece of text, oSelected.getByIndex(0) is populated, but if you select multiple pieces of text, oSelected.getByIndex(0) is not populated. In this case the data starts at oSelected.getByIndex(1) and oSelected.getByIndex(0) is left blank.
I have no idea why this should be and would love to know if anyone can explain this strange behaviour.
#!/usr/bin/python
import os
import webbrowser
from configobj import ConfigObj
from com.sun.star.awt.MessageBoxButtons import BUTTONS_OK, BUTTONS_OK_CANCEL, BUTTONS_YES_NO, BUTTONS_YES_NO_CANCEL, BUTTONS_RETRY_CANCEL, BUTTONS_ABORT_IGNORE_RETRY
from com.sun.star.awt.MessageBoxButtons import DEFAULT_BUTTON_OK, DEFAULT_BUTTON_CANCEL, DEFAULT_BUTTON_RETRY, DEFAULT_BUTTON_YES, DEFAULT_BUTTON_NO, DEFAULT_BUTTON_IGNORE
from com.sun.star.awt.MessageBoxType import MESSAGEBOX, INFOBOX, WARNINGBOX, ERRORBOX, QUERYBOX
def fs3Browser(*args):
    """Send the text selected in the current Writer document to a web search.

    Every non-empty range of the current selection is collected (a multiple
    selection is reported to leave index 0 blank, so empty ranges are simply
    skipped), punctuation is stripped, and the result is opened as a query
    in the default browser. The search engine is read from
    ``~/fs3/fs3.cfg`` (``[control] ENGINE``) and defaults to DuckDuckGo.

    Args:
        *args: Ignored; accepted because the macro may be invoked with
            arguments by the caller.

    Returns:
        None.
    """
    from urllib.parse import quote_plus

    # get the doc from the scripting context which is made available to all scripts
    desktop = XSCRIPTCONTEXT.getDesktop()
    model = desktop.getCurrentComponent()
    doc = XSCRIPTCONTEXT.getDocument()
    parentwindow = doc.CurrentController.Frame.ContainerWindow
    oSelected = model.getCurrentSelection()

    fragments = []
    try:
        # Ask the selection how many ranges it holds instead of probing a
        # fixed number of indexes and relying on an exception to stop.
        for i in range(oSelected.getCount()):
            text = oSelected.getByIndex(i).getString()
            if text:
                fragments.append(text)
    except AttributeError:
        # The selection object has no getCount/getByIndex (the case the
        # message below describes), so report it instead of searching.
        mess = "Do not select text from more than one table cell"
        heading = "Processing error"
        MessageBox(parentwindow, mess, heading, INFOBOX, BUTTONS_OK)
        return

    lookup = " ".join(fragments)
    special_c = str.maketrans("", "", '!|##"$~%&/()=?+*][}{-;:,.<>')
    lookup = lookup.translate(special_c).strip()
    # Encode spaces and any remaining reserved characters for use in a URL.
    query = quote_plus(lookup)

    configuration_dir = os.path.join(os.path.expanduser("~"), "fs3")
    config_filename = os.path.join(configuration_dir, "fs3.cfg")

    # define search engine from the configuration file, if there is one
    searchengine = "https://duckduckgo.com"
    if os.access(config_filename, os.R_OK):
        try:
            searchengine = ConfigObj(config_filename)["control"]["ENGINE"]
        except KeyError:
            pass  # section or key missing: keep the default engine

    if 'duck' in searchengine:
        webbrowser.open_new('https://www.duckduckgo.com//?q=' + query + '&kj=%23FFD700 &k7=%23C9C4FF &ia=meanings')
    else:
        webbrowser.open_new('https://www.google.com/search?/&q=' + query)
    return None
def MessageBox(ParentWindow, MsgText, MsgTitle, MsgType, MsgButtons):
    """Show a modal message box and wait until the user dismisses it.

    Args:
        ParentWindow: Window the box is attached to.
        MsgText: Body text of the box.
        MsgTitle: Title-bar text of the box.
        MsgType: Box type constant (e.g. INFOBOX).
        MsgButtons: Button-set constant (e.g. BUTTONS_OK).
    """
    component_context = XSCRIPTCONTEXT.getComponentContext()
    toolkit = component_context.ServiceManager.createInstanceWithContext(
        "com.sun.star.awt.Toolkit", component_context
    )
    # Note the argument order expected by createMessageBox: title before text.
    dialog = toolkit.createMessageBox(
        ParentWindow, MsgType, MsgButtons, MsgTitle, MsgText
    )
    dialog.execute()

Your code is missing something. This works without needing an extra try/except clause:
selected_strings = []
try:
for i in range(oSelected.getCount()):
oSel = oSelected.getByIndex(i)
if oSel.getString():
selected_strings.append(oSel.getString())
except AttributeError:
# handle exception...
return
result = " ".join(selected_strings)
To answer your question about the "strange behaviour," it seems pretty straightforward to me. If the 0th element is empty, then there are multiple selections which may need to be handled differently.

Related

Optimizing selenium code

So I wrote some code to grab data about classes at a college to build an interactive scheduler. Here is the code I have to get data:
from selenium import webdriver
import os
import pwd
import shlex
import re
import time
# Build the chromedriver path under the current user's Downloads folder.
usr = pwd.getpwuid(os.getuid()).pw_name
Path = ('/Users/%s/Downloads/chromedriver') %usr # Have chromedriver downloaded
# Create a new instance of the Chrome driver
options = webdriver.ChromeOptions()
options.binary_location = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
options.add_argument('headless') # Headless so no window is opened
options.add_argument('window-size=1200x600')
driver = webdriver.Chrome(Path, chrome_options=options)
driver.get('https://web.stevens.edu/scheduler/core/2017F/2017F.xml') # Go to database
# Module-level registry filled by Database(): section name -> dict of attributes.
classes = {}
def Database(AllSelectedCourseInfo):
    """Turn a list of ``name=value`` tokens into a dict and register it.

    The resulting dict is stored in the module-level ``classes`` mapping
    under the value of its "Section" entry. Tokens without ``=`` and
    entries with an empty value are ignored; a token list that yields no
    "Section" entry is dropped.

    Args:
        AllSelectedCourseInfo: Iterable of strings such as ``Section=MA 124B``.
    """
    ClassDict = {}
    for item in AllSelectedCourseInfo:  # Go through list of class info
        # Split on the first "=" only so values that contain "=" stay whole.
        name, separator, value = item.partition("=")
        if not separator:
            continue  # not a name=value token
        if any(char.isdigit() for char in value):
            # Get rid of annoying Z at the end of numbers
            value = value.replace("Z", "")
        if not value:
            continue  # only subjects that have a value are stored
        try:
            ClassDict[str(name)] = str(value)
        except UnicodeError:
            # str() can fail on non-ASCII text under Python 2; skip the entry.
            continue

    section = ClassDict.get("Section")
    if section is None:
        return  # nothing to key the record on
    classes[str(section)] = ClassDict  # Add to dictionary
def makeDatabase(section):
    """Look up *section* in the loaded page and register what is found.

    If the parent of the first matching node mentions "Title", *section* is
    treated as a class name and every match is registered; otherwise it is
    treated as a single class section.

    Args:
        section: Exact text of the node to search for.
    """
    xpath = "//*[text()='%s']" % section
    parent_text = driver.find_element_by_xpath(xpath).find_element_by_xpath("..").text

    if "Title" not in parent_text:
        # Class section given: one match, record is four levels up.
        node = driver.find_element_by_xpath(xpath)
        raw = node.find_element_by_xpath("../../../..").text
        # Group quoted strings and drop the leftover tag symbols.
        Database(shlex.split(raw.replace("/>", "").replace(">", "")))
        return

    # Class name given: every match is a section, record is five levels up.
    for node in driver.find_elements_by_xpath(xpath):
        raw = node.find_element_by_xpath("../../../../..").text
        Database(shlex.split(raw.replace("/>", "").replace(">", "")))
def printDic():
    """Print every registered class: a header line, then ``name : value`` rows."""
    for section_key, attributes in classes.items():
        print("\n-------------%s------------" % section_key)
        for attribute_name in attributes:
            print("%s : %s" % (attribute_name, attributes[attribute_name]))
# Time the lookup of one class title and one class section, then print everything.
start = time.time()
makeDatabase("Differential Calculus")
makeDatabase("MA 124B")
printDic()
end = time.time()
print end - start  # elapsed wall-clock seconds (includes printing)
driver.quit()  # shut the headless browser down
It takes about 20 seconds for me to pull data from one class and one class section. To make this practical it will need to handle at least 7 classes, and that would take over a minute just to create the dictionaries. Does anyone know of a way to make this run any faster?
I tried to integrate lxml and requests into my code but it just didn't have what I was looking for. After a few days of trying to use lxml to accomplish this to no avail, I decided to try beautifulsoup4 with urllib. This worked better than I could have hoped:
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser
import urllib
import shlex
import re
import time
h = HTMLParser()  # used for unescape() on the raw markup in makeDatabase()
page = urllib.urlopen('https://web.stevens.edu/scheduler/core/2017F/2017F.xml').read() # Get to database
# NOTE(review): no parser is named, so the one chosen depends on what is
# installed -- TODO confirm the hard-coded contents[...] path below still
# reaches the list of class nodes with that parser.
soup = BeautifulSoup(page)
RawClassData = soup.contents[10].contents[0].contents[0].contents
# Filled by makeDatabase(): section name -> dict of attributes.
classes = {}
# Not referenced by any of the code visible in this snippet.
backupClasses = {}
def makeDatabase():
    """Parse every node in ``RawClassData`` into ``classes``.

    Each node is rendered to text, unescaped and tokenised into
    ``name=value`` pairs. The resulting dict is stored in the module-level
    ``classes`` mapping under its "section" value. Nodes that cannot be
    tokenised, or that carry no "section" entry, are skipped.
    """
    for raw_class in RawClassData:  # Parse through each class
        try:
            # Group quoted strings and drop the closing tag symbol.
            tokens = shlex.split(h.unescape(str(raw_class).replace(">", " ")))
        except ValueError:
            # Unbalanced quotes (shlex) or an encoding problem; skip the node.
            continue

        ClassDict = {}
        for item in tokens:  # Go through list of class info
            # Split on the first "=" only so values containing "=" stay whole.
            name, separator, value = item.partition("=")
            if not separator:
                continue  # not a name=value token
            if any(char.isdigit() for char in value):
                # Get rid of annoying Z at the end of numbers
                value = value.replace("Z", "")
            if not value:
                continue  # only subjects that have a value are stored
            try:
                ClassDict[str(name)] = str(value)
            except UnicodeError:
                continue  # str() failed on non-ASCII text; skip the entry

        section = ClassDict.get("section")
        if section is not None:
            classes[str(section)] = ClassDict
def printDic():
    """Write every registered class to the file "Classes" in the working directory.

    Each class is written as a header line followed by one ``name : value``
    line per attribute and a trailing newline.
    """
    with open("Classes", "w") as out:
        for section_key in classes:
            out.write("\n-------------%s------------" % section_key)
            attributes = classes[section_key]
            for attribute_name in attributes:
                out.write("\n%s : %s" % (attribute_name, attributes[attribute_name]))
            out.write("\n")
def printSection(selection):
    """Print the header and all ``name : value`` rows of one section.

    Args:
        selection: Key of the section in ``classes``.

    Raises:
        KeyError: If *selection* is not a key of ``classes`` (the header has
            already been printed at that point).
    """
    print("\n-------------%s------------" % selection)
    attributes = classes[selection]
    for attribute_name, attribute_value in attributes.items():
        print("%s : %s" % (attribute_name, attribute_value))
def printClass(selection):
    """Print every section whose title is *selection*, and *selection* itself
    if it is also a section key.

    The earlier ``try``/``finally`` form always ran the section lookup, so a
    title that is not also a section key raised ``KeyError``.

    Args:
        selection: A class title (e.g. "Freshman Quiz") or a section key.
    """
    found_by_title = False
    for key in classes:
        # .get(): a record without a "title" entry simply does not match.
        if classes[key].get("title") == selection:
            found_by_title = True
            print("\n-------------%s------------" % key)
            for classkey in classes[key]:
                print("%s : %s" % (classkey, classes[key][classkey]))

    if selection in classes:
        print("\n-------------%s------------" % selection)
        for classkey in classes[selection]:
            print("%s : %s" % (classkey, classes[selection][classkey]))
    elif not found_by_title:
        print("No class or section named %s" % selection)
# Only the parsing step is timed; printing happens after `end` is taken.
start = time.time()
makeDatabase()
end = time.time()
# Look the desired classes up by title.
printClass("Circuits and Systems")
printClass("Differential Equations")
printClass("Writing & Communications Collqm")
printClass("Mechanics of Solids")
printClass("Electricity & Magnetism")
printClass("Engineering Design III")
printClass("Freshman Quiz")
printDic()  # dump the whole library to the "Classes" file
print end - start
This new code creates a library of all classes, then prints out the desired classes, all in 2 seconds. The selenium code took 89 seconds just to build the library for the desired classes and print them out; I would say that's a slight improvement... Thanks a ton to perfect5th for the suggestion!

Why can't I pickle this list?

The purpose of this form is to let users enter a lot of places (comma separated) and it'll retrieve the phone, name, website. Have it working in a python IDE, no problem, but having issues putting it into my webapp.
I'm getting the error Exception Value: Can't pickle local object 'GetNums.<locals>.get_data' at the line where a is assigned. I checked the type of inputText and verified that it is indeed a list. So, I'm not sure why it won't pickle.
def _lookup_place(query):
    """Return "phone name website" for the first Google Places match of *query*.

    Defined at module level rather than inside the view so that
    multiprocessing can pickle it when it is passed to ``Pool.map``.

    Args:
        query: Free-text place description entered by the user.

    Returns:
        A single space-joined string; a placeholder text replaces any
        missing field, and a "No results" message is returned when the
        search matches nothing.
    """
    #DON'T FORGET TO MOVE THE PRIMARY KEY LATER TO SETTINGS
    api_key = 'GET_YOUR_OWN'

    # Let requests build the query string so user text is URL-encoded.
    search = requests.get(
        'https://maps.googleapis.com/maps/api/place/textsearch/json',
        params={'query': query, 'key': api_key},
    ).json()
    matches = search.get('results')
    if not matches:
        return 'No results found for ' + query

    details = requests.get(
        'https://maps.googleapis.com/maps/api/place/details/json',
        params={'placeid': matches[0]['place_id'], 'key': api_key},
    ).json()
    place = details.get('result', {})
    phone = place.get('formatted_phone_number', 'No phone found')
    name = place.get('name', query)
    website = place.get('website', 'No website found')
    return ' '.join((phone, name, website))


def GetNums(request):
    """Look up phone, name and website for each comma-separated place submitted.

    Renders ``about.html`` with ``v`` set to the list of result strings; the
    list is empty when the form is missing, invalid or contains no places.
    """
    form = GetNumsForm(request.POST or None)
    results = []
    if form.is_valid():
        raw_places = form.cleaned_data.get('getnums').split(',')
        places = [place.strip() for place in raw_places if place.strip()]
        if places:
            #code assist by http://stackoverflow.com/a/34512870/5037442
            with Pool(5) as p:
                results = p.map(_lookup_place, places)
    return render(request, 'about.html', {'v': results})
It's actually barfing when trying to pickle get_data, which is a nested function/closure.
Move get_data out of GetNums so that it is a module-level function (and, agh, please rename GetNums to snake_case) and it should work.

Reading specific test steps from Quality Center with python

I am working with Quality Center via OTA COM library. I figured out how to connect to server, but I am lost in OTA documentation on how to work with it. What I need is to create a function which takes a test name as an input and returns number of steps in this test from QC.
For now I am this far in this question.
import win32com
from win32com.client import Dispatch
# import codecs #to store info in additional codacs
import re
import json
import getpass #for password
# Connection settings (masked).
qcServer = "***"
qcUser = "***"
qcPassword = getpass.getpass('Password: ')
qcDomain = "***"
qcProject = "***"
td = win32com.client.Dispatch("TDApiOle80.TDConnection.1")
#Starting to connect
td.InitConnectionEx(qcServer)
td.Login(qcUser, qcPassword)
td.Connect(qcDomain, qcProject)
if td.Connected:
    print("Connected to " + qcProject)
else:
    print("Connection failed")
# Example of a full test path: r"Subject\Regression\C.001_Band_tones"
mg = td.TreeManager
# Raw string: the backslash is a path separator, not an escape character.
npath = r"Subject\Regression"
# "Subject\..." is a Test Plan path, so it is resolved through TreeManager
# rather than TestSetTreeManager.
tsFolder = mg.NodeByPath(npath)
print(tsFolder)
# Disconnect and Logout are methods; they must be called to take effect.
td.Disconnect()
td.Logout()
print("Disconnected from " + qcProject)
Any help with decent Python examples or tutorials will be highly appreciated. For now I found this and this, but they don't help.
Using the OTA API to get data from Quality Center normally means getting some element by path, creating a factory and then using the factory to search for the object. In your case you need the TreeManager to get a folder in the Test Plan, then you need a TestFactory to get the test and finally you need the DesignStepFactory to get the steps. I'm no Python programmer but I hope you can get something out of this:
# Test Plan folder -> TestFactory -> filtered test list -> DesignStepFactory.
mg = td.TreeManager
npath = r"Subject\Test"
tsFolder = mg.NodeByPath(npath)

# Find the test by name inside that folder.
testFactory = tsFolder.TestFactory
testFilter = testFactory.Filter
testFilter["TS_NAME"] = "Some Test"
testList = testFactory.NewList(testFilter.Text)
test = testList.Item(1)  # There should be only 1 item
print(test.Name)

# List the design steps of that test (empty filter text).
stepFactory = test.DesignStepFactory
stepList = stepFactory.NewList("")
for step in stepList:
    print(step.StepName)
It takes some time to get used to the QC OTA API documentation but I find it very helpful. Nearly all of my knowledge comes from the examples in the API documentation—for your problem there are examples like "Finding a unique test" or "Get a test object with name and path". Both examples are examples to the Test object. Even if the examples are in VB it should be no big thing to adapt them to Python.
I figured out the solution, if there is a better way to do this you are welcome to post it.
import win32com
from win32com.client import Dispatch
import getpass
def number_of_steps(name):
    """Return the number of design steps of the Quality Center test *name*.

    Connects to Quality Center, searches the "Subject\\Regression" folder
    for tests matching *name* (partial matches are accepted by FindTests)
    and always disconnects and logs out afterwards.

    Args:
        name: Test name, or part of it, to search for.

    Returns:
        The step count of the single matching test, or ``False`` when the
        connection fails, no test matches, or several tests match.
    """
    qcServer = "***"
    qcUser = "***"
    qcPassword = getpass.getpass('Password: ')
    qcDomain = "***"
    qcProject = "***"
    td = win32com.client.Dispatch("TDApiOle80.TDConnection.1")
    #Starting to connect
    td.InitConnectionEx(qcServer)
    td.Login(qcUser, qcPassword)
    td.Connect(qcDomain, qcProject)
    try:
        if not td.Connected:
            print("Connection failed")
            return False
        print("Connected to " + qcProject)

        mg = td.TreeManager  # Tree manager
        folder = mg.NodeByPath(r"Subject\Regression")
        # Make a list of tests matching name (partial match is accepted)
        testList = folder.FindTests(name)

        # Use len() rather than truthiness, which may not reflect emptiness
        # for a COM collection wrapper.
        if testList is None or len(testList) == 0:
            print("There are no test with this test name in Quality Center")
            return False
        if len(testList) > 1:
            print("There are multiple tests matching this name, please check input parameter\nTests matching")
            for test in testList:
                print(test.Name)
            return False

        # Read everything needed from the COM object before disconnecting.
        test = testList[0]
        steps = test.DesStepsNum
        print("In test %s there is %d steps" % (test.Name, steps))
        return steps  # Return number of steps for given test
    finally:
        # Disconnect and Logout are methods; they must be called to take effect.
        td.Disconnect()
        td.Logout()
        print("Disconnected from " + qcProject)

How can I check the value of a DNS TXT record for a host?

I'm looking to verify domain ownership via a script, specifically a Python script, and would like know how to lookup the value of a DNS TXT entry. I know there are services and websites out there for this, but I would like to do it with a script.
This is easy using dnspython. Here is an example:
import dns.resolver
# resolve() belongs to the dnspython 2.x API, which runs on Python 3, so
# print must be called as a function.
txt_answer = dns.resolver.resolve("aaa.asdflkjsadf.notatallsuspicio.us", "TXT")
# Each TXT string is a bytes object; decode it for display.
print(txt_answer.response.answer[0][-1].strings[0].decode("utf-8"))
This gives the following output:
PnCcKpPiGlLfApDbDoEcBbPjIfBnLpFaAaObAaAaMhNgNbIfPbHkMiEfPpGgJfOcPnLdDjBeHkOjFjIbPbIoKhIjHfJlAhAhFgGbGgNlMgKmFkLgNfBjMbCoBeNbGeOnAeHgLmKoFlLhLmDcKlEdEbDpFeHkFaBlGnHiOnChIoMlIhBgOnFfKoEhDnFkKfDaMgHbJhMgPgMjGiAoJpKjKkPaIcAdGiMbIbBbAfEiKjNbCeFoElKgOePmGjJaImL
Another option is to use dig in subprocess:
import subprocess
# universal_newlines=True makes communicate() return text instead of bytes.
dig_output = subprocess.Popen(
    ["dig", "-t", "txt", "aaa.asdflkjsadf.notatallsuspicio.us", "+short"],
    stdout=subprocess.PIPE,
    universal_newlines=True,
).communicate()[0]
print(dig_output)
This may be overly simplified, but if all you want is a quick read of the TXT record and don't mind dealing with parsing the result separately:
nslookup -q=txt somedomain.com
I found this did what I needed, short & sweet.
Found another way to get list of all TXT records for a domain using dnspython.
import dns.resolver
[dns_record.to_text() for dns_record in dns.resolver.resolve("your-domain-here", "TXT").rrset]
update 2022/11/20
# -*- coding:utf-8 -*-
# Copyright (c) DadouLab.SIG MIT
import dns
import dns.query
import dns.resolver
import logging
logger = logging.getLogger(__name__)
class Digger(object):
    """Small wrapper around a dnspython resolver with fixed name servers."""

    def __init__(self, resolvers=None):
        """Create the resolver.

        Args:
            resolvers: Iterable of name-server IP addresses; defaults to
                ``["1.1.1.1"]``. ``None`` is used as the default so that no
                list object is shared between instances.
        """
        self.mResolver = dns.resolver.Resolver()
        # NOTE(review): lifetime (0.5) is smaller than timeout (1) -- confirm
        # that this ordering is intended.
        self.mResolver.timeout = 1
        self.mResolver.lifetime = 0.5
        self.mResolver.nameservers = ["1.1.1.1"] if resolvers is None else list(resolvers)
        # Record types that are always returned in their text form.
        self.spec_query_type = ['CNAME', 'TXT', 'MX', 'NS', 'SRV', 'CAA']

    def query(self, domain, query_type="A"):
        """Resolve *domain* and return its records as a list of strings.

        Example with the plain dnspython API:
            answer = dns.resolver.resolve("_dnsauth.test.com", "TXT").rrset
            for dns_record in answer:
                print(dns_record.to_text())

        Args:
            domain: Name to resolve.
            query_type: Record type, case-insensitive (default "A").

        Returns:
            A list of record strings (empty when the name exists but has no
            record of this type), or ``None`` when resolution fails.
        """
        query_type = query_type.upper()
        try:
            answer = self.mResolver.resolve(domain, query_type, raise_on_no_answer=False)
        except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer,
                dns.resolver.NoNameservers, dns.exception.Timeout) as error:
            logger.warning("resolved error => {}".format(error))
            return None

        # With raise_on_no_answer=False there may be no rrset at all.
        if answer.rrset is None:
            logger.info("resolved response data => no {} records".format(query_type))
            return []
        logger.info("resolved response data => {}".format(answer.rrset.to_text()))

        if query_type in self.spec_query_type:
            return [data.to_text() for data in answer]
        # Address records expose .address; fall back to the text form for
        # any other type so that it does not raise AttributeError.
        return [getattr(data, "address", None) or data.to_text() for data in answer]

    def is_valid(self, domain, query_type="A"):
        """Return True if *domain* resolves without error, otherwise False.

        Because ``raise_on_no_answer=False`` is used, a name that exists but
        has no record of *query_type* still counts as valid.
        """
        try:
            self.mResolver.resolve(domain, query_type, raise_on_no_answer=False)
            return True
        except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer,
                dns.resolver.NoNameservers, dns.exception.Timeout) as error:
            logger.warning("resolved error => {}".format(error))
            return False
if __name__ == '__main__':
dig = Digger()
print(dig.query("www.example.com", query_type="A"))
Something like this should work to at least get the value for the URL, I used google.com for the example.
import pycurl
import StringIO
url = "whatsmyip.us/dns_txt.php?host=google.com"
txtcurl = StringIO.StringIO()  # collects the response body

c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(pycurl.HTTPHEADER, ["Accept:"])
c.setopt(pycurl.WRITEFUNCTION, txtcurl.write)
try:
    # perform is a method: without the call no request is made and the
    # buffer stays empty.
    c.perform()
finally:
    c.close()  # release the curl handle

data = txtcurl.getvalue()
data = data.replace("Done!", "")
print(data)
I did not test any of this but pulled it from a previous project.
Best of luck!

How to upload documents with new python-gdata (2.0.16)?

With python-gdata 2.0.14, I used the following pieces of code to create and upload documents:
# To create a document
import gdata.docs
import gdata.docs.client
from gdata.data import MediaSource
# Authenticate a Docs client over SSL with ClientLogin credentials.
gdClient = gdata.docs.client.DocsClient(source="my-app")
gdClient.ssl = True
gdClient.ClientLogin("login", "pa$$word", gdClient.source)
# Wrap the local HTML file as the upload payload.
ms = MediaSource(file_path="temp.html", content_type="text/html")
# Upload() is the python-gdata 2.0.14 call; it is reported missing in 2.0.16.
entry = gdClient.Upload(ms, "document title")
print "uploaded, url is", entry.GetAlternateLink().href
and
# To update a document
# Rename the entry, then push the new title and the media in one update.
entry.title.text = "updated title"
# Update() is the python-gdata 2.0.14 call; it is reported missing in 2.0.16.
entry = gdClient.Update(entry, media_source=ms, force=True)
print "updated, url is", entry.GetAlternateLink().href
However, this code no longer works with python-gdata 2.0.16 because the DocsClient class no longer has Upload and Update functions.
I tried to use this
# Try to create a document
# Same login sequence as before.
gdClient = gdata.docs.client.DocsClient(source="my-app")
gdClient.ssl = True
gdClient.ClientLogin("login", "pa$$word", gdClient.source)
ms = MediaSource(file_path="temp.html", content_type="text/html")
# Describe the new document, then create it together with its media.
entry = gdata.docs.data.Resource(type=gdata.docs.data.DOCUMENT_LABEL, title="document title")
# NOTE(review): `self` implies this line was lifted from a method; this is
# the call reported to fail with 401 'Token invalid'.
self.resource = gdClient.CreateResource(entry, media=ms)
… but I get this error:
gdata.client.Unauthorized: Unauthorized - Server responded with: 401, 'Token invalid'
Can anybody tell me where's my mistake and how should I use that new API?
P.S. The documentation hasn't been updated and still uses the old-style code.
I was having issues with this recently too. This worked for me:
import gdata.docs.data
import gdata.docs.client
client = gdata.docs.client.DocsClient(source='your-app')
# The API version is set explicitly here, unlike in the failing attempt.
client.api_version = "3"
client.ssl = True
# NOTE(review): "your#email.com" is a placeholder; the "#" presumably stands
# in for "@" -- confirm before use.
client.ClientLogin("your#email.com", "password", client.source)
filePath = "/path/to/file"
newResource = gdata.docs.data.Resource(filePath, "document title")
# Attach the file contents with an explicit MIME type.
media = gdata.data.MediaSource()
media.SetFileHandle(filePath, 'mime/type')
# The upload URI is passed explicitly via create_uri.
newDocument = client.CreateResource(newResource, create_uri=gdata.docs.client.RESOURCE_UPLOAD_URI, media=media)
Edit: Added the packages to import to avoid confusion

Categories