How to put formatted text into the clipboard?

How to put formatted text into the clipboard? - python

I wanted to create some formatted text like this:
𝐇𝐞𝐥𝐥𝐨 𝐖𝐨𝐫𝐥𝐝!
And then put it onto the clipboard using Python, so when I paste it will show my text in formatted version in Windows.
I have tried to copy some text using some libraries, but all they copy is the text in simple format:
Hello world
I want my clipboard text in bold format like this:
𝗵𝗲𝗹𝗹𝗼 𝘄𝗼𝗿𝗹𝗱

"""
Created on Sep 24, 2013
#author: jordans
Requires pywin32
original: http://code.activestate.com/recipes/474121/
# HtmlClipboard
# An interface to the "HTML Format" clipboard data format
__author__ = "Phillip Piper (jppx1[at]bigfoot.com)"
__date__ = "2006-02-21"
__version__ = "0.1"
"""
import re
import time
import random
import win32clipboard
#---------------------------------------------------------------------------
# Convenience functions to do the most common operation
def HasHtml():
"""
Return True if there is a Html fragment in the clipboard..
"""
cb = HtmlClipboard()
return cb.HasHtmlFormat()
def GetHtml():
"""
Return the Html fragment from the clipboard or None if there is no Html in the clipboard.
"""
cb = HtmlClipboard()
if cb.HasHtmlFormat():
return cb.GetFragment()
else:
return None
def PutHtml(fragment):
"""
Put the given fragment into the clipboard.
Convenience function to do the most common operation
"""
cb = HtmlClipboard()
cb.PutFragment(fragment)
#---------------------------------------------------------------------------
class HtmlClipboard:
CF_HTML = None
MARKER_BLOCK_OUTPUT = \
"Version:1.0\r\n" \
"StartHTML:%09d\r\n" \
"EndHTML:%09d\r\n" \
"StartFragment:%09d\r\n" \
"EndFragment:%09d\r\n" \
"StartSelection:%09d\r\n" \
"EndSelection:%09d\r\n" \
"SourceURL:%s\r\n"
MARKER_BLOCK_EX = \
"Version:(\S+)\s+" \
"StartHTML:(\d+)\s+" \
"EndHTML:(\d+)\s+" \
"StartFragment:(\d+)\s+" \
"EndFragment:(\d+)\s+" \
"StartSelection:(\d+)\s+" \
"EndSelection:(\d+)\s+" \
"SourceURL:(\S+)"
MARKER_BLOCK_EX_RE = re.compile(MARKER_BLOCK_EX)
MARKER_BLOCK = \
"Version:(\S+)\s+" \
"StartHTML:(\d+)\s+" \
"EndHTML:(\d+)\s+" \
"StartFragment:(\d+)\s+" \
"EndFragment:(\d+)\s+" \
"SourceURL:(\S+)"
MARKER_BLOCK_RE = re.compile(MARKER_BLOCK)
DEFAULT_HTML_BODY = \
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" \
"<HTML><HEAD></HEAD><BODY><!--StartFragment-->%s<!--EndFragment--></BODY></HTML>"
def __init__(self):
self.html = None
self.fragment = None
self.selection = None
self.source = None
self.htmlClipboardVersion = None
def GetCfHtml(self):
"""
Return the FORMATID of the HTML format
"""
if self.CF_HTML is None:
self.CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format")
return self.CF_HTML
def GetAvailableFormats(self):
"""
Return a possibly empty list of formats available on the clipboard
"""
formats = []
try:
win32clipboard.OpenClipboard(0)
cf = win32clipboard.EnumClipboardFormats(0)
while (cf != 0):
formats.append(cf)
cf = win32clipboard.EnumClipboardFormats(cf)
finally:
win32clipboard.CloseClipboard()
return formats
def HasHtmlFormat(self):
"""
Return a boolean indicating if the clipboard has data in HTML format
"""
return (self.GetCfHtml() in self.GetAvailableFormats())
def GetFromClipboard(self):
"""
Read and decode the HTML from the clipboard
"""
# implement fix from: http://teachthe.net/?p=1137
cbOpened = False
while not cbOpened:
try:
win32clipboard.OpenClipboard(0)
src = win32clipboard.GetClipboardData(self.GetCfHtml())
src = src.decode("UTF-8")
#print(src)
self.DecodeClipboardSource(src)
cbOpened = True
win32clipboard.CloseClipboard()
except Exception as err:
# If access is denied, that means that the clipboard is in use.
# Keep trying until it's available.
if err.winerror == 5: # Access Denied
pass
# wait on clipboard because something else has it. we're waiting a
# random amount of time before we try again so we don't collide again
time.sleep( random.random()/50 )
elif err.winerror == 1418: # doesn't have board open
pass
elif err.winerror == 0: # open failure
pass
else:
print( 'ERROR in Clipboard section of readcomments: %s' % err)
pass
def DecodeClipboardSource(self, src):
"""
Decode the given string to figure out the details of the HTML that's on the string
"""
# Try the extended format first (which has an explicit selection)
matches = self.MARKER_BLOCK_EX_RE.match(src)
if matches:
self.prefix = matches.group(0)
self.htmlClipboardVersion = matches.group(1)
self.html = src[int(matches.group(2)):int(matches.group(3))]
self.fragment = src[int(matches.group(4)):int(matches.group(5))]
self.selection = src[int(matches.group(6)):int(matches.group(7))]
self.source = matches.group(8)
else:
# Failing that, try the version without a selection
matches = self.MARKER_BLOCK_RE.match(src)
if matches:
self.prefix = matches.group(0)
self.htmlClipboardVersion = matches.group(1)
self.html = src[int(matches.group(2)):int(matches.group(3))]
self.fragment = src[int(matches.group(4)):int(matches.group(5))]
self.source = matches.group(6)
self.selection = self.fragment
def GetHtml(self, refresh=False):
"""
Return the entire Html document
"""
if not self.html or refresh:
self.GetFromClipboard()
return self.html
def GetFragment(self, refresh=False):
"""
Return the Html fragment. A fragment is well-formated HTML enclosing the selected text
"""
if not self.fragment or refresh:
self.GetFromClipboard()
return self.fragment
def GetSelection(self, refresh=False):
"""
Return the part of the HTML that was selected. It might not be well-formed.
"""
if not self.selection or refresh:
self.GetFromClipboard()
return self.selection
def GetSource(self, refresh=False):
"""
Return the URL of the source of this HTML
"""
if not self.selection or refresh:
self.GetFromClipboard()
return self.source
def PutFragment(self, fragment, selection=None, html=None, source=None):
"""
Put the given well-formed fragment of Html into the clipboard.
selection, if given, must be a literal string within fragment.
html, if given, must be a well-formed Html document that textually
contains fragment and its required markers.
"""
if selection is None:
selection = fragment
if html is None:
html = self.DEFAULT_HTML_BODY % fragment
if source is None:
source = "file://HtmlClipboard.py"
fragmentStart = html.index(fragment)
fragmentEnd = fragmentStart + len(fragment)
selectionStart = html.index(selection)
selectionEnd = selectionStart + len(selection)
self.PutToClipboard(html, fragmentStart, fragmentEnd, selectionStart, selectionEnd, source)
def PutToClipboard(self, html, fragmentStart, fragmentEnd, selectionStart, selectionEnd, source="None"):
"""
Replace the Clipboard contents with the given html information.
"""
try:
win32clipboard.OpenClipboard(0)
win32clipboard.EmptyClipboard()
src = self.EncodeClipboardSource(html, fragmentStart, fragmentEnd, selectionStart, selectionEnd, source)
src = src.encode("UTF-8")
#print(src)
win32clipboard.SetClipboardData(self.GetCfHtml(), src)
finally:
win32clipboard.CloseClipboard()
def EncodeClipboardSource(self, html, fragmentStart, fragmentEnd, selectionStart, selectionEnd, source):
"""
Join all our bits of information into a string formatted as per the HTML format specs.
"""
# How long is the prefix going to be?
dummyPrefix = self.MARKER_BLOCK_OUTPUT % (0, 0, 0, 0, 0, 0, source)
lenPrefix = len(dummyPrefix)
prefix = self.MARKER_BLOCK_OUTPUT % (lenPrefix, len(html)+lenPrefix,
fragmentStart+lenPrefix, fragmentEnd+lenPrefix,
selectionStart+lenPrefix, selectionEnd+lenPrefix,
source)
return (prefix + html)
def DumpHtml():
cb = HtmlClipboard()
print("GetAvailableFormats()=%s" % str(cb.GetAvailableFormats()))
print("HasHtmlFormat()=%s" % str(cb.HasHtmlFormat()))
if cb.HasHtmlFormat():
cb.GetFromClipboard()
print("prefix=>>>%s<<<END" % cb.prefix)
print("htmlClipboardVersion=>>>%s<<<END" % cb.htmlClipboardVersion)
print("GetSelection()=>>>%s<<<END" % cb.GetSelection())
print("GetFragment()=>>>%s<<<END" % cb.GetFragment())
print("GetHtml()=>>>%s<<<END" % cb.GetHtml())
print("GetSource()=>>>%s<<<END" % cb.GetSource())
if __name__ == '__main__':
data = "<p>Writing to the clipboard is <strong>easy</strong> with this code.</p>"
PutHtml(data)
if GetHtml() == data:
print("passed")
print( GetHtml() )
else:
print("failed")
# DumpHtml()
from the question：
python 3.6 windows: retrieving the clipboard CF_HTML format - Stack Overflow
python 3.6 windows: retrieving the clipboard CF_HTML format

Related

Evernote Python API - Hitting rate limits

I've written a short piece of code that will append the tag names of my notes to the title, then remove all associated tags. When I try to run this on production, I hit the rate limit real quickly. Can someone help me optimise this piece of code? Or should I request for a special rate limit with Evernote?
Also, I get an error when a note has no tags. Any way to efficiently get the number of tags from a note so I don't get the error?
from evernote.api.client import EvernoteClient
from evernote.edam.notestore import NoteStore
dev_token = "dev_token"
client = EvernoteClient(token=dev_token, sandbox = False)
userStore = client.get_user_store()
user = userStore.getUser()
print
print user.username
print
noteStore = client.get_note_store()
notebooks = noteStore.listNotebooks()
for n in notebooks:
print "Notebook = " + n.name + " GUID = " + n.guid
filter = NoteStore.NoteFilter()
filter.ascending = False
filter.notebookGuid=n.guid
spec = NoteStore.NotesMetadataResultSpec()
spec.includeTitle = True
spec.includeNotebookGuid = True
spec.includeTagGuids = True
notesMetadataList = noteStore.findNotesMetadata(filter, 0, 25, spec)
for noteMetadata in notesMetadataList.notes:
print "%s :: %s" % (noteMetadata.title, noteMetadata.guid)
newNoteTitle = noteMetadata.title + " -- "
for tagGuid in noteMetadata.tagGuids:
tag = noteStore.getTag(tagGuid)
tagName = tag.name
print tagName
newNoteTitle = newNoteTitle + " " + tagName
print "newNoteTitle = " + newNoteTitle
noteMetadata.title = newNoteTitle
noteMetadata.tagGuids = []
noteMetadata = noteStore.updateNote(noteMetadata)
print noteMetadata.title

Here's how I deal with rate limiting, by wrapping the EvernoteClient in a rate limiting proxy (based on http://code.activestate.com/recipes/496741-object-proxying/)
from time import sleep
from evernote.api.client import EvernoteClient
from evernote.edam.error.ttypes import (EDAMSystemException, EDAMErrorCode)
def evernote_wait_try_again(f):
"""
Wait until mandated wait and try again
http://dev.evernote.com/doc/articles/rate_limits.php
"""
def f2(*args, **kwargs):
try:
return f(*args, **kwargs)
except EDAMSystemException as e:
if e.errorCode == EDAMErrorCode.RATE_LIMIT_REACHED:
print("rate limit: {0} s. wait".format(e.rateLimitDuration))
sleep(e.rateLimitDuration)
print("wait over")
return f(*args, **kwargs)
return f2
class RateLimitingEvernoteProxy(object):
__slots__ = ["_obj"]
def __init__(self, obj):
object.__setattr__(self, "_obj", obj)
def __getattribute__(self, name):
return evernote_wait_try_again(
getattr(object.__getattribute__(self, "_obj"), name))
_client = EvernoteClient(token=auth_token, sandbox=sandbox)
client = RateLimitingEvernoteProxy(_client)

Scrapy upload file

I am making a form request to a website using scrapy. The form requires to upload a pdf file, How can we do it in Scrapy. I am trying this like -
FormRequest(url,callback=self.parseSearchResponse,method="POST",formdata={'filename':'abc.xyz','file':'path to file/abc.xyz'})

At this very moment Scrapy has no built-in support for uploading files.
File uploading via forms in HTTP was specified in RFC1867. According to the spec, an HTTP request with Content-Type: multipart/form-data is required (in your code it would be application/x-www-form-urlencoded).
To achieve file uploading with Scrapy, you would need to:
Get familiar with the basic concepts of HTTP file uploading.
Start with scrapy.Request (instead of FormRequest).
Give it a proper Content-Type header value.
Build the request body yourself.
See also: How does HTTP file upload work?

I just spent an entire day trying to figure out how to implement this.
Finally, I came upon a Scrapy pull request from 2016 that was never merged, with an implementation of a multipart form request:
from scrapy import FormRequest
from six.moves.urllib.parse import urljoin, urlencode
import lxml.html
from parsel.selector import create_root_node
import six
import string
import random
from scrapy.http.request import Request
from scrapy.utils.python import to_bytes, is_listlike
from scrapy.utils.response import get_base_url
class MultipartFormRequest(FormRequest):
def __init__(self, *args, **kwargs):
formdata = kwargs.pop('formdata', None)
kwargs.setdefault('method', 'POST')
super(MultipartFormRequest, self).__init__(*args, **kwargs)
content_type = self.headers.setdefault(b'Content-Type', [b'multipart/form-data'])[0]
method = kwargs.get('method').upper()
if formdata and method == 'POST' and content_type == b'multipart/form-data':
items = formdata.items() if isinstance(formdata, dict) else formdata
self._boundary = ''
# encode the data using multipart spec
self._boundary = to_bytes(''.join(
random.choice(string.digits + string.ascii_letters) for i in range(20)), self.encoding)
self.headers[b'Content-Type'] = b'multipart/form-data; boundary=' + self._boundary
request_data = _multpart_encode(items, self._boundary, self.encoding)
self._set_body(request_data)
class MultipartFile(object):
def __init__(self, name, content, mimetype='application/octet-stream'):
self.name = name
self.content = content
self.mimetype = mimetype
def _get_form_url(form, url):
if url is None:
return urljoin(form.base_url, form.action)
return urljoin(form.base_url, url)
def _urlencode(seq, enc):
values = [(to_bytes(k, enc), to_bytes(v, enc))
for k, vs in seq
for v in (vs if is_listlike(vs) else [vs])]
return urlencode(values, doseq=1)
def _multpart_encode(items, boundary, enc):
body = []
for name, value in items:
body.append(b'--' + boundary)
if isinstance(value, MultipartFile):
file_name = value.name
content = value.content
content_type = value.mimetype
body.append(
b'Content-Disposition: form-data; name="' + to_bytes(name, enc) + b'"; filename="' + to_bytes(file_name,
enc) + b'"')
body.append(b'Content-Type: ' + to_bytes(content_type, enc))
body.append(b'')
body.append(to_bytes(content, enc))
else:
body.append(b'Content-Disposition: form-data; name="' + to_bytes(name, enc) + b'"')
body.append(b'')
body.append(to_bytes(value, enc))
body.append(b'--' + boundary + b'--')
return b'\r\n'.join(body)
def _get_form(response, formname, formid, formnumber, formxpath):
"""Find the form element """
root = create_root_node(response.text, lxml.html.HTMLParser,
base_url=get_base_url(response))
forms = root.xpath('//form')
if not forms:
raise ValueError("No <form> element found in %s" % response)
if formname is not None:
f = root.xpath('//form[#name="%s"]' % formname)
if f:
return f[0]
if formid is not None:
f = root.xpath('//form[#id="%s"]' % formid)
if f:
return f[0]
# Get form element from xpath, if not found, go up
if formxpath is not None:
nodes = root.xpath(formxpath)
if nodes:
el = nodes[0]
while True:
if el.tag == 'form':
return el
el = el.getparent()
if el is None:
break
encoded = formxpath if six.PY3 else formxpath.encode('unicode_escape')
raise ValueError('No <form> element found with %s' % encoded)
# If we get here, it means that either formname was None
# or invalid
if formnumber is not None:
try:
form = forms[formnumber]
except IndexError:
raise IndexError("Form number %d not found in %s" %
(formnumber, response))
else:
return form
def _get_inputs(form, formdata, dont_click, clickdata, response):
try:
formdata = dict(formdata or ())
except (ValueError, TypeError):
raise ValueError('formdata should be a dict or iterable of tuples')
inputs = form.xpath('descendant::textarea'
'|descendant::select'
'|descendant::input[not(#type) or #type['
' not(re:test(., "^(?:submit|image|reset)$", "i"))'
' and (../#checked or'
' not(re:test(., "^(?:checkbox|radio)$", "i")))]]',
namespaces={
"re": "http://exslt.org/regular-expressions"})
values = [(k, u'' if v is None else v)
for k, v in (_value(e) for e in inputs)
if k and k not in formdata]
if not dont_click:
clickable = _get_clickable(clickdata, form)
if clickable and clickable[0] not in formdata and not clickable[0] is None:
values.append(clickable)
values.extend(formdata.items())
return values
def _value(ele):
n = ele.name
v = ele.value
if ele.tag == 'select':
return _select_value(ele, n, v)
return n, v
def _select_value(ele, n, v):
multiple = ele.multiple
if v is None and not multiple:
# Match browser behaviour on simple select tag without options selected
# And for select tags wihout options
o = ele.value_options
return (n, o[0]) if o else (None, None)
elif v is not None and multiple:
# This is a workround to bug in lxml fixed 2.3.1
# fix https://github.com/lxml/lxml/commit/57f49eed82068a20da3db8f1b18ae00c1bab8b12#L1L1139
selected_options = ele.xpath('.//option[#selected]')
v = [(o.get('value') or o.text or u'').strip() for o in selected_options]
return n, v
def _get_clickable(clickdata, form):
"""
Returns the clickable element specified in clickdata,
if the latter is given. If not, it returns the first
clickable element found
"""
clickables = [
el for el in form.xpath(
'descendant::*[(self::input or self::button)'
' and re:test(#type, "^submit$", "i")]'
'|descendant::button[not(#type)]',
namespaces={"re": "http://exslt.org/regular-expressions"})
]
if not clickables:
return
# If we don't have clickdata, we just use the first clickable element
if clickdata is None:
el = clickables[0]
return (el.get('name'), el.get('value') or '')
# If clickdata is given, we compare it to the clickable elements to find a
# match. We first look to see if the number is specified in clickdata,
# because that uniquely identifies the element
nr = clickdata.get('nr', None)
if nr is not None:
try:
el = list(form.inputs)[nr]
except IndexError:
pass
else:
return (el.get('name'), el.get('value') or '')
# We didn't find it, so now we build an XPath expression out of the other
# arguments, because they can be used as such
xpath = u'.//*' + \
u''.join(u'[#%s="%s"]' % c for c in six.iteritems(clickdata))
el = form.xpath(xpath)
if len(el) == 1:
return (el[0].get('name'), el[0].get('value') or '')
elif len(el) > 1:
raise ValueError("Multiple elements found (%r) matching the criteria "
"in clickdata: %r" % (el, clickdata))
else:
raise ValueError('No clickable element matching clickdata: %r' % (clickdata,))
This is the code I used to call the request (in my case I needed to upload an image):
with open(img_path, 'rb') as file:
img = file.read()
file_name = os.path.basename(img_path)
multipart_file = MultipartFile(file_name, img, "image/png")
form_data = {
"param": "value", # this is an example of a text parameter
"PicUpload": multipart_file
}
yield MultipartFormRequest(url=upload_url, formdata=form_data,
callback=self.my_callback)
It's a shame that so much time has passed and Scrapy still doesn't have a built in way to do this, especially since someone wrote a very simple implementation years ago.

Getting type error in python

I am using a class based service in python and I get error whenever I want to use it. Unable to figure out the reason.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from xml.dom import minidom
from pysimplesoap.client import SoapClient
from pysimplesoap.helpers import sort_dict
MEDIA_ROOT = '/User/sunand/documents/resumes/'
parser = ResumeParser()
names = parser.get_names(MEDIA_ROOT)
print names
class ParserClient(SoapClient):
""" Extends the soap client to encode the response with utf-8 encoding.
"""
def wsdl_call(
self,
method,
*args,
**kwargs
):
""" Override wsdl_call method to make sure unmarshall is not called.
"""
operation = self.get_operation(method)
# get i/o type declarations:
inp = operation['input']
header = operation.get('header')
if 'action' in operation:
self.action = operation['action']
# construct header and parameters
if header:
self.__call_headers = sort_dict(header, self.__headers)
(method, params) = self.wsdl_call_get_params(method, inp,
*args, **kwargs)
response = self.call(method, *params)
return response
def send(self, method, xml):
""" Overrides the send method to get the actual xml content.
"""
content = super(ParserClient, self).send(method, xml)
self.result = content
return content
class ResumeParser(object):
""" Connects to the Resume Parser's XML api to get parsed data.
"""
def __init__(self, simple=True, timeout=60):
""" Initializes the ResumeParser class.
"""
self.wsdl = \
'http://jobsite.onlineresumeparser.com/rPlusParseResume.asmx?WSDL'
self.secret = 'my-secret-key' # Enter key here
self.encoding = 'base64'
self.simple = simple
self.client = ParserClient(wsdl=self.wsdl, timeout=timeout)
self.names = []
def get_file_content(self, file_path):
""" Return the encoded content for the given file.
"""
file_obj = open(os.path.abspath(file_path), 'r')
content = file_obj.read().encode(self.encoding)
file_obj.close()
return content
def get_names(self, path):
"""
Given a path to a folder that contains resume files this method
will parse the resumes and will return the names of the candidates
as a list.
"""
opt = os.path
resumes = [opt.join(path, r) for r in os.listdir(path)
if opt.isfile(opt.join(path, r))]
# Parse information for each resume.
for resume in resumes:
try:
xml_data = self.get_xml(resume)
name = self.get_name_from_xml(xml_data)
if name:
self.names.append(name)
except Exception, err:
# print name
print 'Error parsing resume: %s' % str(err)
return list(set(self.names))
def get_name_from_xml(self, data):
""" Returns the full name from the xml data given.
"""
xmldata = minidom.parseString(data)
name = xmldata.getElementsByTagName('CANDIDATE_FULL_NAME')
name = name[0].childNodes[0].data.title()
return name
def get_xml(self, filepath):
""" Fetches and returns the xml for the given file from the api.
"""
filename = os.path.basename(filepath)
extension = os.path.splitext(filepath)[1]
base64 = self.get_file_content(filepath)
filedata = {
'B64FileZippedContent': base64,
'FileName': filename,
'InputType': extension,
'UserID': 1,
'secretKey': self.secret,
}
get = \
(self.client.GetSimpleXML if self.simple else self.client.getHRXML)
get(**filedata)
return self.process_raw_xml()
def process_raw_xml(self, data=None):
""" Processes and returns the clean XML.
"""
raw = (data if data else self.client.result)
parsed = minidom.parseString(raw)
result = parsed.getElementsByTagName('GetSimpleXMLResult')[0]
text_node = result.childNodes[0]
data = text_node.data.encode('UTF-8')
return data
Upon running the code I am getting an error
TypeError: wsdl_call_get_params() got an unexpected keyword argument 'secretKey'
What am I doing wrong?

It looks like you are incorrectly overriding wsdl_call.
Firstly, we can see that SoapClient (which you extend in ParserClient), has a __getattr__ function that fetches pseudo-attributes of the SoapClient.
def __getattr__(self, attr):
"Return a pseudo-method that can be called"
if not self.services: # not using WSDL?
return lambda self=self, *args, **kwargs: self.call(attr,*args,**kwargs)
else: # using WSDL:
return lambda *args, **kwargs: self.wsdl_call(attr,*args,**kwargs)
You can see that this function is using wsdl_call to help it map functions to unknown attributes.
The specific pseudo-method that is causing the problem is in your code (or appears to be):
filedata = {
'B64FileZippedContent': base64,
'FileName': filename,
'InputType': extension,
'UserID': 1,
'secretKey': self.secret, # <-- the secretKey key word argument
}
get = \
(self.client.GetSimpleXML if self.simple else self.client.getHRXML)
get(**filedata)
# here client is an instance of your `ParserClient` (and `SoapClient`).
This above bit took me a while to track down. With a full stack trace I would have found it much quicker. Please always post stack traces (when there is one) in future when asking for help.
How to solve this
Provide a concrete implementation of GetSimpleXML and getHRXML. This will solve the immediate problem, but not the larger problem.
Rewrite wsdl_call
The rewritten section of code should check the value of the method argument and either do what you want, or delegate to the SoapClient implementation.
eg.
def wsdl_call(self, method, *args, **kwargs):
if method == "some_method":
return self._my_wsdl_call(method, *args, **kwargs)
else:
return super(ParserClient, self).wsdl_call(method, *args, **kwargs)
def _my_wsdl_call(self, method, *args, **kwargs):
...

wordpresslib attaching images to posts

looking at the example provided by wordpresslib, its very straight forward on how to upload images to the media library. However, the attachment of images looks like it was never finished. Has anyone successfully attached the images?
#!/usr/bin/env python
"""
Small example script that publish post with JPEG image
"""
# import library
import wordpresslib
print 'Example of posting.'
print
url = raw_input('Wordpress URL (xmlrpc.php will be added):')
user = raw_input('Username:')
password = raw_input('Password:')
# prepare client object
wp = wordpresslib.WordPressClient(url+"xmlrpc.php", user, password)
# select blog id
wp.selectBlog(0)
# upload image for post
# imageSrc = wp.newMediaObject('python.jpg')
# FIXME if imageSrc:
# create post object
post = wordpresslib.WordPressPost()
post.title = 'Test post'
post.description = '''
Python is the best programming language in the earth !
No image BROKEN FIXME <img src="" />
'''
#post.categories = (wp.getCategoryIdFromName('Python'),)
# Add tags
post.tags = ["python", "snake"]
# do not publish post
idNewPost = wp.newPost(post, False)
print
print 'Posting successfull! (Post has not been published though)'
WordPressPost class:
class WordPressPost:
"""Represents post item
"""
def __init__(self):
self.id = 0
self.title = ''
self.date = None
self.permaLink = ''
self.description = ''
self.textMore = ''
self.excerpt = ''
self.link = ''
self.categories = []
self.user = ''
self.allowPings = False
self.allowComments = False
self.tags = []
self.customFields = []
def addCustomField(self, key, value):
kv = {'key':key, 'value':value}
self.customFields.append(kv)

Wordpress saves images as website.com/wp-content/uploads/YEAR/MONTH/FILENAME
Adding a simple image tag with the above format in to post.description display the image on the post.
where YEAR is the current year with a 4 digit format (ex. 2015)
and MONTH is the current month with a leading zero (ex. 01,02,... 12)
and FILENAME is the file name submitted via imageSrc = wp.newMediaObject('python.jpg')
Example file name: website.com/wp-content/uploads/2015/06/image.jpg
Here is how I posted my image:
import time
import wordpresslib
import Image
from datetime import datetime
time = datetime.now()
h = str(time.strftime('%H'))
m = str(time.strftime('%M'))
s = str(time.strftime('%S'))
mo = str(time.strftime('%m'))
yr = str(time.strftime('%Y'))
url = 'WORDPRESSURL.xmlrpc.php'
wp = wordpresslib.WordPressClient(url,'USERNAME','PASSWORD')
wp.selectBlog(0)
imageSrc = wp.newMediaObject('testimage'+h+m+s'.jpg') #Used this format so that if i post images with the same name its unlikely they will override eachother
img = 'http://WORDPRESSURL/wp-content/uploads/'+yr+'/'+mo+'/testimage'+h+m+s+'.jpg'
post=wordpresslib.WordPressPost()
post.title='title'
post.description='<img src="'+img+'"/>'
idPost=wp.newPost(post,true)

Configure auto reload template and enable bytecode cache for jinja2 in appengine

How to configure jinja2 in Appengine to:
Auto reload when template is updated.
Enable bytecode cache, so it can be share among each instances. I prefer jinja2 to produce bytecode when compiling template, and store it to datastore. So next instance will load bytecode instead of repeatedly compile the template.

I have added the bcc like this, using the app engine memcache Client()::
loader = dynloaders.DynLoader() # init Function loader
bcc = MemcachedBytecodeCache(memcache.Client(), prefix='jinja2/bytecode/', timeout=None)
return Environment(auto_reload=True, cache_size=100, loader=FunctionLoader(loader.load_dyn_all),
bytecode_cache=bcc)
My function loader:
def html(self, cid):
def _html_txt_up_to_date(): # closure to check if template is up to date
return CMSUpdates.check_no_update(cid, template.modified)
template = ndb.Key('Templates', cid, parent=self.parent_key).get()
if not template:
logging.error('DynLoader (HTML/TXT): %s' % cid)
return None # raises TemplateNotFound exception
return template.content, None, _html_txt_up_to_date
The template model uses template.modified : ndb.DateTimeProperty(auto_now=True)
The closure function:
class CMSUpdates(ndb.Model):
updates = ndb.JsonProperty()
#classmethod
def check_no_update(cls, cid, cid_modified):
cms_updates = cls.get_or_insert('cms_updates', updates=dict()).updates
if cid in cms_updates: # cid modified has dt microseconds
if cid_modified >= datetime.strptime(cms_updates[cid], '%Y-%m-%d %H:%M:%S'):
if (datetime.now() - timedelta(days=1)) > cid_modified:
del cms_updates[cid]
cls(id='cms_updates', updates=cms_updates).put_async()
return True
return False # reload the template
return True

Been few weeks i looking for the solution. And finally i figured it out, i would like to share my code for everyone. There are 4 python source files in my code.
TemplateEngine.py, ContentRenderer.py, TestContent.py & Update_Template.py
File: TemplateEngine.py
Note:
i use now = datetime.utcnow() + timedelta(hours=8) because my timezone is GMT+8
You must use ndb.BlobProperty to store the bytecode, ndb.TextProperty will not work!
from google.appengine.ext import ndb
from datetime import datetime,timedelta
class SiteTemplates(ndb.Model):
name = ndb.StringProperty(indexed=True, required=True)
data = ndb.TextProperty()
uptodate = ndb.BooleanProperty(required=True)
class SiteTemplateBytecodes(ndb.Model):
key = ndb.StringProperty(indexed=True, required=True)
data = ndb.BlobProperty(required=True)
mod_datetime = ndb.DateTimeProperty(required=True)
class LocalCache(jinja2.BytecodeCache):
def load_bytecode(self, bucket):
q = SiteTemplateBytecodes.query(SiteTemplateBytecodes.key == bucket.key)
if q.count() > 0:
r = q.get()
bucket.bytecode_from_string(r.data)
def dump_bytecode(self, bucket):
now = datetime.utcnow() + timedelta(hours=8)
q = SiteTemplateBytecodes.query(SiteTemplateBytecodes.key == bucket.key)
if q.count() > 0:
r = q.get()
r.data = bucket.bytecode_to_string()
r.mod_datetime = now
else:
r = SiteTemplateBytecodes(key=bucket.key, data=bucket.bytecode_to_string(), mod_datetime=now)
r.put()
def Update_Template_Source(tn, source):
try:
q = SiteTemplates.query(SiteTemplates.name == tn)
if q.count() == 0:
u = mkiniTemplates(name=tn, data=source, uptodate=False)
else:
u = q.get()
u.name=tn
u.data=source
u.uptodate=False
u.put()
return True
except Exception,e:
logging.exception(e)
return False
def Get_Template_Source(tn):
uptodate = False
def Template_Uptodate():
return uptodate
try:
q = SiteTemplates.query(SiteTemplates.name == tn)
if q.count() > 0:
r = q.get()
uptodate = r.uptodate
if r.uptodate == False:
r.uptodate=True
r.put()
return r.data, tn, Template_Uptodate
else:
return None
except Exception,e:
logging.exception(e)
return None
File: ContentRenderer.py
Note: It is very important to set cache_size=0, otherwise bytecode cache function will be disable. I have no idea why.
from TemplateEngine import Get_Template_Source
import jinja2
def Render(tn,tags):
global te
return te.Render(tn, tags)
bcc = LocalCache()
te = jinja2.Environment(loader=jinja2.FunctionLoader(Get_Template_Source), cache_size=0, extensions=['jinja2.ext.autoescape'], bytecode_cache=bcc)
File: Update_Template.py
Note: Use Update_Template_Source() to update template source to datastore.
from TemplateEngine import Update_Template_Source
template_source = '<html><body>hello word to {{title}}!</body></html>'
if Update_Template_Source('my-template.html', template_source):
print 'template is updated'
else:
print 'error when updating template source'
File: TestContent.py
Note: Do some test
from ContentRenderer import Render
print Render('my-template.htmnl', {'title':'human'})
'hello world to human!'
You will realize, even you have more than 20 instances in your application, the latency time will not increase, even you update your template. And the template source will update in 5 to 10 seconds.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to put formatted text into the clipboard? - python

Related

Evernote Python API - Hitting rate limits

Scrapy upload file

Getting type error in python

wordpresslib attaching images to posts

Configure auto reload template and enable bytecode cache for jinja2 in appengine

Categories

Resources