Pygments in QScintilla

Pygments in QScintilla - python

Consider this mcve:
import math
import sys
import textwrap
import time
from pathlib import Path
from collections import defaultdict
from PyQt5.Qsci import QsciLexerCustom, QsciScintilla
from PyQt5.Qt import *
from pygments import lexers, styles, highlight, formatters
from pygments.lexer import Error, RegexLexer, Text, _TokenType
from pygments.style import Style
EXTRA_STYLES = {
"monokai": {
"background": "#272822",
"caret": "#F8F8F0",
"foreground": "#F8F8F2",
"invisibles": "#F8F8F259",
"lineHighlight": "#3E3D32",
"selection": "#49483E",
"findHighlight": "#FFE792",
"findHighlightForeground": "#000000",
"selectionBorder": "#222218",
"activeGuide": "#9D550FB0",
"misspelling": "#F92672",
"bracketsForeground": "#F8F8F2A5",
"bracketsOptions": "underline",
"bracketContentsForeground": "#F8F8F2A5",
"bracketContentsOptions": "underline",
"tagsOptions": "stippled_underline",
}
}
def convert_size(size_bytes):
if size_bytes == 0:
return "0B"
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size_bytes, 1024)))
p = math.pow(1024, i)
s = round(size_bytes / p, 2)
return f"{s} {size_name[i]}"
class ViewLexer(QsciLexerCustom):
def __init__(self, lexer_name, style_name):
super().__init__()
# Lexer + Style
self.pyg_style = styles.get_style_by_name(style_name)
self.pyg_lexer = lexers.get_lexer_by_name(lexer_name, stripnl=False)
self.cache = {
0: ('root',)
}
self.extra_style = EXTRA_STYLES[style_name]
# Generate QScintilla styles
self.font = QFont("Consolas", 8, weight=QFont.Bold)
self.token_styles = {}
index = 0
for k, v in self.pyg_style:
self.token_styles[k] = index
if v.get("color", None):
self.setColor(QColor(f"#{v['color']}"), index)
if v.get("bgcolor", None):
self.setPaper(QColor(f"#{v['bgcolor']}"), index)
self.setFont(self.font, index)
index += 1
def defaultPaper(self, style):
return QColor(self.extra_style["background"])
def language(self):
return self.pyg_lexer.name
def get_tokens_unprocessed(self, text, stack=('root',)):
"""
Split ``text`` into (tokentype, text) pairs.
``stack`` is the inital stack (default: ``['root']``)
"""
lexer = self.pyg_lexer
pos = 0
tokendefs = lexer._tokens
statestack = list(stack)
statetokens = tokendefs[statestack[-1]]
while 1:
for rexmatch, action, new_state in statetokens:
m = rexmatch(text, pos)
if m:
if action is not None:
if type(action) is _TokenType:
yield pos, action, m.group()
else:
for item in action(lexer, m):
yield item
pos = m.end()
if new_state is not None:
# state transition
if isinstance(new_state, tuple):
for state in new_state:
if state == '#pop':
statestack.pop()
elif state == '#push':
statestack.append(statestack[-1])
else:
statestack.append(state)
elif isinstance(new_state, int):
# pop
del statestack[new_state:]
elif new_state == '#push':
statestack.append(statestack[-1])
else:
assert False, "wrong state def: %r" % new_state
statetokens = tokendefs[statestack[-1]]
break
else:
# We are here only if all state tokens have been considered
# and there was not a match on any of them.
try:
if text[pos] == '\n':
# at EOL, reset state to "root"
statestack = ['root']
statetokens = tokendefs['root']
yield pos, Text, u'\n'
pos += 1
continue
yield pos, Error, text[pos]
pos += 1
except IndexError:
break
def highlight_slow(self, start, end):
style = self.pyg_style
view = self.editor()
code = view.text()[start:]
tokensource = self.get_tokens_unprocessed(code)
self.startStyling(start)
for _, ttype, value in tokensource:
self.setStyling(len(value), self.token_styles[ttype])
def styleText(self, start, end):
view = self.editor()
t_start = time.time()
self.highlight_slow(start, end)
t_elapsed = time.time() - t_start
len_text = len(view.text())
text_size = convert_size(len_text)
view.setWindowTitle(f"Text size: {len_text} - {text_size} Elapsed: {t_elapsed}s")
def description(self, style_nr):
return str(style_nr)
class View(QsciScintilla):
def __init__(self, lexer_name, style_name):
super().__init__()
view = self
# -------- Lexer --------
self.setEolMode(QsciScintilla.EolUnix)
self.lexer = ViewLexer(lexer_name, style_name)
self.setLexer(self.lexer)
# -------- Shortcuts --------
self.text_size = 1
self.s1 = QShortcut(f"ctrl+1", view, self.reduce_text_size)
self.s2 = QShortcut(f"ctrl+2", view, self.increase_text_size)
# self.gen_text()
# # -------- Multiselection --------
self.SendScintilla(view.SCI_SETMULTIPLESELECTION, True)
self.SendScintilla(view.SCI_SETMULTIPASTE, 1)
self.SendScintilla(view.SCI_SETADDITIONALSELECTIONTYPING, True)
# -------- Extra settings --------
self.set_extra_settings(EXTRA_STYLES[style_name])
def get_line_separator(self):
m = self.eolMode()
if m == QsciScintilla.EolWindows:
eol = '\r\n'
elif m == QsciScintilla.EolUnix:
eol = '\n'
elif m == QsciScintilla.EolMac:
eol = '\r'
else:
eol = ''
return eol
def set_extra_settings(self, dct):
self.setIndentationGuidesBackgroundColor(QColor(0, 0, 255, 0))
self.setIndentationGuidesForegroundColor(QColor(0, 255, 0, 0))
if "caret" in dct:
self.setCaretForegroundColor(QColor(dct["caret"]))
if "line_highlight" in dct:
self.setCaretLineBackgroundColor(QColor(dct["line_highlight"]))
if "brackets_background" in dct:
self.setMatchedBraceBackgroundColor(QColor(dct["brackets_background"]))
if "brackets_foreground" in dct:
self.setMatchedBraceForegroundColor(QColor(dct["brackets_foreground"]))
if "selection" in dct:
self.setSelectionBackgroundColor(QColor(dct["selection"]))
if "background" in dct:
c = QColor(dct["background"])
self.resetFoldMarginColors()
self.setFoldMarginColors(c, c)
def increase_text_size(self):
self.text_size *= 2
self.gen_text()
def reduce_text_size(self):
if self.text_size == 1:
return
self.text_size //= 2
self.gen_text()
def gen_text(self):
content = Path(__file__).read_text()
while len(content) < self.text_size:
content *= 2
self.setText(content[:self.text_size])
if __name__ == '__main__':
app = QApplication(sys.argv)
view = View("python", "monokai")
view.setText(textwrap.dedent("""\
'''
Ctrl+1 = You'll decrease the size of existing text
Ctrl+2 = You'll increase the size of existing text
Warning: Check the window title to see how long it takes rehighlighting
'''
"""))
view.resize(800, 600)
view.show()
app.exec_()
To run it you need to install:
QScintilla==2.10.8
Pygments==2.3.1
PyQt5==5.12
I'm trying to figure out how to use pygments on a QScintilla widget and right now the main problem I need to solve is the performance when dealing with non-tiny documents.
I'd like the editor to become responsive & usable when dealing with large documents (>=100kb) but I don't know very well what's the approach I should take here. In order to test performance you can use Ctrl+1 or Ctrl+2 and the widget text will be decreased/increased respectively.
When I say "responsive" I mean that the highlighting computation of the visible screen should take no longer of [1-2]frame/highglight <=> [17-34]ms/highlight (assuming 60fps) so when typing you won't feel any slowdown.
Note: As you can see in the above mcve, I've included the pygments tokenizer so you can play around with it... it feels like in order to achieve "real-time highlighting" I'd need to use memoization/caching in some smart way but I'm struggling to figure out what's the data I need to cache and what's the best way to cache it... :/
Demo:
In the above demo you can see using this naive highlighting the editor will become unusable very soon, in my laptop rehighlighting text chunks of 32kb is still giving interactive framerate but with something higher than that the editor becomes completely unusable.
CONSIDERATIONS:
The most typical case will happen when you're typing/coding on the visible screen with no selections
It may happen you're editing multiple selections spread over the whole document, which means you won't know if these selections are near the visible screen or not. For instance, in Sublime when you press Alt+F3 you select all ocurrences under cursor
In the above snippet I've used a python lexer but the algorithm shouldn't focus too much on that one. Pygments support ~300 lexers afterall
The worst case scenario would happen if the visible screen is at the end of the file and one of the selections happens to live at the beginning of the screen... In case you need to rehighlight the whole document you'd need to find an alternative way even if that means the "highlighting" is not correct on the first pass
The most important is performance but also correctness... that is, if you give enough time the whole document should become highlighted correctly
REFERENCES:
https://qscintilla.com/styletext-the-highlighting-engine/
http://pygments.org/docs/
https://www.riverbankcomputing.com/static/Docs/QScintilla/annotated.html
The following documents are not specific to this particular problem but they talk about possible strategies of caching and syntax highlighting:
rope_science_11
https://raphlinus.github.io/personal/2017/10/12/what-im-working-on.html
syntax highlighting doc
https://code.visualstudio.com/blogs/2017/02/08/syntax-highlighting-optimizations

In highlight_slow, you're receiving start and end values, but you're ignoring the end value. As a result, any time you type a single character, the code is rehighlighting the entire rest of the buffer. This is why, if you type at the end of a long buffer, the timing is very fast - around .1 - .2 ms - but if you type at the beginning, it's very slow.
Thinking just in terms of correct highlighting, in most cases (with Python, at least) when you introduce a new character only the current line needs to be re-styled. Sometimes, like if you start a function definition or open a bracket, multiple lines might need to be styled. Only when you open or close a multiline """ or ''' string - will the rest of the buffer need to be restyled.
If you include start and end in your logging, you'll see that most of the time when you type they span a very small range. If you change one line of your highlight_code method from
code = view.text()[start:]
to
code = view.text()[start:end]
you'll see that the method almost always take sub-millisecond time now, and it almost always gets the highlighting correct.
From what I've been able to tell, this only gets the styling wrong when multiline quotes are involved. However, your current code has the same problem: try opening a multiline string, typing enter, and continuing the string on the next line. The second line will be highlighted as code. Qscintilla is leading you astray a bit here, by giving a start that does not include the beginning of the multiline quote. It's not trying to be perfect, though - the docs say
In fact, QScintilla says: “Hey, I think you should restyle the text between the character at position start up to the character at position end“. You are completely free to ignore this suggestion.
Handling mutliline quoting correctly will be a bit tricky! If it were me, and I wanted to get something working quickly, I'd probably impement a keystroke to refresh the highlighting for the entire buffer and use that when things look wrong.

If you're happy to write your own syntax highlighter, here's a possible way of speeding it up dramatically. You can do this with Pygments with a little effort; see the bottom of the answer for one possible way of doing this.
The syntax highlighter is simple. It has a small internal data structure, representing the current context, which it updates as it goes along. So, for the following Python code:
import time
def sleep_ms(ms):
"""sleeps for a length of time
given in milliseconds"""
time.sleep(
ms / 1000
)
sleep_ms(1000)
syntax error
its context might change like this, as it goes through the tokens¹:
>>> [nothing]
>>> IMPORT
IMPORT modulename
>>> [nothing]
>>> DEF
DEF functionname
DEF functionname, OPENPAREN
DEF functionname, OPENPAREN
DEF functionname ARGLIST
DEF functionname ARGLIST COLON
>>> FUNCBODY 4s
FUNCBODY 4s, DOUBLE_MLSTR
>>> FUNCBODY 4s, DOUBLE_MLSTR
FUNCBODY 4s
>>> FUNCBODY 4s
>>> FUNCBODY 4s, varname
FUNCBODY 4s, varname ATTR
FUNCBODY 4s, varname ATTR attrname
FUNCBODY 4s, varname ATTR attrname, OPENPAREN
>>> FUNCBODY 4s, varname ATTR attrname, OPENPAREN
>>> FUNCBODY 4s, varname ATTR attrname, OPENPAREN, varname
FUNCBODY 4s, varname ATTR attrname, OPENPAREN, TRUEDIV varname
FUNCBODY 4s, varname ATTR attrname, OPENPAREN, TRUEDIV varname intliteral
>>> FUNCBODY 4s, FUNCCALL
>>> FUNCBODY 4s
>>> [nothing]
varname
varname, OPENPAREN
varname, OPENPAREN, intliteral
FUNCCALL
>>> [nothing]
varname
ERROR
If you cache the final contexts of each line, then you can start the syntax highlighting at the line that changed and keep going until you get to a line where the context is the same as is cached; you don't have to recompute the whole file, but if you add something like """ then it'll recompute until the end. If you get to an ERROR then you can just stop there; there's no point recalculating the syntax highlighting past a syntax error, because you don't know what the context's meant to be. (For the initial version when you open the file, you could assume that there's no context after a syntax error; this heuristic seems to work well enough.)
This syntax highlighter has the potential to be ridiculously accurate, or just "good enough", with virtually no perceivable difference in speed between the two. Language-specific highlighters could even be dynamically linked plugins, and it'd still be reasonably fast! Additionally, if you add debouncing for highlighting of subsequent lines, typing """""" quickly enough will be just as fast as typing "" or 42, no matter how big the file is.
Note that this highlighter is single-pass – it doesn't highlight known variable names differently to unknown ones, for example. If you wish to do this, the problem becomes considerably harder.
¹: This example Python highlighter is a "ridiculously accurate" one; I probably wouldn't go with something like this if I had a time limit. Nevertheless, I've got it planned out in my head and – for now, at least – could explain it in detail if required.
Your code requires surprisingly few changes to work with this technique.
Change the beginning of your get_tokens_unprocessed to:
def get_tokens_unprocessed(self, text, stack=('root',), mutate_stack=False):
"""
Split ``text`` into (tokentype, text) pairs.
``stack`` is the inital stack (default: ``['root']``)
"""
lexer = self.pyg_lexer
pos = 0
tokendefs = lexer._tokens
if not mutate_stack:
statestack = list(stack)
statetokens = tokendefs[statestack[-1]]
Find some way of detecting the line number.
In highlight_slow's loop, do something like this (except better):
stack = list(self.cache[line_no_of(start)])
tokensource = self.get_tokens_unprocessed(code, stack, True)
self.startStyling(start)
pos = start;
for _, ttype, value in tokensource:
self.setStyling(len(value), self.token_styles[ttype])
pos += len(value)
if is_line_end(pos):
if pos >= end and stack == self.cache[line_no_of(start)]:
break
self.cache[line_no_of(start)] = tuple(stack)
Obviously, the code would have to be better than this, and you'd have to find some efficient way of implementing is_line_end and line_no_of; there's probably some Pygments way of doing this.
This solution has at least one benefit over yours already: it supports multi-line comments.

Related

Is there a way to give a function access to the (external-scope) name of the variable being passed in? [duplicate]

This question already has answers here:
Getting the name of a variable as a string
(32 answers)
Closed 4 months ago.
Is it possible to get the original variable name of a variable passed to a function? E.g.
foobar = "foo"
def func(var):
print var.origname
So that:
func(foobar)
Returns:
>>foobar
EDIT:
All I was trying to do was make a function like:
def log(soup):
f = open(varname+'.html', 'w')
print >>f, soup.prettify()
f.close()
.. and have the function generate the filename from the name of the variable passed to it.
I suppose if it's not possible I'll just have to pass the variable and the variable's name as a string each time.

EDIT: To make it clear, I don't recommend using this AT ALL, it will break, it's a mess, it won't help you in any way, but it's doable for entertainment/education purposes.
You can hack around with the inspect module, I don't recommend that, but you can do it...
import inspect
def foo(a, f, b):
frame = inspect.currentframe()
frame = inspect.getouterframes(frame)[1]
string = inspect.getframeinfo(frame[0]).code_context[0].strip()
args = string[string.find('(') + 1:-1].split(',')
names = []
for i in args:
if i.find('=') != -1:
names.append(i.split('=')[1].strip())
else:
names.append(i)
print names
def main():
e = 1
c = 2
foo(e, 1000, b = c)
main()
Output:
['e', '1000', 'c']

To add to Michael Mrozek's answer, you can extract the exact parameters versus the full code by:
import re
import traceback
def func(var):
stack = traceback.extract_stack()
filename, lineno, function_name, code = stack[-2]
vars_name = re.compile(r'\((.*?)\).*$').search(code).groups()[0]
print vars_name
return
foobar = "foo"
func(foobar)
# PRINTS: foobar

Looks like Ivo beat me to inspect, but here's another implementation:
import inspect
def varName(var):
lcls = inspect.stack()[2][0].f_locals
for name in lcls:
if id(var) == id(lcls[name]):
return name
return None
def foo(x=None):
lcl='not me'
return varName(x)
def bar():
lcl = 'hi'
return foo(lcl)
bar()
# 'lcl'
Of course, it can be fooled:
def baz():
lcl = 'hi'
x='hi'
return foo(lcl)
baz()
# 'x'
Moral: don't do it.

Another way you can try if you know what the calling code will look like is to use traceback:
def func(var):
stack = traceback.extract_stack()
filename, lineno, function_name, code = stack[-2]
code will contain the line of code that was used to call func (in your example, it would be the string func(foobar)). You can parse that to pull out the argument

You can't. It's evaluated before being passed to the function. All you can do is pass it as a string.

#Ivo Wetzel's answer works in the case of function call are made in one line, like
e = 1 + 7
c = 3
foo(e, 100, b=c)
In case that function call is not in one line, like:
e = 1 + 7
c = 3
foo(e,
1000,
b = c)
below code works:
import inspect, ast
def foo(a, f, b):
frame = inspect.currentframe()
frame = inspect.getouterframes(frame)[1]
string = inspect.findsource(frame[0])[0]
nodes = ast.parse(''.join(string))
i_expr = -1
for (i, node) in enumerate(nodes.body):
if hasattr(node, 'value') and isinstance(node.value, ast.Call)
and hasattr(node.value.func, 'id') and node.value.func.id == 'foo' # Here goes name of the function:
i_expr = i
break
i_expr_next = min(i_expr + 1, len(nodes.body)-1)
lineno_start = nodes.body[i_expr].lineno
lineno_end = nodes.body[i_expr_next].lineno if i_expr_next != i_expr else len(string)
str_func_call = ''.join([i.strip() for i in string[lineno_start - 1: lineno_end]])
params = str_func_call[str_func_call.find('(') + 1:-1].split(',')
print(params)
You will get:
[u'e', u'1000', u'b = c']
But still, this might break.

You can use python-varname package
from varname import nameof
s = 'Hey!'
print (nameof(s))
Output:
s
Package below:
https://github.com/pwwang/python-varname

For posterity, here's some code I wrote for this task, in general I think there is a missing module in Python to give everyone nice and robust inspection of the caller environment. Similar to what rlang eval framework provides for R.
import re, inspect, ast
#Convoluted frame stack walk and source scrape to get what the calling statement to a function looked like.
#Specifically return the name of the variable passed as parameter found at position pos in the parameter list.
def _caller_param_name(pos):
#The parameter name to return
param = None
#Get the frame object for this function call
thisframe = inspect.currentframe()
try:
#Get the parent calling frames details
frames = inspect.getouterframes(thisframe)
#Function this function was just called from that we wish to find the calling parameter name for
function = frames[1][3]
#Get all the details of where the calling statement was
frame,filename,line_number,function_name,source,source_index = frames[2]
#Read in the source file in the parent calling frame upto where the call was made
with open(filename) as source_file:
head=[source_file.next() for x in xrange(line_number)]
source_file.close()
#Build all lines of the calling statement, this deals with when a function is called with parameters listed on each line
lines = []
#Compile a regex for matching the start of the function being called
regex = re.compile(r'\.?\s*%s\s*\(' % (function))
#Work backwards from the parent calling frame line number until we see the start of the calling statement (usually the same line!!!)
for line in reversed(head):
lines.append(line.strip())
if re.search(regex, line):
break
#Put the lines we have groked back into sourcefile order rather than reverse order
lines.reverse()
#Join all the lines that were part of the calling statement
call = "".join(lines)
#Grab the parameter list from the calling statement for the function we were called from
match = re.search('\.?\s*%s\s*\((.*)\)' % (function), call)
paramlist = match.group(1)
#If the function was called with no parameters raise an exception
if paramlist == "":
raise LookupError("Function called with no parameters.")
#Use the Python abstract syntax tree parser to create a parsed form of the function parameter list 'Name' nodes are variable names
parameter = ast.parse(paramlist).body[0].value
#If there were multiple parameters get the positional requested
if type(parameter).__name__ == 'Tuple':
#If we asked for a parameter outside of what was passed complain
if pos >= len(parameter.elts):
raise LookupError("The function call did not have a parameter at postion %s" % pos)
parameter = parameter.elts[pos]
#If there was only a single parameter and another was requested raise an exception
elif pos != 0:
raise LookupError("There was only a single calling parameter found. Parameter indices start at 0.")
#If the parameter was the name of a variable we can use it otherwise pass back None
if type(parameter).__name__ == 'Name':
param = parameter.id
finally:
#Remove the frame reference to prevent cyclic references screwing the garbage collector
del thisframe
#Return the parameter name we found
return param

If you want a Key Value Pair relationship, maybe using a Dictionary would be better?
...or if you're trying to create some auto-documentation from your code, perhaps something like Doxygen (http://www.doxygen.nl/) could do the job for you?

I wondered how IceCream solves this problem. So I looked into the source code and came up with the following (slightly simplified) solution. It might not be 100% bullet-proof (e.g. I dropped get_text_with_indentation and I assume exactly one function argument), but it works well for different test cases. It does not need to parse source code itself, so it should be more robust and simpler than previous solutions.
#!/usr/bin/env python3
import inspect
from executing import Source
def func(var):
callFrame = inspect.currentframe().f_back
callNode = Source.executing(callFrame).node
source = Source.for_frame(callFrame)
expression = source.asttokens().get_text(callNode.args[0])
print(expression, '=', var)
i = 1
f = 2.0
dct = {'key': 'value'}
obj = type('', (), {'value': 42})
func(i)
func(f)
func(s)
func(dct['key'])
func(obj.value)
Output:
i = 1
f = 2.0
s = string
dct['key'] = value
obj.value = 42
Update: If you want to move the "magic" into a separate function, you simply have to go one frame further back with an additional f_back.
def get_name_of_argument():
callFrame = inspect.currentframe().f_back.f_back
callNode = Source.executing(callFrame).node
source = Source.for_frame(callFrame)
return source.asttokens().get_text(callNode.args[0])
def func(var):
print(get_name_of_argument(), '=', var)

If you want to get the caller params as in #Matt Oates answer answer without using the source file (ie from Jupyter Notebook), this code (combined from #Aeon answer) will do the trick (at least in some simple cases):
def get_caller_params():
# get the frame object for this function call
thisframe = inspect.currentframe()
# get the parent calling frames details
frames = inspect.getouterframes(thisframe)
# frame 0 is the frame of this function
# frame 1 is the frame of the caller function (the one we want to inspect)
# frame 2 is the frame of the code that calls the caller
caller_function_name = frames[1][3]
code_that_calls_caller = inspect.findsource(frames[2][0])[0]
# parse code to get nodes of abstract syntact tree of the call
nodes = ast.parse(''.join(code_that_calls_caller))
# find the node that calls the function
i_expr = -1
for (i, node) in enumerate(nodes.body):
if _node_is_our_function_call(node, caller_function_name):
i_expr = i
break
# line with the call start
idx_start = nodes.body[i_expr].lineno - 1
# line with the end of the call
if i_expr < len(nodes.body) - 1:
# next expression marks the end of the call
idx_end = nodes.body[i_expr + 1].lineno - 1
else:
# end of the source marks the end of the call
idx_end = len(code_that_calls_caller)
call_lines = code_that_calls_caller[idx_start:idx_end]
str_func_call = ''.join([line.strip() for line in call_lines])
str_call_params = str_func_call[str_func_call.find('(') + 1:-1]
params = [p.strip() for p in str_call_params.split(',')]
return params
def _node_is_our_function_call(node, our_function_name):
node_is_call = hasattr(node, 'value') and isinstance(node.value, ast.Call)
if not node_is_call:
return False
function_name_correct = hasattr(node.value.func, 'id') and node.value.func.id == our_function_name
return function_name_correct
You can then run it as this:
def test(*par_values):
par_names = get_caller_params()
for name, val in zip(par_names, par_values):
print(name, val)
a = 1
b = 2
string = 'text'
test(a, b,
string
)
to get the desired output:
a 1
b 2
string text

Since you can have multiple variables with the same content, instead of passing the variable (content), it might be safer (and will be simpler) to pass it's name in a string and get the variable content from the locals dictionary in the callers stack frame. :
def displayvar(name):
import sys
return name+" = "+repr(sys._getframe(1).f_locals[name])

If it just so happens that the variable is a callable (function), it will have a __name__ property.
E.g. a wrapper to log the execution time of a function:
def time_it(func, *args, **kwargs):
start = perf_counter()
result = func(*args, **kwargs)
duration = perf_counter() - start
print(f'{func.__name__} ran in {duration * 1000}ms')
return result

How do you obtain the body of a function?

I would need to separate the signature from the body of a function in Python, i.e. :
def mult(a, b):
"Multiplication"
if a or b is None:
# border case
return None
else:
return a * b
(the function is there for demonstration only).
I know that inspect.getsourcelines(mult) will get me the full code, but I would like to have only the body, i.e. less the signature and the docstring:
if a or b is None:
# border case
return None
else:
return a * b
Is there an elegant way to obtain that, ideally using Python3 built-in parsing tools, rather than string manipulation?

As I don't know any function that does this, here is a homemade function that should work.
I don't have time for more in-depth use-cases. So i'll leave my incomplete answer here.
def get_body(func):
""" Using the magic method __doc__, we KNOW the size of the docstring.
We then, just substract this from the total length of the function
"""
try:
lines_to_skip = len(func.__doc__.split('\n'))
except AttributeError:
lines_to_skip = 0
lines = getsourcelines(func)[0]
return ''.join( lines[lines_to_skip+1:] )
def ex_0(a, b):
""" Please !
Don't
Show
This
"""
if a or b is None:
# border case
return None
else:
return a * b
def ex_1(a, b):
''' Please !Don'tShowThis'''
if a or b is None:
# border case
return None
else:
return a * b
def ex_2(a, b):
if a or b is None:
# border case
return None
else:
return a * b
def ex_3(bullshit, hello):
pass
get_body(ex_0)
# if a or b is None:
# # border case
# return None
# else:
# return a * b
get_body(ex_1)
# if a or b is None:
# # border case
# return None
# else:
# return a * b
get_body(ex_2)
# if a or b is None:
# # border case
# return None
# else:
# return a * b
get_body(ex_3)
# pass

you can use this also
i = inspect.getsource(fun_name).index('\n')
j = inspect.getsource(fun_name).rindex(':',0,i)
print(inspect.getsource(fun_name)[j+1:])

If you know your code will always have a non-empty docstring like a above, You can do this with the built in inspect library.
The below snippet should get you acquainted with how to see the source code body of a function.
def hello_world():
"""Python port of original Fortran code"""
print("hello_world")
import inspect
source = inspect.getsource(hello_world)
doc = hello_world.__doc__
code = source.split(doc)
body = code[1]
print(body)
EDIT:
I thought about this some more, you can first remove the defintion line and then strip out the docstring if it exists:
def hello_world():
"""Python port of original Fortran code"""
print("hello_world")
import inspect
source = inspect.getsource(hello_world)
doc = hello_world.__doc__
code = source.split(':',1)
body= code[1].replace(doc, "")
body = body.replace('""""""',"")
print(body)

Thanks a lot for the inspiring answers! I tried to take this further and I found a solution that uses Python's ast facilities.
import inspect, ast
from astunparse import unparse
def mult(a,
b): # comment
"""
This is a 'doctstring', "hello"
"""
if not a is None or b is None:
# border case
return None
else:
return a * b
def get_body(f, doc_string=False):
"""
Get the body text of a function, i.e. without the signature.
NOTE: Comments are stripped.
"""
COMMENT_START = ("'", '"')
code = inspect.getsource(mult)
# print("Function's code:\n", code)
module_tree = ast.parse(code) # this creates a module
# print("Dump:\n", ast.dump(module_tree))
# the first element of the module is evidently the function:
function_body = module_tree.body[0].body
# strip the code lines to remove the additional lines:
lines = [unparse(code_line).strip() for code_line in function_body]
# for i, line in enumerate(lines): print("%s: %s" % (i, line.strip()))
# in principle the notion of body contains the docstring:
if not doc_string:
lines = (line for line in lines if not line.startswith(COMMENT_START))
return '\n'.join(lines).strip()
s =get_body(mult)
print("---------")
print(s)
Here is the result:
$ python3 function_body.py
---------
if ((not (a is None)) or (b is None)):
return None
else:
return (a * b)
You can choose if you want the docstring or not. The downside of this approach (which should not be a downside in my use case) is that comments are stripped.
I also left some commented print statements, in case someone would like to explore the various steps.

Processing Strings from data file containing punctuation coding in python

I am trying to make a simple programme that can help make army lists for a popular tabletop wargame. More as an excercise for my own experience as there are plenty of pre made software packages that do this, but the idea behind it seems fairly straightforward
The programme reads the data for all the units available in an army from a spreadsheet and creates various classes for each unit. The main bit I am looking at now is the options/ upgrades.
In the file I want a straightforward syntax for the option field for each unit. i.e. the following options string itemA, itemB/itemC-3, 2*itemD, itemE/itemF/itemG, itemH/itemI+itemJ would mean
1. you may take itemA (X pts per model)
2. for every 3 models, you may exchange itemB with
a) itemC (net X pts per model)
3. each model may take 2 of itemD (X pts per model)
4. each model may take one of either
a)itemE (X pts per model)
b)itemF (X pts per model)
c)itemG (X pts per model
5. each model may take either
a)itemH (X points per model)
b)itemI and itemJ (X points per model)
At the moment I am processing the string using lots of splits and if statements, that make it very hard to keep track of and assign correctly once the user input their choice.
for index, option in enumerate(self.options):
output = "{}.".format(index+1)
if '-' in option:
sub_option, no_models = option.split('-')
no_models = int(no_models)
print(sub_option)
print(no_models)
output += "For every {} models ".format(no_models)
if '/' in sub_option:
temp_str, temp_options, points_list = exchange_option(sub_option)
else:
temp_str, temp_options, points_list = standard_option(sub_option)
index_points.append(points_list)
temp_options.append(no_models)
index_options.append(temp_options)
else:
if '/' in option:
temp_str, temp_options, points_list = exchange_option(option)
else:
temp_str, temp_options, points_list = standard_option(option)
index_points.append(points_list)
index_options.append(temp_options)
output += temp_str
the *_option() functions are additional helper functions I have defined above which have a similar structure with further if statements within them.
The main question I am asking, is there an easier way to process a code like string such as this? While it works to produce the output in the example above it seems awfully cumbersome to then deal with the user input.
What I am aiming to do is first output the string as given in my example at the top of the question, and then taking the user input index of the given option, modify the associated unit class to have the correct wargear and points value.
I thought about trying to make some kind of options class, but again labelling and defining each option so that they can interact with one another properly seems equally complex, and I feel there must be something more pythonic or just generally better coding practice to processing encoded strings such as this?

So, here's a full blown parser to do that! Now, this only outputs the list as in the previous version of your question, but it shouldn't be too hard to add more features as you want. Also please note that at the moment, the lexer does not error out when a string contains invalid tokens, but that's just a proof-of-concept, so it should be fine.
Part I: the lexer
This tokenises the input string - looks through it from left to right and attempts to classify non-overlapping substrings as instances of tokens. It's to be used before parsing. When given a string, Lexer.tokenize yields a stream of Tokens.
# FILE: lex.py
import re
import enum
class Token:
def __init__(self, type, value: str, lineno: int, pos: int):
self.type, self.value, self.lineno, self.pos = type, value, lineno, pos
def __str__(self):
v = f'({self.value!r})' if self.value else ''
return f'{self.type.name}{v} at {self.lineno}:{self.pos}'
__repr__ = __str__
class Lexer:
def __init__(self, token_types: enum.Enum, tokens_regexes: dict):
self.token_types = token_types
regex = '|'.join(map('(?P<{}>{})'.format, *zip(*((tok.name, regex) for tok, regex in tokens_regexes.items()))))
self.regex = re.compile(regex)
def tokenize(self, string, skip=['space']):
# TODO: detect invalid input
lineno, pos = 0, 0
skip = set(map(self.token_types.__getitem__, skip))
for matchobj in self.regex.finditer(string):
type_name = matchobj.lastgroup
value = matchobj.groupdict()[type_name]
Type = self.token_types[type_name]
if Type == self.token_types.newline: # possibly buggy, but not catastrophic
self.lineno += 1
self.pos = 0
continue
pos = matchobj.end()
if Type not in skip:
yield Token(Type, value, lineno, pos)
yield Token(self.token_types.EOF, '', lineno, pos)
Part II: the parser (with syntax-driven evaluation):
This parses the given stream of tokens provided by lex.Lexer.tokenize and translates individual symbols to English according to the following grammar:
Opt_list -> Option Opt_list_
Opt_list_ -> comma Option Opt_list_ | empty
Option -> Choice | Mult
Choice -> Compound More_choices Exchange
Compound -> item Add_item
Add_item -> plus item Add_item | empty
More_choices -> slash Compound More_choices | empty
Exchange -> minus num | empty
Mult -> num star Compound
The uppercase symbols are nonterminals, the lowercase ones are terminals. There's also a special symbol EOF that's not present here.
Also, take a look at the vital statistics of this grammar. This grammar is LL(1), so we can use an LL(1) recursive descent predictive parser, as shown below.
If you modify the grammar, you should modify the parser accordingly! The methods that do the actual parsing are called parse_<something>, and to change the output of the parser (the Parser.parse function, actually) you should change the return values of these parse_<something> functions.
# FILE: parse.py
import lex
class Parser:
def __init__(self, lexer):
self.string, self.tokens = None, None
self.lexer = lexer
self.t = self.lexer.token_types
self.__lookahead = None
#property
def lookahead(self):
if not self.__lookahead:
try:
self.__lookahead = next(self.tokens)
except StopIteration:
self.__lookahead = lex.Token(self.t.EOF, '', 0, -1)
return self.__lookahead
def next(self):
if self.__lookahead and self.__lookahead.type == self.t.EOF:
return self.__lookahead
self.__lookahead = None
return self.lookahead
def match(self, token_type):
if self.lookahead.type == token_type:
return self.next()
raise SyntaxError(f'Expected {token_type}, got {self.lookahead.type}', ('<string>', self.lookahead.lineno, self.lookahead.pos, self.string))
# THE PARSING STARTS HERE
def parse(self, string):
# setup
self.string = string
self.tokens = self.lexer.tokenize(string)
self.__lookahead = None
self.next()
# do parsing
ret = [''] + self.parse_opt_list()
return ' '.join(ret)
def parse_opt_list(self) -> list:
ret = self.parse_option(1)
ret.extend(self.parse_opt_list_(1))
return ret
def parse_opt_list_(self, curr_opt_number) -> list:
if self.lookahead.type in {self.t.EOF}:
return []
self.match(self.t.comma)
ret = self.parse_option(curr_opt_number + 1)
ret.extend(self.parse_opt_list_(curr_opt_number + 1))
return ret
def parse_option(self, opt_number) -> list:
ret = [f'{opt_number}.']
if self.lookahead.type == self.t.item:
ret.extend(self.parse_choice())
elif self.lookahead.type == self.t.num:
ret.extend(self.parse_mult())
else:
raise SyntaxError(f'Expected {token_type}, got {self.lookahead.type}', ('<string>', self.lookahead.lineno, self.lookahead.pos, self.string))
ret[-1] += '\n'
return ret
def parse_choice(self) -> list:
c = self.parse_compound()
m = self.parse_more_choices()
e = self.parse_exchange()
if not m:
if not e:
ret = f'You may take {" ".join(c)}'
else:
ret = f'for every {e} models you may take item {" ".join(c)}'
elif m:
c.extend(m)
if not e:
ret = f'each model may take one of: {", ".join(c)}'
else:
ret = f'for every {e} models you may exchange the following items with each other: {", ".join(c)}'
else:
ret = 'Semantic error!'
return [ret]
def parse_compound(self) -> list:
ret = [self.lookahead.value]
self.match(self.t.item)
_ret = self.parse_add_item()
return [' '.join(ret + _ret)]
def parse_add_item(self) -> list:
if self.lookahead.type in {self.t.comma, self.t.minus, self.t.slash, self.t.EOF}:
return []
ret = ['with']
self.match(self.t.plus)
ret.append(self.lookahead.value)
self.match(self.t.item)
return ret + self.parse_add_item()
def parse_more_choices(self) -> list:
if self.lookahead.type in {self.t.comma, self.t.minus, self.t.EOF}:
return []
self.match(self.t.slash)
ret = self.parse_compound()
return ret + self.parse_more_choices()
def parse_exchange(self) -> str:
if self.lookahead.type in {self.t.comma, self.t.EOF}:
return ''
self.match(self.t.minus)
ret = self.lookahead.value
self.match(self.t.num)
return ret
def parse_mult(self) -> list:
ret = [f'each model may take {self.lookahead.value} of:']
self.match(self.t.num)
self.match(self.t.star)
return ret + self.parse_compound()
Part III: usage
Here's how to use all of that code:
# FILE: evaluate.py
import enum
from lex import Lexer
from parse import Parser
# these are all the types of tokens present in our grammar
token_types = enum.Enum('Types', 'item num plus minus star slash comma space newline empty EOF')
t = token_types
# these are the regexes that the lexer uses to recognise the tokens
terminals_regexes = {
t.item: r'[a-zA-Z_]\w*',
t.num: '0|[1-9][0-9]*',
t.plus: r'\+',
t.minus: '-',
t.star: r'\*',
t.slash: '/',
t.comma: ',',
t.space: r'[ \t]',
t.newline: r'\n'
}
lexer = Lexer(token_types, terminals_regexes)
parser = Parser(lexer)
string = 'itemA, itemB/itemC-3, 2*itemD, itemE/itemF/itemG, itemH/itemI+itemJ'
print(f'STRING FROM THE QUESTION: {string!r}\nRESULT:')
print(parser.parse(string), '\n\n')
string = input('Enter a command: ')
while string and string.lower() not in {'q', 'quit', 'e', 'exit'}:
try:
print(parser.parse(string))
except SyntaxError as e:
print(f' Syntax error: {e}\n {e.text}\n' + ' ' * (4 + e.offset - 1) + '^\n')
string = input('Enter a command: ')
Example session:
# python3 evaluate.py
STRING FROM THE QUESTION: 'itemA, itemB/itemC-3, 2*itemD, itemE/itemF/itemG, itemH/itemI+itemJ'
RESULT:
1. You may take itemA
2. for every 3 models you may exchange the following items with each other: itemB, itemC
3. each model may take 2 of: itemD
4. each model may take one of: itemE, itemF, itemG
5. each model may take one of: itemH, itemI with itemJ
Enter a command: itemA/b/c/stuff
1. each model may take one of: itemA, b, c, stuff
Enter a command: 4 * anything
1. each model may take 4 of: anything
Enter a command: 5 * anything + more
1. each model may take 5 of: anything with more
Enter a command: a + b + c+ d
1. You may take a with b with c with d
Enter a command: a+b/c
1. each model may take one of: a with b, c
Enter a command: itemA/itemB-2
1. for every 2 models you may exchange the following items with each other: itemA, itemB
Enter a command: itemA+itemB/itemC - 5
1. for every 5 models you may exchange the following items with each other: itemA with itemB, itemC
Enter a command: q

Query a key press using ctypes

I'd like to query each key of a keyboard without using win32api. I have it working using win32api.GetAsyncKeyState(key), but I'd also like to add support for if the module is not installed.
So far I've found one piece of fully working code, though it seems a bit heavyweight as it'd require its own thread, and would need over 1600 separate functions as I want to catch each key no matter of modifiers (there are 14 possible combinations per key).
Here is the code I found, would anyone be able to either suggest an alternative or how to get around the modifier problem?
import ctypes
import ctypes.wintypes
import win32con
class GlobalHotKeys(object):
"""
Register a key using the register() method, or using the #register decorator
Use listen() to start the message pump
Example:
from globalhotkeys import GlobalHotKeys
#GlobalHotKeys.register(GlobalHotKeys.VK_F1)
def hello_world():
print 'Hello World'
GlobalHotKeys.listen()
"""
key_mapping = []
user32 = ctypes.windll.user32
MOD_ALT = win32con.MOD_ALT
MOD_CTRL = win32con.MOD_CONTROL
MOD_CONTROL = win32con.MOD_CONTROL
MOD_SHIFT = win32con.MOD_SHIFT
MOD_WIN = win32con.MOD_WIN
#classmethod
def register(cls, vk, modifier=0, func=None):
"""
vk is a windows virtual key code
- can use ord('X') for A-Z, and 0-1 (note uppercase letter only)
- or win32con.VK_* constants
- for full list of VKs see: http://msdn.microsoft.com/en-us/library/dd375731.aspx
modifier is a win32con.MOD_* constant
func is the function to run. If False then break out of the message loop
"""
# Called as a decorator?
if func is None:
def register_decorator(f):
cls.register(vk, modifier, f)
return f
return register_decorator
else:
cls.key_mapping.append((vk, modifier, func))
#classmethod
def listen(cls):
"""
Start the message pump
"""
for index, (vk, modifiers, func) in enumerate(cls.key_mapping):
if not cls.user32.RegisterHotKey(None, index, modifiers, vk):
raise Exception('Unable to register hot key: ' + str(vk) + ' error code is: ' + str(ctypes.windll.kernel32.GetLastError()))
try:
msg = ctypes.wintypes.MSG()
i = 0
while cls.user32.GetMessageA(ctypes.byref(msg), None, 0, 0) != 0:
if msg.message == win32con.WM_HOTKEY:
(vk, modifiers, func) = cls.key_mapping[msg.wParam]
if not func:
break
func()
cls.user32.TranslateMessage(ctypes.byref(msg))
cls.user32.DispatchMessageA(ctypes.byref(msg))
finally:
for index, (vk, modifiers, func) in enumerate(cls.key_mapping):
cls.user32.UnregisterHotKey(None, index)
#classmethod
def _include_defined_vks(cls):
for item in win32con.__dict__:
item = str(item)
if item[:3] == 'VK_':
setattr(cls, item, win32con.__dict__[item])
#classmethod
def _include_alpha_numeric_vks(cls):
for key_code in (list (range(ord('A'), ord('Z') + 1)) + list(range(ord('0'), ord('9') + 1)) ):
setattr(cls, 'VK_' + chr(key_code), key_code)
GlobalHotKeys._include_defined_vks()
GlobalHotKeys._include_alpha_numeric_vks()
This is an example of how it'd be used to read a:
#GlobalHotKeys.register(ord('A'))
def a():
print 'a'
#GlobalHotKeys.register(ord('A'), GlobalHotKeys.MOD_SHIFT)
def a_shift():
print 'shift + a'
#GlobalHotKeys.register(ord('A'), GlobalHotKeys.MOD_CONTROL | GlobalHotKeys.MOD_SHIFT)
def a_ctrl_shift():
print 'ctrl + shift + a'
...
GlobalHotKeys.listen()

Turned out to be an extremely simple answer, I finally stumbled across it when trying to read the microsoft info for the GetKeyState function.
ctypes.windll.user32.GetKeyState(key)
The state will either be 0 or 1 when not pressed, and increase to something like 60000 when pressed, so to get a True/False result, checking for > 1 seems to do the trick.
GetAsyncKeyState also kinda works, but sometimes results in a negative number, and sometimes doesn't, so I thought it'd be best using the alternative.

Static classes being initialised on import. How does python 2 initialise static classes on import

I am trying to introduce python 3 support for the package mime and the code is doing something I have never seen before.
There is a class Types() that is used in the package as a static class.
class Types(with_metaclass(ItemMeta, object)): # I changed this for 2-3 compatibility
type_variants = defaultdict(list)
extension_index = defaultdict(list)
# __metaclass__ = ItemMeta # unnessecary now
def __init__(self, data_version=None):
self.data_version = data_version
The type_variants defaultdict is what is getting filled in python 2 but not in 3.
It very much seems to be getting filled by this class when is in a different file called mime_types.py.
class MIMETypes(object):
_types = Types(VERSION)
def __repr__(self):
return '<MIMETypes version:%s>' % VERSION
#classmethod
def load_from_file(cls, type_file):
data = open(type_file).read()
data = data.split('\n')
mime_types = Types()
for index, line in enumerate(data):
item = line.strip()
if not item:
continue
try:
ret = TEXT_FORMAT_RE.match(item).groups()
except Exception as e:
__parsing_error(type_file, index, line, e)
(unregistered, obsolete, platform, mediatype, subtype, extensions,
encoding, urls, docs, comment) = ret
if mediatype is None:
if comment is None:
__parsing_error(type_file, index, line, RuntimeError)
continue
extensions = extensions and extensions.split(',') or []
urls = urls and urls.split(',') or []
mime_type = Type('%s/%s' % (mediatype, subtype))
mime_type.extensions = extensions
...
mime_type.url = urls
mime_types.add(mime_type) # instance of Type() is being filled?
return mime_types
The function startup() is being run whenever mime_types.py is imported and it does this.
def startup():
global STARTUP
if STARTUP:
type_files = glob(join(DIR, 'types', '*'))
type_files.sort()
for type_file in type_files:
MIMETypes.load_from_file(type_file) # class method is filling Types?
STARTUP = False
This all seems pretty weird to me. The MIMETypes class first creates an instance of Types() on the first line. _types = Types(VERSION). It then seems to do nothing with this instance and only use the mime_types instance created in the load_from_file() class method. mime_types = Types().
This sort of thing vaguely reminds me of javascript class construction. How is the instance mime_types filling Types.type_variants so that when it is imported like this.
from mime import Type, Types
The class's type_variants defaultdict can be used. And why isn't this working in python 3?
EDIT:
Adding extra code to show how type_variants is filled
(In "Types" Class)
#classmethod
def add_type_variant(cls, mime_type):
cls.type_veriants[mime_type.simplified].append(mime_type)
#classmethod
def add(cls, *types):
for mime_type in types:
if isinstance(mime_type, Types):
cls.add(*mime_type.defined_types())
else:
mts = cls.type_veriants.get(mime_type.simplified)
if mts and mime_type in mts:
Warning('Type %s already registered as a variant of %s.',
mime_type, mime_type.simplified)
cls.add_type_variant(mime_type)
cls.index_extensions(mime_type)
You can see that MIMETypes uses the add() classmethod.

Without posting more of your code, it's hard to say. I will say that I was able to get that package ported to Python 3 with only a few changes (print statement -> function, basestring -> str, adding a dot before same-package imports, and a really ugly hack to compensate for their love of cmp:
def cmp(x,y):
if isinstance(x, Type): return x.__cmp__(y)
if isinstance(y, Type): return y.__cmp__(x) * -1
return 0 if x == y else (1 if x > y else -1)
Note, I'm not even sure this is correct.
Then
import mime
print(mime.Types.type_veriants) # sic
printed out a 1590 entry defaultdict.
Regarding your question about MIMETypes._types not being used, I agree, it's not.
Regarding your question about how the dictionary is being populated, it's quite simple, and you've identified most of it.
import mime
Imports the package's __init__.py which contains the line:
from .mime_types import MIMETypes, VERSION
And mime_types.py includes the lines:
def startup():
global STARTUP
if STARTUP:
type_files = glob(join(DIR, 'types', '*'))
type_files.sort()
for type_file in type_files:
MIMETypes.load_from_file(type_file)
STARTUP = False
startup()
And MIMETypes.load_from_file() has the lines:
mime_types = Types()
#...
for ... in ...:
mime_types.add(mime_type)
And Types.add(): has the line:
cls.add_type_variant(mime_type)
And that classmethod contains:
cls.type_veriants[mime_type.simplified].append(mime_type)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Pygments in QScintilla - python

Related

Is there a way to give a function access to the (external-scope) name of the variable being passed in? [duplicate]

How do you obtain the body of a function?

Processing Strings from data file containing punctuation coding in python

Query a key press using ctypes

Static classes being initialised on import. How does python 2 initialise static classes on import

Categories

Resources