SQLAlchemy window frames as time interval - python

Is there any way in SQLAlchemy to specify window frame as time interval like this?
OVER(
PARTITION BY some_col
ORDER BY other_date_type_col
RANGE BETWEEN '30 days'::INTERVAL PRECEDING AND CURRENT ROW
)
There's a method sqlalchemy.sql.functions.FunctionElement.over(partition_by=None, order_by=None, rows=None, range_=None) in their docs. By it takes only numeric data as range_.

As of SQLAlchemy 1.4.25, there is no built-in support.
Workaround
Implement a str subclass that overrides __abs__ and __lt__.
class RangeDays(str):
def __new__(cls, x):
obj = super().__new__(cls, f"{abs(x)} day" if abs(x) == 1 else f"{abs(x)} days")
obj.x = x
return obj
def __abs__(self):
# abs(range_[0]) called in SQLCompiler._format_frame_clause
return self
def __lt__(self, other):
# range_[0] < 0 called in SQLCompiler._format_frame_clause
return self.x.__lt__(other)
Patch Over._interpret_range to handle RangeDays.
from sqlalchemy.sql.elements import Over
_old_interpret_range = Over._interpret_range
def _interpret_range(self, range_):
lower, lower_ = (None, range_[0]) if isinstance(range_[0], RangeDays) else (range_[0], None)
upper, upper_ = (None, range_[1]) if isinstance(range_[1], RangeDays) else (range_[1], None)
lower, upper = _old_interpret_range(self, (lower, upper))
return lower_ or lower, upper_ or upper
Over._interpret_range = _interpret_range
Usage:
# '30 days' PRECEDING AND CURRENT ROW
range_=(RangeDays(-30), 0)
# '1 day' PRECEDING AND '10 days' FOLLOWING
range_=(RangeDays(-1), RangeDays(10))

Related

Objects passing objects

I'm new to python and am currently trying to use an old module to output graphs. The code below is a excerpt from the module that uses rpy to design
standard celeration charts (don't look it up).
I'm having trouble understanding how the class Element and class Vector work together.
I've been trying to pass the a element object to the vector get_elements but I'm not sure if that's what I should be doing.
Any help would be appreciated. Thanks!
class Element(object):
"""Base class for Chartshare vector elements."""
def __init__(self, offset=0, value=0):
self.offset=offset
self.value=value
self.text=''
def setText(self, value):
self.value=value
def getText(self):
return self.value
text = property(getText, setText)
class Vector(object):
"""Base class for Chartshare Vectors."""
def __init__(self, name='', color='black', linetype='o', symbol=1, clutter=0, start=0, end=140, continuous=False, debug=False):
self.name=name
self.color=color
self.linetype=linetype
self.symbol=symbol
self.start=start
self.end=end
self.elements={}
self.debug=debug
self.continuous=continuous
if not self.continuous:
for i in range(self.start, self.end+1):
self.elements[i]='NaN'
def getSymbol(self):
return self._symbol
def setSymbol(self, value):
if (type(value) == int):
if (value >= 0) and (value <= 18):
self._symbol = value
else:
raise SymbolOutOfRange, "Symbol should be an integer between 0 and 18."
elif (type(value) == str):
try:
self._symbol = value[0]
except IndexError:
self._symbol=1
else:
self._symbol = 1
symbol = property(getSymbol, setSymbol)
def getLinetype(self):
return self._linetype
def setLinetype(self, value):
if (value == 'p') or (value == 'o') or (value == 'l'):
self._linetype = value
else:
raise InvalidLinetype, "Line type should be 'o', 'p', or 'l'"
linetype = property(getLinetype, setLinetype)
def get_elements(self):
"""Returns a list with the elements of a Vector."""
retval = []
for i in range(self.start, self.end+1):
if (not self.continuous):
retval.append(self.elements[i])
else:
if (self.elements[i] != 'NaN'):
retval.append(self.elements[i])
return retval
def get_offsets(self):
"""Returns a list of the offsets of a Vector."""
retval = []
for i in range(self.start, self.end+1):
if (not self.continuous):
retval.append(i)
else:
if (self.elements[i] == 'NaN'):
retval.append(i)
return retval
def to_xml(self, container=False):
"""Returns an xml representation of the Vector."""
if (container == False):
container = StringIO.StringIO()
xml = XMLGenerator(container)
attrs = {}
attrs[u'name'] = u"%s" % self.name
attrs[u'symbol'] = u"%s" % self.symbol
attrs[u'linetype'] = u"%s" % self.linetype
attrs[u'color'] = u"%s" % self.color
xml.startElement(u'vector', attrs)
for i in range(self.start, self.end+1):
if (self.elements[i] != 'NaN'):
attrs.clear()
attrs[u'offset'] = u"%s" % i
xml.startElement(u'element', attrs)
xml.characters(u"%s" % self.elements[i])
xml.endElement(u'element')
xml.endElement(u'vector')
def render(self):
"""Plots the current vector."""
if (self.debug):
print "Rendering Vector: %s" % self.name
print self.elements
r.points(x=range(self.start, self.end+1),
y=self.elements,
col=self.color,
type=self.linetype,
pch=self.symbol)
if (self.debug):
print "Finished rendering Vector: %s" % self.name
Vector's get_elements() doesn't take any arguments. Well, technically it does. It takes self. self is syntactic sugar that lets you do this:
vec = Vector()
vec.get_elements()
It's equivalent to this:
vec = Vector()
Vector.get_elements(vec)
Since get_elements() doesn't take any arguments, you can't pass a to it. Skimming the code, I don't see a set_elements() analog. This means you'll have to modify the vector's element's dictionary directly.
vec = Vector()
vec.elements[a] = ...
print(vec.get_elements()) # >>> [a,...]
As I can see, there is no place in this code where you are assigning self.elements with any input from a function. You are only initialising it or obtaining values
Also note that the .get_elements() function doesn't have any arguments (only self, that is the object where you are calling it in), so of course it won't work.
Unless you can do something such as the following, we would need more code to understand how to manipulate and connect these two objects.
element_obj = Element()
vector_obj = Vector()
position = 4
vector_obj.elements[4] = element_obj
I got to this answer with the following: as I can see, the elements property in the Vector class is a dictonary, that when you call vector_obj.get_elements() is casted to an array using the start and end parameters as delimiters.
Unless there is something else missing, this would be the only way I could think out of adding the an element into a vector object. Otheriwse, we would need some more code or context to understand how these classes behave with each other!
Hope it helps!

Python class condition: getting an [x,x] output from an x input?

I'm playing around with some interval calculations in Python. This is an excerpt of what I've written. I want to include degenerate intervals i.e. the interval for the real number 1 is [1,1].
So when I type Interval(1), I want [1,1] returned. But I've defined my interval class in terms of two parameters.
I can't make a subclass - it would still expect two parameters. Can anyone point me in the right direction please? Could I extend the __contains__ in some sense?
TLDR: How can I get an [x,x] output from an x input?
from numpy import *
import numpy as np
from pylab import *
class Interval:
def __init__(self, LeftEndpoint, RightEndpoint):
self.min = LeftEndpoint
self.max = RightEndpoint
if LeftEndpoint > RightEndpoint:
raise ValueError('LeftEndpoint must not be greater than RightEndpoint')
def __repr__(self): #TASK 3
return '[{},{}]'.format(self.min,self.max)
def __contains__(self, num):
if num < self.min:
raise Exception('The number is not in the given interval')
if num > self.max:
raise Exception('The number is not in the given interval')
p = Interval(1,2)
print(p) #returns [1,2]
Just give RightEndpoint a default value, like None; if it is still None in the function, you know no value was assigned to it and thus can be set to the same value as LeftEndpoint:
def __init__(self, LeftEndpoint, RightEndpoint=None):
if RightEndpoint is None:
RightEndpoint = LeftEndpoint
Note: if you follow the Python styleguide, PEP 8, parameter and local variable names should be all lowercase_with_underscores. And the __contains__ method should really return True or False, not raise an exception:
class Interval:
def __init__(self, left, right=None):
if right is None:
right = left
if left > right:
raise ValueError(
'left must not be greater than right')
self.left = left
self.right = right
def __repr__(self):
return f'[{self.left}, {self.right}]'
def __contains__(self, num):
return self.left <= num <= self.right
Note that the __contains__ method can now be expressed as a single test; either num is in the (inclusive) range of left to right, or it is not.
Demo:
>>> Interval(1, 2)
[1, 2]
>>> Interval(1)
[1, 1]
>>> 11 in Interval(42, 81)
False
>>> 55 in Interval(42, 81)
True
You can use *args to pass in a variable number of arguments, then just assign RightEndpoint dynamically:
def __init__(self, *args):
self.min = args[0]
if len(args) < 2:
RightEndpoint = args[0]
elif len(args) == 2:
RightEndpoint = args[1]
else:
# decide what to do with more than 2 inputs
self.max = RightEndpoint
#...
p = Interval(1,2)
print(p) #returns [1,2]
p1 = Interval(1)
print(p1) #returns [1,1]
(Alternately you can use **kwargs if you want to use keyword arguments instead.)

Python: Why is my generator based range is X2 slower than xrange?

Just of curiosity, I've written 3 tests in Python and timed them out using timeit:
import timeit
# simple range based on generator
def my_range(start, stop):
i = start
while (i < stop):
yield i
i += 1
# test regular range
def test_range():
x = range(1, 100000)
sum = 0
for i in x:
sum += i
# test xrange
def test_xrange():
x = xrange(1, 100000)
sum = 0
for i in x:
sum += i
# test my range
def test_my_range():
x = my_range(1, 100000)
sum = 0
for i in x:
sum += i
print timeit.timeit("test_range()", setup = "from __main__ import test_range", number = 100)
print timeit.timeit("test_xrange()", setup = "from __main__ import test_xrange", number = 100)
print timeit.timeit("test_my_range()", setup = "from __main__ import test_my_range", number = 100)
And I've got these benchmarks:
regular range based test - 0.616795163262
xrange based test - 0.537716731096
my_range (generator) based test - **1.27872886337**
My range was X2 slower even than a range that creates a list. Why?
Are xrange() / range() implemented using C directly?
Are they implemented without condition check?
Thanks!
I feel that the simple answer is that xrange() is builtin and written in C.
I added another case to your test (see below): A pure-Python reference implementation of xrange() based on the CPython source.
import timeit
from collections import Sequence, Iterator
from math import ceil
# simple range based on generator
def my_range(start, stop):
i = start
while (i < stop):
yield i
i += 1
# test regular range
def test_range():
x = range(1, 100000)
sum = 0
for i in x:
sum += i
# test xrange
def test_xrange():
x = xrange(1, 100000)
sum = 0
for i in x:
sum += i
# test my range
def test_my_range():
x = my_range(1, 100000)
sum = 0
for i in x:
sum += i
class pure_python_xrange(Sequence):
"""Pure-Python implementation of an ``xrange`` (aka ``range``
in Python 3) object. See `the CPython documentation
<http://docs.python.org/py3k/library/functions.html#range>`_
for details.
"""
def __init__(self, *args):
if len(args) == 1:
start, stop, step = 0, args[0], 1
elif len(args) == 2:
start, stop, step = args[0], args[1], 1
elif len(args) == 3:
start, stop, step = args
else:
raise TypeError('pure_python_xrange() requires 1-3 int arguments')
try:
start, stop, step = int(start), int(stop), int(step)
except ValueError:
raise TypeError('an integer is required')
if step == 0:
raise ValueError('pure_python_xrange() arg 3 must not be zero')
elif step < 0:
stop = min(stop, start)
else:
stop = max(stop, start)
self._start = start
self._stop = stop
self._step = step
self._len = (stop - start) // step + bool((stop - start) % step)
def __repr__(self):
if self._start == 0 and self._step == 1:
return 'pure_python_xrange(%d)' % self._stop
elif self._step == 1:
return 'pure_python_xrange(%d, %d)' % (self._start, self._stop)
return 'pure_python_xrange(%d, %d, %d)' % (self._start, self._stop, self._step)
def __eq__(self, other):
return isinstance(other, xrange) and \
self._start == other._start and \
self._stop == other._stop and \
self._step == other._step
def __len__(self):
return self._len
def index(self, value):
"""Return the 0-based position of integer `value` in
the sequence this xrange represents."""
diff = value - self._start
quotient, remainder = divmod(diff, self._step)
if remainder == 0 and 0 <= quotient < self._len:
return abs(quotient)
raise ValueError('%r is not in range' % value)
def count(self, value):
"""Return the number of ocurrences of integer `value`
in the sequence this xrange represents."""
# a value can occur exactly zero or one times
return int(value in self)
def __contains__(self, value):
"""Return ``True`` if the integer `value` occurs in
the sequence this xrange represents."""
try:
self.index(value)
return True
except ValueError:
return False
def __reversed__(self):
"""Return an xrange which represents a sequence whose
contents are the same as the sequence this xrange
represents, but in the opposite order."""
sign = self._step / abs(self._step)
last = self._start + ((self._len - 1) * self._step)
return pure_python_xrange(last, self._start - sign, -1 * self._step)
def __getitem__(self, index):
"""Return the element at position ``index`` in the sequence
this xrange represents, or raise :class:`IndexError` if the
position is out of range."""
if isinstance(index, slice):
return self.__getitem_slice(index)
if index < 0:
# negative indexes access from the end
index = self._len + index
if index < 0 or index >= self._len:
raise IndexError('xrange object index out of range')
return self._start + index * self._step
def __getitem_slice(self, slce):
"""Return an xrange which represents the requested slce
of the sequence represented by this xrange.
"""
start, stop, step = slce.start, slce.stop, slce.step
if step == 0:
raise ValueError('slice step cannot be 0')
start = start or self._start
stop = stop or self._stop
if start < 0:
start = max(0, start + self._len)
if stop < 0:
stop = max(start, stop + self._len)
if step is None or step > 0:
return pure_python_xrange(start, stop, step or 1)
else:
rv = reversed(self)
rv._step = step
return rv
def __iter__(self):
"""Return an iterator which enumerates the elements of the
sequence this xrange represents."""
return xrangeiterator(self)
class xrangeiterator(Iterator):
"""An iterator for an :class:`xrange`.
"""
def __init__(self, xrangeobj):
self._xrange = xrangeobj
# Intialize the "last outputted value" to the value
# just before the first value; this simplifies next()
self._last = self._xrange._start - self._xrange._step
self._count = 0
def __iter__(self):
"""An iterator is already an iterator, so return ``self``.
"""
return self
def next(self):
"""Return the next element in the sequence represented
by the xrange we are iterating, or raise StopIteration
if we have passed the end of the sequence."""
self._last += self._xrange._step
self._count += 1
if self._count > self._xrange._len:
raise StopIteration()
return self._last
# test xrange
def test_pure_python_xrange():
x = pure_python_xrange(1, 100000)
sum = 0
for i in x:
sum += i
print timeit.timeit("test_range()", setup = "from __main__ import test_range", number = 100)
print timeit.timeit("test_xrange()", setup = "from __main__ import test_xrange", number = 100)
print timeit.timeit("test_my_range()", setup = "from __main__ import test_my_range", number = 100)
print timeit.timeit("test_pure_python_xrange()", setup = "from __main__ import test_pure_python_xrange", number = 100)
The results?
$ python so.py
0.426695823669
0.371111869812
0.964643001556
6.06390094757
This is simply the difference between interpreted Python code and C. Additionally, as #byels mentioned above, xrange() is limited to short integers, which likely has positive effect.
This is an interesting test. Looking at the python 2 docs on xrange, one guess that comes to mind is that xrange is alowed to take advantage of type restrictions (only uses "short" integers)

Why does the print statement at the bottom of my main method not print anything?

I'm working on the MIT open courseware for CS-600 and I can't figure out why the last print statement isn't printing anything. Here's the code I wrote:
#!/usr/bin/env python
# encoding: utf-8
# 6.00 Problem Set 9
#
# Name:
# Collaborators:
# Time:
from string import *
class Shape(object):
def area(self):
raise AttributeException("Subclasses should override this method.")
class Square(Shape):
def __init__(self, h):
"""
h: length of side of the square
"""
self.side = float(h)
def area(self):
"""
Returns area of the square
"""
return self.side**2
def __str__(self):
return 'Square with side ' + str(self.side)
def __eq__(self, other):
"""
Two squares are equal if they have the same dimension.
other: object to check for equality
"""
return type(other) == Square and self.side == other.side
class Circle(Shape):
def __init__(self, radius):
"""
radius: radius of the circle
"""
self.radius = float(radius)
def area(self):
"""
Returns approximate area of the circle
"""
return 3.14159*(self.radius**2)
def __str__(self):
return 'Circle with radius ' + str(self.radius)
def __eq__(self, other):
"""
Two circles are equal if they have the same radius.
other: object to check for equality
"""
return type(other) == Circle and self.radius == other.radius
#
# Problem 1: Create the Triangle class
#
## TO DO: Implement the `Triangle` class, which also extends `Shape`.
class Triangle(Shape):
def __init__(self, base, height):
self.base = float(base)
self.height = float(height)
def area(self):
return self.base*self.height/2
def __str__(self):
return 'Triangle with base ' + str(self.base) + 'and height ' + str(self.height)
def __eq__(self, other):
return type(other) == Triangle and self.base == other.base and self.height == other.height
#
# Problem 2: Create the ShapeSet class
#
## TO DO: Fill in the following code skeleton according to the
## specifications.
class ShapeSet(object):
def __init__(self):
"""
Initialize any needed variables
"""
self.allCircles = []
self.allSquares = []
self.allTriangles = []
self.allShapes = self.allCircles + self.allSquares + self.allTriangles
self.place = None
def addShape(self, sh):
"""
Add shape sh to the set; no two shapes in the set may be
identical
sh: shape to be added
"""
if not isinstance(sh, Shape): raise TypeError('not a shape')
if isinstance(sh, Square):
for sq in self.allSquares:
if sh == sq:
raise ValueError('shape already in the set')
self.allSquares.append(sh)
if isinstance(sh, Triangle):
for tri in self.allTriangles:
if sh == tri:
raise ValueError('shape already in the set')
self.allTriangles.append(sh)
if isinstance(sh, Circle):
for circ in self.allCircles:
if sh == circ:
raise ValueError('shape already in the set')
self.allCircles.append(sh)
def __iter__(self):
"""
Return an iterator that allows you to iterate over the set of
shapes, one shape at a time
"""
self.place = 0
return self
def next(self):
if self.place >= len(self.allShapes):
raise StopIteration
self.place += 1
return self.allShapes[self.place - 1]
def __str__(self):
"""
Return the string representation for a set, which consists of
the string representation of each shape, categorized by type
(circles, then squares, then triangles)
"""
shapeList = ""
for item in self.allShapes:
shapeList += item.get__str__ + "br/"
return shapeList
#
# Problem 3: Find the largest shapes in a ShapeSet
#
def findLargest(shapes):
"""
Returns a tuple containing the elements of ShapeSet with the
largest area.
shapes: ShapeSet
"""
## TO DO
#
# Problem 4: Read shapes from a file into a ShapeSet
#
def readShapesFromFile(filename):
"""
Retrieves shape information from the given file.
Creates and returns a ShapeSet with the shapes found.
filename: string
"""
## TO DO
def main():
sq1 = Square(4.0)
sq2 = Square(5.0)
sq3 = Square(3.0)
circ1 = Circle(3.0)
circ2 = Circle(3.2)
tri1 = Triangle(3.0, 4.0)
tri2 = Triangle(4.0, 3.0)
tri3 = Triangle(1.0, 1.0)
thisSet = ShapeSet()
thisSet.addShape(sq1)
thisSet.addShape(sq2)
thisSet.addShape(sq3)
thisSet.addShape(circ1)
thisSet.addShape(circ2)
thisSet.addShape(tri1)
thisSet.addShape(tri2)
thisSet.addShape(tri3)
print thisSet
if __name__ == '__main__':
main()
This line:
self.allShapes = self.allCircles + self.allSquares + self.allTriangles
doesn't do what you think it does. It sets allShapes to an empty list, and then as you add shapes later, nothing updates allShapes.
Then your __str__ function just loops over allShapes, which is still empty, so your __str__ returns an empty string.
This line makes allShapes an empty list:
self.allShapes = self.allCircles + self.allSquares + self.allTriangles
If you modify allCircles, that doesn't affect allShapes. I would personally eliminate allShapes, and in the str method, add them at the last possible second:
for item in self.allCircles + self.allSquares + self.allTriangles:
The problem is here:
self.allShapes = self.allCircles + self.allSquares + self.allTriangles
When you concatenate lists like this, the result is a copy of the component lists. So when those lists are changed later, the concatenated list isn't changed. In this case, self.allCircles, etc. are all empty. So self.allShapes is an empty list too; the for loop in ShapeSet.__str__ doesn't append anything to ShapeList, and so the result is an empty string.
One simple way to fix this would be to make allShapes a method that you call, and that returns a new concatenation of self.allCircles... etc. each time it's called. That way, allShapes is always up-to-date.
If this is your actual code, then it must be because of
item.get__str__
which should raise an exception.
Edit: as others have noted, this isn't the actual problem, but I leave this here as a hint for further progress. Mind you, it's considered bad style ("unpythonic") to call x.__str__() directly, as you probably intended. Call str(x) instead, even in the implementation of __str__.
You assign allShapes to be the value of self.allCircles + self.allSquares + self.allTriangles at the start in your init method (when the other lists are empty).
It's value is then never changed, so it remains empty.
You need this in addShape:
self.allShapes.append(sh)

What is the best way to sort items on one of the items values

I have two instances of an object in a list
class Thing():
timeTo = 0
timeFrom = 0
name = ""
o1 = Thing()
o1.name = "One"
o1.timeFrom = 2
o2 = Thing()
o2.timeTo = 20
o2.name = "Two"
myList = [o1, o2]
biggestIndex = (myList[0].timeFrom < myList[1].timeTo) & 1
bigger = myList.pop(biggestIndex)
lesser = myList.pop()
print bigger.name
print lesser.name
both o1 and o2 have two properties that I want to compare the first in the lists timeFrom property and the second ones timeTo property to eachother.
I feel this is a bit awkward and wierd, is there perhaps a better and more readable approach to this?
The best solution is to make Thing instances sortable. You do this by implementing __lt__:
class Thing():
timeTo = 0
timeFrom = 0
name = ""
def __lt__(self, other):
return self.timeFrom < other.timeTo
lesser, bigger = sorted(myList)
Python2 has lesser, bigger = sorted(myList, cmp=lambda one,other: one.timeFrom < other.timeTo).
In Python3 cmp is gone, I guess to force people to do (or learn) OOP and write a adapter.
class SortAdaper(object):
def __init__(self, obj ):
self.obj = obj
class TimeLineSorter(SortAdaper):
""" sorts in a timeline """
def __lt__(self, other):
return self.obj.timeFrom < other.obj.timeTo
class NameSorter(SortAdaper):
""" sorts by name """
def __lt__(self, other):
return self.obj.name < other.obj.name
print sorted( myList, key=TimeLineSorter)
print sorted( myList, key=NameSorter)
see attrgetter
import operator
getter = operator.attrgetter('timeFrom')
bigger = max(myList, key=getter)
lesser = min(myList, key=getter)
print bigger.name
print lesser.name
EDIT :
attrgetter also wokrs with sorted or anywhere a key function is needed.
lesser, bigger = sorted(myList, key=getter)
I would do this, if object can have only one of the time values:
class Thing(object):
def __init__(self, name, time = 0, timename = 'to'):
self.name, self.time, self.timename = (name,time,timename)
def __repr__(self):
return "Thing(%r, %i, %r)" % (self.name, self.time, self.timename)
def __lt__(self, other):
return self.time < other.time
o1 = Thing("One", 5, 'from')
o2 = Thing("Two", 20, 'to')
myList = [o1, o2]
print myList
print max(myList)
print min(myList)

Categories