Is there a range() equivalent for floats in Python?
>>> range(0.5,5,1.5)
[0, 1, 2, 3, 4]
>>> range(0.5,5,0.5)
Traceback (most recent call last):
File "<pyshell#10>", line 1, in <module>
range(0.5,5,0.5)
ValueError: range() step argument must not be zero
You can either use:
[x / 10.0 for x in range(5, 50, 15)]
or use lambda / map:
map(lambda x: x/10.0, range(5, 50, 15))
I don't know a built-in function, but writing one like [this](https://stackoverflow.com/a/477610/623735) shouldn't be too complicated.
def frange(x, y, jump):
while x < y:
yield x
x += jump
---
As the comments mention, this could produce unpredictable results like:
>>> list(frange(0, 100, 0.1))[-1]
99.9999999999986
To get the expected result, you can use one of the other answers in this question, or as #Tadhg mentioned, you can use decimal.Decimal as the jump argument. Make sure to initialize it with a string rather than a float.
>>> import decimal
>>> list(frange(0, 100, decimal.Decimal('0.1')))[-1]
Decimal('99.9')
Or even:
import decimal
def drange(x, y, jump):
while x < y:
yield float(x)
x += decimal.Decimal(jump)
And then:
>>> list(drange(0, 100, '0.1'))[-1]
99.9
[editor's not: if you only use positive jump and integer start and stop (x and y) , this works fine. For a more general solution see here.]
I used to use numpy.arange but had some complications controlling the number of elements it returns, due to floating point errors. So now I use linspace, e.g.:
>>> import numpy
>>> numpy.linspace(0, 10, num=4)
array([ 0. , 3.33333333, 6.66666667, 10. ])
Pylab has frange (a wrapper, actually, for matplotlib.mlab.frange):
>>> import pylab as pl
>>> pl.frange(0.5,5,0.5)
array([ 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ])
Eagerly evaluated (2.x range):
[x * .5 for x in range(10)]
Lazily evaluated (2.x xrange, 3.x range):
itertools.imap(lambda x: x * .5, xrange(10)) # or range(10) as appropriate
Alternately:
itertools.islice(itertools.imap(lambda x: x * .5, itertools.count()), 10)
# without applying the `islice`, we get an infinite stream of half-integers.
using itertools: lazily evaluated floating point range:
>>> from itertools import count, takewhile
>>> def frange(start, stop, step):
return takewhile(lambda x: x< stop, count(start, step))
>>> list(frange(0.5, 5, 1.5))
# [0.5, 2.0, 3.5]
I helped add the function numeric_range to the package more-itertools.
more_itertools.numeric_range(start, stop, step) acts like the built in function range but can handle floats, Decimal, and Fraction types.
>>> from more_itertools import numeric_range
>>> tuple(numeric_range(.1, 5, 1))
(0.1, 1.1, 2.1, 3.1, 4.1)
There is no such built-in function, but you can use the following (Python 3 code) to do the job as safe as Python allows you to.
from fractions import Fraction
def frange(start, stop, jump, end=False, via_str=False):
"""
Equivalent of Python 3 range for decimal numbers.
Notice that, because of arithmetic errors, it is safest to
pass the arguments as strings, so they can be interpreted to exact fractions.
>>> assert Fraction('1.1') - Fraction(11, 10) == 0.0
>>> assert Fraction( 0.1 ) - Fraction(1, 10) == Fraction(1, 180143985094819840)
Parameter `via_str` can be set to True to transform inputs in strings and then to fractions.
When inputs are all non-periodic (in base 10), even if decimal, this method is safe as long
as approximation happens beyond the decimal digits that Python uses for printing.
For example, in the case of 0.1, this is the case:
>>> assert str(0.1) == '0.1'
>>> assert '%.50f' % 0.1 == '0.10000000000000000555111512312578270211815834045410'
If you are not sure whether your decimal inputs all have this property, you are better off
passing them as strings. String representations can be in integer, decimal, exponential or
even fraction notation.
>>> assert list(frange(1, 100.0, '0.1', end=True))[-1] == 100.0
>>> assert list(frange(1.0, '100', '1/10', end=True))[-1] == 100.0
>>> assert list(frange('1', '100.0', '.1', end=True))[-1] == 100.0
>>> assert list(frange('1.0', 100, '1e-1', end=True))[-1] == 100.0
>>> assert list(frange(1, 100.0, 0.1, end=True))[-1] != 100.0
>>> assert list(frange(1, 100.0, 0.1, end=True, via_str=True))[-1] == 100.0
"""
if via_str:
start = str(start)
stop = str(stop)
jump = str(jump)
start = Fraction(start)
stop = Fraction(stop)
jump = Fraction(jump)
while start < stop:
yield float(start)
start += jump
if end and start == stop:
yield(float(start))
You can verify all of it by running a few assertions:
assert Fraction('1.1') - Fraction(11, 10) == 0.0
assert Fraction( 0.1 ) - Fraction(1, 10) == Fraction(1, 180143985094819840)
assert str(0.1) == '0.1'
assert '%.50f' % 0.1 == '0.10000000000000000555111512312578270211815834045410'
assert list(frange(1, 100.0, '0.1', end=True))[-1] == 100.0
assert list(frange(1.0, '100', '1/10', end=True))[-1] == 100.0
assert list(frange('1', '100.0', '.1', end=True))[-1] == 100.0
assert list(frange('1.0', 100, '1e-1', end=True))[-1] == 100.0
assert list(frange(1, 100.0, 0.1, end=True))[-1] != 100.0
assert list(frange(1, 100.0, 0.1, end=True, via_str=True))[-1] == 100.0
assert list(frange(2, 3, '1/6', end=True))[-1] == 3.0
assert list(frange(0, 100, '1/3', end=True))[-1] == 100.0
Code available on GitHub
As kichik wrote, this shouldn't be too complicated. However this code:
def frange(x, y, jump):
while x < y:
yield x
x += jump
Is inappropriate because of the cumulative effect of errors when working with floats.
That is why you receive something like:
>>>list(frange(0, 100, 0.1))[-1]
99.9999999999986
While the expected behavior would be:
>>>list(frange(0, 100, 0.1))[-1]
99.9
Solution 1
The cumulative error can simply be reduced by using an index variable. Here's the example:
from math import ceil
def frange2(start, stop, step):
n_items = int(ceil((stop - start) / step))
return (start + i*step for i in range(n_items))
This example works as expected.
Solution 2
No nested functions. Only a while and a counter variable:
def frange3(start, stop, step):
res, n = start, 1
while res < stop:
yield res
res = start + n * step
n += 1
This function will work well too, except for the cases when you want the reversed range. E.g:
>>>list(frange3(1, 0, -.1))
[]
Solution 1 in this case will work as expected. To make this function work in such situations, you must apply a hack, similar to the following:
from operator import gt, lt
def frange3(start, stop, step):
res, n = start, 0.
predicate = lt if start < stop else gt
while predicate(res, stop):
yield res
res = start + n * step
n += 1
With this hack you can use these functions with negative steps:
>>>list(frange3(1, 0, -.1))
[1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.3999999999999999, 0.29999999999999993, 0.19999999999999996, 0.09999999999999998]
Solution 3
You can go even further with plain standard library and compose a range function for the most of numeric types:
from itertools import count
from itertools import takewhile
def any_range(start, stop, step):
start = type(start + step)(start)
return takewhile(lambda n: n < stop, count(start, step))
This generator is adapted from the Fluent Python book (Chapter 14. Iterables, Iterators and generators). It will not work with decreasing ranges. You must apply a hack, like in the previous solution.
You can use this generator as follows, for example:
>>>list(any_range(Fraction(2, 1), Fraction(100, 1), Fraction(1, 3)))[-1]
299/3
>>>list(any_range(Decimal('2.'), Decimal('4.'), Decimal('.3')))
[Decimal('2'), Decimal('2.3'), Decimal('2.6'), Decimal('2.9'), Decimal('3.2'), Decimal('3.5'), Decimal('3.8')]
And of course you can use it with float and int as well.
Be careful
If you want to use these functions with negative steps, you should add a check for the step sign, e.g.:
no_proceed = (start < stop and step < 0) or (start > stop and step > 0)
if no_proceed: raise StopIteration
The best option here is to raise StopIteration, if you want to mimic the range function itself.
Mimic range
If you would like to mimic the range function interface, you can provide some argument checks:
def any_range2(*args):
if len(args) == 1:
start, stop, step = 0, args[0], 1.
elif len(args) == 2:
start, stop, step = args[0], args[1], 1.
elif len(args) == 3:
start, stop, step = args
else:
raise TypeError('any_range2() requires 1-3 numeric arguments')
# here you can check for isinstance numbers.Real or use more specific ABC or whatever ...
start = type(start + step)(start)
return takewhile(lambda n: n < stop, count(start, step))
I think, you've got the point. You can go with any of these functions (except the very first one) and all you need for them is python standard library.
Why Is There No Floating Point Range Implementation In The Standard Library?
As made clear by all the posts here, there is no floating point version of range(). That said, the omission makes sense if we consider that the range() function is often used as an index (and of course, that means an accessor) generator. So, when we call range(0,40), we're in effect saying we want 40 values starting at 0, up to 40, but non-inclusive of 40 itself.
When we consider that index generation is as much about the number of indices as it is their values, the use of a float implementation of range() in the standard library makes less sense. For example, if we called the function frange(0, 10, 0.25), we would expect both 0 and 10 to be included, but that would yield a generator with 41 values, not the 40 one might expect from 10/0.25.
Thus, depending on its use, an frange() function will always exhibit counter intuitive behavior; it either has too many values as perceived from the indexing perspective or is not inclusive of a number that reasonably should be returned from the mathematical perspective. In other words, it's easy to see how such a function would appear to conflate two very different use cases – the naming implies the indexing use case; the behavior implies a mathematical one.
The Mathematical Use Case
With that said, as discussed in other posts, numpy.linspace() performs the generation from the mathematical perspective nicely:
numpy.linspace(0, 10, 41)
array([ 0. , 0.25, 0.5 , 0.75, 1. , 1.25, 1.5 , 1.75,
2. , 2.25, 2.5 , 2.75, 3. , 3.25, 3.5 , 3.75,
4. , 4.25, 4.5 , 4.75, 5. , 5.25, 5.5 , 5.75,
6. , 6.25, 6.5 , 6.75, 7. , 7.25, 7.5 , 7.75,
8. , 8.25, 8.5 , 8.75, 9. , 9.25, 9.5 , 9.75, 10.
])
The Indexing Use Case
And for the indexing perspective, I've written a slightly different approach with some tricksy string magic that allows us to specify the number of decimal places.
# Float range function - string formatting method
def frange_S (start, stop, skip = 1.0, decimals = 2):
for i in range(int(start / skip), int(stop / skip)):
yield float(("%0." + str(decimals) + "f") % (i * skip))
Similarly, we can also use the built-in round function and specify the number of decimals:
# Float range function - rounding method
def frange_R (start, stop, skip = 1.0, decimals = 2):
for i in range(int(start / skip), int(stop / skip)):
yield round(i * skip, ndigits = decimals)
A Quick Comparison & Performance
Of course, given the above discussion, these functions have a fairly limited use case. Nonetheless, here's a quick comparison:
def compare_methods (start, stop, skip):
string_test = frange_S(start, stop, skip)
round_test = frange_R(start, stop, skip)
for s, r in zip(string_test, round_test):
print(s, r)
compare_methods(-2, 10, 1/3)
The results are identical for each:
-2.0 -2.0
-1.67 -1.67
-1.33 -1.33
-1.0 -1.0
-0.67 -0.67
-0.33 -0.33
0.0 0.0
...
8.0 8.0
8.33 8.33
8.67 8.67
9.0 9.0
9.33 9.33
9.67 9.67
And some timings:
>>> import timeit
>>> setup = """
... def frange_s (start, stop, skip = 1.0, decimals = 2):
... for i in range(int(start / skip), int(stop / skip)):
... yield float(("%0." + str(decimals) + "f") % (i * skip))
... def frange_r (start, stop, skip = 1.0, decimals = 2):
... for i in range(int(start / skip), int(stop / skip)):
... yield round(i * skip, ndigits = decimals)
... start, stop, skip = -1, 8, 1/3
... """
>>> min(timeit.Timer('string_test = frange_s(start, stop, skip); [x for x in string_test]', setup=setup).repeat(30, 1000))
0.024284090992296115
>>> min(timeit.Timer('round_test = frange_r(start, stop, skip); [x for x in round_test]', setup=setup).repeat(30, 1000))
0.025324633985292166
Looks like the string formatting method wins by a hair on my system.
The Limitations
And finally, a demonstration of the point from the discussion above and one last limitation:
# "Missing" the last value (10.0)
for x in frange_R(0, 10, 0.25):
print(x)
0.25
0.5
0.75
1.0
...
9.0
9.25
9.5
9.75
Further, when the skip parameter is not divisible by the stop value, there can be a yawning gap given the latter issue:
# Clearly we know that 10 - 9.43 is equal to 0.57
for x in frange_R(0, 10, 3/7):
print(x)
0.0
0.43
0.86
1.29
...
8.14
8.57
9.0
9.43
There are ways to address this issue, but at the end of the day, the best approach would probably be to just use Numpy.
A solution without numpy etc dependencies was provided by kichik but due to the floating point arithmetics, it often behaves unexpectedly. As noted by me and blubberdiblub, additional elements easily sneak into the result. For example naive_frange(0.0, 1.0, 0.1) would yield 0.999... as its last value and thus yield 11 values in total.
A bit more robust version is provided here:
def frange(x, y, jump=1.0):
'''Range for floats.'''
i = 0.0
x = float(x) # Prevent yielding integers.
x0 = x
epsilon = jump / 2.0
yield x # yield always first value
while x + epsilon < y:
i += 1.0
x = x0 + i * jump
if x < y:
yield x
Because the multiplication, the rounding errors do not accumulate. The use of epsilon takes care of possible rounding error of the multiplication, even though issues of course might rise in the very small and very large ends. Now, as expected:
> a = list(frange(0.0, 1.0, 0.1))
> a[-1]
0.9
> len(a)
10
And with somewhat larger numbers:
> b = list(frange(0.0, 1000000.0, 0.1))
> b[-1]
999999.9
> len(b)
10000000
The code is also available as a GitHub Gist.
This can be done with numpy.arange(start, stop, stepsize)
import numpy as np
np.arange(0.5,5,1.5)
>> [0.5, 2.0, 3.5, 5.0]
# OBS you will sometimes see stuff like this happening,
# so you need to decide whether that's not an issue for you, or how you are going to catch it.
>> [0.50000001, 2.0, 3.5, 5.0]
Note 1:
From the discussion in the comment section here, "never use numpy.arange() (the numpy documentation itself recommends against it). Use numpy.linspace as recommended by wim, or one of the other suggestions in this answer"
Note 2:
I have read the discussion in a few comments here, but after coming back to this question for the third time now, I feel this information should be placed in a more readable position.
A simpler library-less version
Aw, heck -- I'll toss in a simple library-less version. Feel free to improve on it[*]:
def frange(start=0, stop=1, jump=0.1):
nsteps = int((stop-start)/jump)
dy = stop-start
# f(i) goes from start to stop as i goes from 0 to nsteps
return [start + float(i)*dy/nsteps for i in range(nsteps)]
The core idea is that nsteps is the number of steps to get you from start to stop and range(nsteps) always emits integers so there's no loss of accuracy. The final step is to map [0..nsteps] linearly onto [start..stop].
edit
If, like alancalvitti you'd like the series to have exact rational representation, you can always use Fractions:
from fractions import Fraction
def rrange(start=0, stop=1, jump=0.1):
nsteps = int((stop-start)/jump)
return [Fraction(i, nsteps) for i in range(nsteps)]
[*] In particular, frange() returns a list, not a generator. But it sufficed for my needs.
Usage
# Counting up
drange(0, 0.4, 0.1)
[0, 0.1, 0.2, 0.30000000000000004, 0.4]
# Counting down
drange(0, -0.4, -0.1)
[0, -0.1, -0.2, -0.30000000000000004, -0.4]
To round each step to N decimal places
drange(0, 0.4, 0.1, round_decimal_places=4)
[0, 0.1, 0.2, 0.3, 0.4]
drange(0, -0.4, -0.1, round_decimal_places=4)
[0, -0.1, -0.2, -0.3, -0.4]
Code
def drange(start, end, increment, round_decimal_places=None):
result = []
if start < end:
# Counting up, e.g. 0 to 0.4 in 0.1 increments.
if increment < 0:
raise Exception("Error: When counting up, increment must be positive.")
while start <= end:
result.append(start)
start += increment
if round_decimal_places is not None:
start = round(start, round_decimal_places)
else:
# Counting down, e.g. 0 to -0.4 in -0.1 increments.
if increment > 0:
raise Exception("Error: When counting down, increment must be negative.")
while start >= end:
result.append(start)
start += increment
if round_decimal_places is not None:
start = round(start, round_decimal_places)
return result
Why choose this answer?
Many other answers will hang when asked to count down.
Many other answers will give incorrectly rounded results.
Other answers based on np.linspace are hit-and-miss, they may or may not work due to difficulty in choosing the correct number of divisions. np.linspace really struggles with decimal increments of 0.1, and the order of divisions in the formula to convert the increment into a number of splits can result in either correct or broken code.
Other answers based on np.arange are deprecated.
If in doubt, try the four tests cases above.
I do not know if the question is old but there is a arange function in the NumPy library, it could work as a range.
np.arange(0,1,0.1)
#out:
array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
i wrote a function that returns a tuple of a range of double precision floating point numbers without any decimal places beyond the hundredths. it was simply a matter of parsing the range values like strings and splitting off the excess. I use it for displaying ranges to select from within a UI. I hope someone else finds it useful.
def drange(start,stop,step):
double_value_range = []
while start<stop:
a = str(start)
a.split('.')[1].split('0')[0]
start = float(str(a))
double_value_range.append(start)
start = start+step
double_value_range_tuple = tuple(double_value_range)
#print double_value_range_tuple
return double_value_range_tuple
Whereas integer-based ranges are well defined in that "what you see is what you get", there are things that are not readily seen in floats that cause troubles in getting what appears to be a well defined behavior in a desired range.
There are two approaches that one can take:
split a given range into a certain number of segment: the linspace approach in which you accept the large number of decimal digits when you select a number of points that does not divide the span well (e.g. 0 to 1 in 7 steps will give a first step value of 0.14285714285714285)
give the desired WYSIWIG step size that you already know should work and wish that it would work. Your hopes will often be dashed by getting values that miss the end point that you wanted to hit.
Multiples can be higher or lower than you expect:
>>> 3*.1 > .3 # 0.30000000000000004
True
>>> 3*.3 < 0.9 # 0.8999999999999999
True
You will try to avoid accumulating errors by adding multiples of your step and not incrementing, but the problem will always present itself and you just won't get what you expect if you did it by hand on paper -- with exact decimals. But you know it should be possible since Python shows you 0.1 instead of the underlying integer ratio having a close approximation to 0.1:
>>> (3*.1).as_integer_ratio()
(1351079888211149, 4503599627370496)
In the methods offered as answers, the use of Fraction here with the option to handle input as strings is best. I have a few suggestions to make it better:
make it handle range-like defaults so you can start from 0 automatically
make it handle decreasing ranges
make the output look like you would expect if you were using exact arithmetic
I offer a routine that does these same sort of thing but which does not use the Fraction object. Instead, it uses round to create numbers having the same apparent digits as the numbers would have if you printed them with python, e.g. 1 decimal for something like 0.1 and 3 decimals for something like 0.004:
def frange(start, stop, step, n=None):
"""return a WYSIWYG series of float values that mimic range behavior
by excluding the end point and not printing extraneous digits beyond
the precision of the input numbers (controlled by n and automatically
detected based on the string representation of the numbers passed).
EXAMPLES
========
non-WYSIWYS simple list-comprehension
>>> [.11 + i*.1 for i in range(3)]
[0.11, 0.21000000000000002, 0.31]
WYSIWYG result for increasing sequence
>>> list(frange(0.11, .33, .1))
[0.11, 0.21, 0.31]
and decreasing sequences
>>> list(frange(.345, .1, -.1))
[0.345, 0.245, 0.145]
To hit the end point for a sequence that is divisibe by
the step size, make the end point a little bigger by
adding half the step size:
>>> dx = .2
>>> list(frange(0, 1 + dx/2, dx))
[0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
"""
if step == 0:
raise ValueError('step must not be 0')
# how many decimal places are showing?
if n is None:
n = max([0 if '.' not in str(i) else len(str(i).split('.')[1])
for i in (start, stop, step)])
if step*(stop - start) > 0: # a non-null incr/decr range
if step < 0:
for i in frange(-start, -stop, -step, n):
yield -i
else:
steps = round((stop - start)/step)
while round(step*steps + start, n) < stop:
steps += 1
for i in range(steps):
yield round(start + i*step, n)
def Range(*argSequence):
if len(argSequence) == 3:
imin = argSequence[0]; imax = argSequence[1]; di = argSequence[2]
i = imin; iList = []
while i <= imax:
iList.append(i)
i += di
return iList
if len(argSequence) == 2:
return Range(argSequence[0], argSequence[1], 1)
if len(argSequence) == 1:
return Range(1, argSequence[0], 1)
Please note the first letter of Range is capital. This naming method is not encouraged for functions in Python. You can change Range to something like drange or frange if you want. The "Range" function behaves just as you want it to. You can check it's manual here [ http://reference.wolfram.com/language/ref/Range.html ].
I think that there is a very simple answer that really emulates all the features of range but for both float and integer. In this solution, you just suppose that your approximation by default is 1e-7 (or the one you choose) and you can change it when you call the function.
def drange(start,stop=None,jump=1,approx=7): # Approx to 1e-7 by default
'''
This function is equivalent to range but for both float and integer
'''
if not stop: # If there is no y value: range(x)
stop= start
start= 0
valor= round(start,approx)
while valor < stop:
if valor==int(valor):
yield int(round(valor,approx))
else:
yield float(round(valor,approx))
valor += jump
for i in drange(12):
print(i)
Talk about making a mountain out of a mole hill.
If you relax the requirement to make a float analog of the range function, and just create a list of floats that is easy to use in a for loop, the coding is simple and robust.
def super_range(first_value, last_value, number_steps):
if not isinstance(number_steps, int):
raise TypeError("The value of 'number_steps' is not an integer.")
if number_steps < 1:
raise ValueError("Your 'number_steps' is less than 1.")
step_size = (last_value-first_value)/(number_steps-1)
output_list = []
for i in range(number_steps):
output_list.append(first_value + step_size*i)
return output_list
first = 20.0
last = -50.0
steps = 5
print(super_range(first, last, steps))
The output will be
[20.0, 2.5, -15.0, -32.5, -50.0]
Note that the function super_range is not limited to floats. It can handle any data type for which the operators +, -, *, and / are defined, such as complex, Decimal, and numpy.array:
import cmath
first = complex(1,2)
last = complex(5,6)
steps = 5
print(super_range(first, last, steps))
from decimal import *
first = Decimal(20)
last = Decimal(-50)
steps = 5
print(super_range(first, last, steps))
import numpy as np
first = np.array([[1, 2],[3, 4]])
last = np.array([[5, 6],[7, 8]])
steps = 5
print(super_range(first, last, steps))
The output will be:
[(1+2j), (2+3j), (3+4j), (4+5j), (5+6j)]
[Decimal('20.0'), Decimal('2.5'), Decimal('-15.0'), Decimal('-32.5'), Decimal('-50.0')]
[array([[1., 2.],[3., 4.]]),
array([[2., 3.],[4., 5.]]),
array([[3., 4.],[5., 6.]]),
array([[4., 5.],[6., 7.]]),
array([[5., 6.],[7., 8.]])]
There will be of course some rounding errors, so this is not perfect, but this is what I use generally for applications, which don't require high precision. If you wanted to make this more accurate, you could add an extra argument to specify how to handle rounding errors. Perhaps passing a rounding function might make this extensible and allow the programmer to specify how to handle rounding errors.
arange = lambda start, stop, step: [i + step * i for i in range(int((stop - start) / step))]
If I write:
arange(0, 1, 0.1)
It will output:
[0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6000000000000001, 0.7000000000000001, 0.8, 0.9]
Is there a range() equivalent for floats in Python?
NO
Use this:
def f_range(start, end, step, coef=0.01):
a = range(int(start/coef), int(end/coef), int(step/coef))
var = []
for item in a:
var.append(item*coef)
return var
There several answers here that don't handle simple edge cases like negative step, wrong start, stop etc. Here's the version that handles many of these cases correctly giving same behaviour as native range():
def frange(start, stop=None, step=1):
if stop is None:
start, stop = 0, start
steps = int((stop-start)/step)
for i in range(steps):
yield start
start += step
Note that this would error out step=0 just like native range. One difference is that native range returns object that is indexable and reversible while above doesn't.
You can play with this code and test cases here.
I am trying to write an application to convert bytes to kb to mb to gb to tb.
Here's what I have so far:
def size_format(b):
if b < 1000:
return '%i' % b + 'B'
elif 1000 <= b < 1000000:
return '%.1f' % float(b/1000) + 'KB'
elif 1000000 <= b < 1000000000:
return '%.1f' % float(b/1000000) + 'MB'
elif 1000000000 <= b < 1000000000000:
return '%.1f' % float(b/1000000000) + 'GB'
elif 1000000000000 <= b:
return '%.1f' % float(b/1000000000000) + 'TB'
The problem is, when I try the application I get everything after the decimal zeroing out.
example
size_format(623) yields
'623B'
but with size_format(6200),
instead of getting '6.2kb'
I'm getting '6.0kb'.
Any ideas why?
Fixed version of Bryan_Rch's answer:
def format_bytes(size):
# 2**10 = 1024
power = 2**10
n = 0
power_labels = {0 : '', 1: 'kilo', 2: 'mega', 3: 'giga', 4: 'tera'}
while size > power:
size /= power
n += 1
return size, power_labels[n]+'bytes'
def humanbytes(B):
"""Return the given bytes as a human friendly KB, MB, GB, or TB string."""
B = float(B)
KB = float(1024)
MB = float(KB ** 2) # 1,048,576
GB = float(KB ** 3) # 1,073,741,824
TB = float(KB ** 4) # 1,099,511,627,776
if B < KB:
return '{0} {1}'.format(B,'Bytes' if 0 == B > 1 else 'Byte')
elif KB <= B < MB:
return '{0:.2f} KB'.format(B / KB)
elif MB <= B < GB:
return '{0:.2f} MB'.format(B / MB)
elif GB <= B < TB:
return '{0:.2f} GB'.format(B / GB)
elif TB <= B:
return '{0:.2f} TB'.format(B / TB)
tests = [1, 1024, 500000, 1048576, 50000000, 1073741824, 5000000000, 1099511627776, 5000000000000]
for t in tests: print("{0} == {1}".format(t,humanbytes(t)))
Output:
1 == 1.0 Byte
1024 == 1.00 KB
500000 == 488.28 KB
1048576 == 1.00 MB
50000000 == 47.68 MB
1073741824 == 1.00 GB
5000000000 == 4.66 GB
1099511627776 == 1.00 TB
5000000000000 == 4.55 TB
and for future me here it is in Perl too:
sub humanbytes {
my $B = shift;
my $KB = 1024;
my $MB = $KB ** 2; # 1,048,576
my $GB = $KB ** 3; # 1,073,741,824
my $TB = $KB ** 4; # 1,099,511,627,776
if ($B < $KB) {
return "$B " . (($B == 0 || $B > 1) ? 'Bytes' : 'Byte');
} elsif ($B >= $KB && $B < $MB) {
return sprintf('%0.02f',$B/$KB) . ' KB';
} elsif ($B >= $MB && $B < $GB) {
return sprintf('%0.02f',$B/$MB) . ' MB';
} elsif ($B >= $GB && $B < $TB) {
return sprintf('%0.02f',$B/$GB) . ' GB';
} elsif ($B >= $TB) {
return sprintf('%0.02f',$B/$TB) . ' TB';
}
}
WARNING: other answers are likely to contain bugs. The ones posted before this one were unable to handle filesizes that are close to the boundary of the next unit.
Dividing bytes to get a human-readable answer may seem easy, right? Wrong!
Many answers are incorrect and contains floating point rounding bugs that cause incorrect output such as "1024 KiB" instead of "1 MiB". They shouldn't feel sad about it, though, since it's a bug that even Android's OS programmers had in the past, and tens of thousands of programmer eyes never noticed the bug in the world's most popular StackOverflow answer either, despite years of people using that old Java answer.
So what's the problem? Well, it's due to the way that floating point rounding works. A float such as "1023.95" will actually round up to "1024.0" when told to format itself as a single-decimal number. Most programmers don't think about that bug, but it COMPLETELY breaks the "human readable bytes" formatting. So their code thinks "Oh, 1023.95, that's fine, we've found the correct unit since the number is less than 1024", but they don't realize that it will get rounded to "1024.0" which SHOULD be formatted as the NEXT size-unit.
Furthermore, many of the other answers are using horribly slow code with a bunch of math functions such as pow/log, which may look "neat" but completely wrecks performance. Most of the other answers use crazy if/else nesting, or other performance-killers such as temporary lists, live string concatenation/creation, etc. In short, they waste CPU cycles doing pointless, heavy work.
Most of them also forget to include larger units, and therefore only support a small subset of the most common filesizes. Given a larger number, such code would output something like "1239213919393491123.1 Gigabytes", which is silly. Some of them won't even do that, and will simply break if the input number is larger than the largest unit they've implemented.
Furthermore, almost none of them handle negative input, such as "minus 2 megabytes", and completely break on such input.
They also hardcode very personal choices such as precision (how many decimals) and unit type (metric or binary). Which means that their code is barely reusable.
So... okay, we have a situation where the current answers aren't correct... so why not do everything right instead? Here's my function, which focuses on both performance and configurability. You can choose between 0-3 decimals, and whether you want metric (power of 1000) or binary (power of 1024) representation. It contains some code comments and usage examples, to help people understand why it does what it does and what bugs it avoids by working this way. If all the comments are deleted, it would shrink the line numbers by a lot, but I suggest keeping the comments when copypasta-ing so that you understand the code again in the future. ;-)
from typing import List, Union
class HumanBytes:
METRIC_LABELS: List[str] = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
BINARY_LABELS: List[str] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]
PRECISION_OFFSETS: List[float] = [0.5, 0.05, 0.005, 0.0005] # PREDEFINED FOR SPEED.
PRECISION_FORMATS: List[str] = ["{}{:.0f} {}", "{}{:.1f} {}", "{}{:.2f} {}", "{}{:.3f} {}"] # PREDEFINED FOR SPEED.
#staticmethod
def format(num: Union[int, float], metric: bool=False, precision: int=1) -> str:
"""
Human-readable formatting of bytes, using binary (powers of 1024)
or metric (powers of 1000) representation.
"""
assert isinstance(num, (int, float)), "num must be an int or float"
assert isinstance(metric, bool), "metric must be a bool"
assert isinstance(precision, int) and precision >= 0 and precision <= 3, "precision must be an int (range 0-3)"
unit_labels = HumanBytes.METRIC_LABELS if metric else HumanBytes.BINARY_LABELS
last_label = unit_labels[-1]
unit_step = 1000 if metric else 1024
unit_step_thresh = unit_step - HumanBytes.PRECISION_OFFSETS[precision]
is_negative = num < 0
if is_negative: # Faster than ternary assignment or always running abs().
num = abs(num)
for unit in unit_labels:
if num < unit_step_thresh:
# VERY IMPORTANT:
# Only accepts the CURRENT unit if we're BELOW the threshold where
# float rounding behavior would place us into the NEXT unit: F.ex.
# when rounding a float to 1 decimal, any number ">= 1023.95" will
# be rounded to "1024.0". Obviously we don't want ugly output such
# as "1024.0 KiB", since the proper term for that is "1.0 MiB".
break
if unit != last_label:
# We only shrink the number if we HAVEN'T reached the last unit.
# NOTE: These looped divisions accumulate floating point rounding
# errors, but each new division pushes the rounding errors further
# and further down in the decimals, so it doesn't matter at all.
num /= unit_step
return HumanBytes.PRECISION_FORMATS[precision].format("-" if is_negative else "", num, unit)
print(HumanBytes.format(2251799813685247)) # 2 pebibytes
print(HumanBytes.format(2000000000000000, True)) # 2 petabytes
print(HumanBytes.format(1099511627776)) # 1 tebibyte
print(HumanBytes.format(1000000000000, True)) # 1 terabyte
print(HumanBytes.format(1000000000, True)) # 1 gigabyte
print(HumanBytes.format(4318498233, precision=3)) # 4.022 gibibytes
print(HumanBytes.format(4318498233, True, 3)) # 4.318 gigabytes
print(HumanBytes.format(-4318498233, precision=2)) # -4.02 gibibytes
By the way, the hardcoded PRECISION_OFFSETS is created that way for maximum performance. We could have programmatically calculated the offsets using the formula unit_step_thresh = unit_step - (0.5/(10**precision)) to support arbitrary precisions. But it really makes NO sense to format filesizes with massive 4+ trailing decimal numbers. That's why my function supports exactly what people use: 0, 1, 2 or 3 decimals. Thus we avoid a bunch of pow and division math. This decision is one of many small attention-to-detail choices that make this function FAST. Another example of performance choices was the decision to use a string-based if unit != last_label check to detect the end of the List, rather than iterating by indices and seeing if we've reached the final List-index. Generating indices via range() or tuples via enumerate() is slower than just doing an address comparison of Python's immutable string objects stored in the _LABELS lists, which is what this code does instead!
Sure, it's a bit excessive to put that much work into performance, but I hate the "write sloppy code and only optimize after all the thousands of slow functions in a project makes the whole project sluggish" attitude. The "premature optimization" quote that most programmers live by is completely misunderstood and used as an excuse for sloppiness. :-P
I place this code in the public domain. Feel free to use it in your projects, both freeware and commercial. I actually suggest that you place it in a .py module and change it from a "class namespace" into a normal module instead. I only used a class to keep the code neat for StackOverflow and to make it easy to paste into self-contained python scripts if you don't want to use modules.
Enjoy and have fun! :-)
good idea for me:
def convert_bytes(num):
"""
this function will convert bytes to MB.... GB... etc
"""
step_unit = 1000.0 #1024 bad the size
for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
if num < step_unit:
return "%3.1f %s" % (num, x)
num /= step_unit
Yet another humanbytes version, with no loops/if..else, in python3 syntax.
Test numbers stolen from #whereisalext's answer.
Mind you, it's still a sketch, e.g. if the numbers are large enough it will traceback.
import math as m
MULTIPLES = ["B", "k{}B", "M{}B", "G{}B", "T{}B", "P{}B", "E{}B", "Z{}B", "Y{}B"]
def humanbytes(i, binary=False, precision=2):
base = 1024 if binary else 1000
multiple = m.trunc(m.log2(i) / m.log2(base))
value = i / m.pow(base, multiple)
suffix = MULTIPLES[multiple].format("i" if binary else "")
return f"{value:.{precision}f} {suffix}"
if __name__ == "__main__":
sizes = [
1, 1024, 500000, 1048576, 50000000, 1073741824, 5000000000,
1099511627776, 5000000000000]
for i in sizes:
print(f"{i} == {humanbytes(i)}, {humanbytes(i, binary=True)}")
Results:
1 == 1.00 B, 1.00 B
1024 == 1.02 kB, 1.00 kiB
500000 == 500.00 kB, 488.28 kiB
1048576 == 1.05 MB, 1.00 MiB
50000000 == 50.00 MB, 47.68 MiB
1073741824 == 1.07 GB, 1.00 GiB
5000000000 == 5.00 GB, 4.66 GiB
1099511627776 == 1.10 TB, 1.00 TiB
5000000000000 == 5.00 TB, 4.55 TiB
Update:
As pointed out in comments (and as noted originally: "Mind you, it's still a sketch"), this code is slow and buggy. Please see #mitch-mcmabers 's answer.
Update 2: I was also lying about having no ifs.
There is now a convenient DataSize package :
pip install datasize
import datasize
import sys
a = [i for i in range(1000000)]
s = sys.getsizeof(a)
print(f"{datasize.DataSize(s):MiB}")
Output :
8.2945556640625MiB
I have quite readable function to convert bytes into greater units:
def bytes_2_human_readable(number_of_bytes):
if number_of_bytes < 0:
raise ValueError("!!! number_of_bytes can't be smaller than 0 !!!")
step_to_greater_unit = 1024.
number_of_bytes = float(number_of_bytes)
unit = 'bytes'
if (number_of_bytes / step_to_greater_unit) >= 1:
number_of_bytes /= step_to_greater_unit
unit = 'KB'
if (number_of_bytes / step_to_greater_unit) >= 1:
number_of_bytes /= step_to_greater_unit
unit = 'MB'
if (number_of_bytes / step_to_greater_unit) >= 1:
number_of_bytes /= step_to_greater_unit
unit = 'GB'
if (number_of_bytes / step_to_greater_unit) >= 1:
number_of_bytes /= step_to_greater_unit
unit = 'TB'
precision = 1
number_of_bytes = round(number_of_bytes, precision)
return str(number_of_bytes) + ' ' + unit
Using logarithms is probably the most concise way to do it:
from math import floor, log
def format_bytes(size):
power = 0 if size <= 0 else floor(log(size, 1024))
return f"{round(size / 1024 ** power, 2)} {['B', 'KB', 'MB', 'GB', 'TB'][int(power)]}"
Rather than modifying your code, you can change the behaviour of division:
from __future__ import division
This provides "true" division over the "classic" style that Python 2.x uses. See PEP 238 - Changing the Division Operator for more details.
This is now the default behaviour in Python 3.x
A very simple solution would be:
SIZE_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
def get_readable_file_size(size_in_bytes):
index = 0
while size_in_bytes >= 1024:
size_in_bytes /= 1024
index += 1
try:
return f'{size_in_bytes} {SIZE_UNITS[index]}'
except IndexError:
return 'File too large'
When you divide the value you're using an integer divide, since both values are integers. You need to convert one of them to float first:
return '%.1f' % float(b)/1000 + 'KB'
or even just
return '%.1f' % b/1000.0 + 'KB'
This is a compact version that converts B (bytes) to any higher order such MB, GB without using a lot of if...else in python. I use bit-wise to deal with this. Also it allows to return a float output if you trigger the parameter return_output in the function as True:
import math
def bytes_conversion(number, return_float=False):
def _conversion(number, return_float=False):
length_number = int(math.log10(number))
if return_float:
length_number = int(math.log10(number))
return length_number // 3, '%.2f' % (int(number)/(1 << (length_number//3) *10))
return length_number // 3, int(number) >> (length_number//3) * 10
unit_dict = {
0: "B", 1: "kB",
2: "MB", 3: "GB",
4: "TB", 5: "PB",
6: "EB"
}
if return_float:
num_length, number = _conversion(number, return_float=return_float)
else:
num_length, number = _conversion(number)
return "%s %s" % (number, unit_dict[num_length])
#Example usage:
#print(bytes_conversion(491266116, return_float=True))
This is only a few of my posts in StackOverflow. Please let me know if I have any errors or violations.
I have improved, in my opininion, #whereisalext answer to have a somewhat more generic function which does not require one to add more if statements once more units are going to be added:
AVAILABLE_UNITS = ['bytes', 'KB', 'MB', 'GB', 'TB']
def get_amount_and_unit(byte_amount):
for index, unit in enumerate(AVAILABLE_UNITS):
lower_threshold = 0 if index == 0 else 1024 ** (index - 1)
upper_threshold = 1024 ** index
if lower_threshold <= byte_amount < upper_threshold:
if lower_threshold == 0:
return byte_amount, unit
else:
return byte_amount / lower_threshold, AVAILABLE_UNITS[index - 1]
# Default to the maximum
max_index = len(AVAILABLE_UNITS) - 1
return byte_amount / (1024 ** max_index), AVAILABLE_UNITS[max_index]
Do note that this differs slightly frrom #whereisalext's algo:
This returns a tuple containing the converted amount at the first index and the unit at the second index
This does not try to differ between a singular and multiple bytes (1 bytes is therefore an output of this approach)
I think this is a short and succinct. The idea is based on some graph scaling code I wrote many years ago. The code snippet round(log2(size)*4)/40 does the magic here, calculating the boundaries with an increment with the power of 2**10. The "correct" implementation would be: trunc(log2(size)/10, however then you would get strange behavior when the size is close to a new boundary. For instance datasize(2**20-1) would return (1024.00, 'KiB'). By using round and scaling the log2result you get a nice cutof when approaching a new boundary.
from math import log2
def datasize(size):
"""
Calculate the size of a code in B/KB/MB.../
Return a tuple of (value, unit)
"""
assert size>0, "Size must be a positive number"
units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
scaling = round(log2(size)*4)//40
scaling = min(len(units)-1, scaling)
return size/(2**(10*scaling)), units[scaling]
for size in [2**10-1, 2**10-10, 2**10-100, 2**20-10000, 2**20-2**18, 2**20, 2**82-2**72, 2**80-2**76]:
print(size, "bytes= %.3f %s" % datasize(size))
1023 bytes= 0.999 KiB
1014 bytes= 0.990 KiB
924 bytes= 924.000 B
1038576 bytes= 0.990 MiB
786432 bytes= 768.000 KiB
1048576 bytes= 1.000 MiB
4830980911975647053611008 bytes= 3.996 YiB
1133367955888714851287040 bytes= 0.938 YiB
Let me add mine, where no variable is updated in a loop or similar error-prone behaviors. The logic implemented is straightforward. It's tested only with Python 3.
def format_bytes(size: int) -> str:
power_labels = {40: "TB", 30: "GB", 20: "MB", 10: "KB"}
for power, label in power_labels.items():
if size >= 2 ** power:
approx_size = size // 2 ** power
return f"{approx_size} {label}"
return f"{size} bytes"
It's tested, for example at KB/MB boundary:
1024*1024-1 returns "1023 KB"
1024*1024 returns "1 MB"
1024*1024+1 returns "1 MB"
You can easily change approx_size if you want float instead of rounded integers.
Do float(b) before do dividing, e.g. do float(b)/1000 instead of float(b/1000), because both b and 1000 are integers, b/1000 is still an integer without decimal part.
Here is to convert bytes to kilo, mega, tera.
#From bytes to kilo, mega, tera
def get_(size):
#2**10 = 1024
power = 2**10
n = 1
Dic_powerN = {1:'kilobytes', 2:'megabytes', 3:'gigabytes', 4:'Terabytes'}
if size <= power**2 :
size /= power
return size, Dic_powerN[n]
else:
while size > power :
n += 1
size /= power**n
return size, Dic_powerN[n]
An output with no decimal places:
>>> format_file_size(12345678)
'11 MiB, 792 KiB, 334 bytes'
format_file_size(
def format_file_size(fsize):
result = []
units = {s: u for s, u in zip(reversed([2 ** n for n in range(0, 40, 10)]), ['GiB', 'MiB', 'KiB', 'bytes'])}
for s, u in units.items():
t = fsize // s
if t > 0:
result.append('{} {}'.format(t, u))
fsize = fsize % s
return ', '.join(result) or '0 bytes'
I know there already are a lot of answers and explanations here, but I tried this class based method and it perfectly worked for me. It may seem enormous but just take a look at how I used the attributes and methods.
class StorageUnits:
b, Kb, Kib, Mb, Mib, Gb, Gib, Tb, Tib, Pb, Pib, Eb, Eib, Zb, Zib, Yb, Yib, B, KB, KiB, MB, MiB, GB, GiB, TB,\
TiB, PB, PiB, EB, EiB, ZB, ZiB, YB, YiB = [0]*34
class DigitalStorageConverter:
def __init__(self):
self.storage = StorageUnits()
self.bit_conversion_value_table = {
'b': 1, 'Kb': 1000, 'Mb': 1000**2, 'Gb': 1000**3, 'Tb': 1000**4, 'Pb': 1000**5, 'Eb': 1000**6,
'Zb': 1000**7, 'Yb': 1000**8, 'Kib': 1024, 'Mib': 1024**2, 'Gib': 1024**3, 'Tib': 1024**4, 'Pib': 1024**5,
'Eib': 1024**6, 'Zib': 1024**7, 'Yib': 1024**8,
'B': 8, 'KB': 8*1000, 'MB': 8*(1000**2), 'GB': 8*(1000**3), 'TB': 8*(1000**4), 'PB': 8*(1000**5),
'EB': 8*(1000**6), 'ZB': 8*(1000**7), 'YB': 8*(1000**8), 'KiB': 8*1024, 'MiB': 8*(1024**2),
'GiB': 8*(1024**3), 'TiB': 8*(1024**4), 'PiB': 8*(1024**5), 'EiB': 8*(1024**6), 'ZiB': 8*(1024**7),
'YiB': 8*(1024**8)
}
"Values of all the units in bits"
self.name_conversion_table = {
'bit': 'b', 'kilobit': 'Kb', 'megabit': 'Mb', 'gigabit': 'Gb', 'terabit': 'Tb', 'petabit': 'Pb',
'exabit': 'Eb', 'zettabit': 'Zb', 'yottabit': 'Yb', 'kibibit': 'Kib', 'mebibit': 'Mib', 'Gibibit': 'Gib',
'tebibit': 'Tib', 'pebibit': 'Pb', 'exbibit': 'Eib', 'zebibit': 'Zib', 'yobibit': 'Yib',
'byte': 'B', 'kilobyte': 'KB', 'megabyte': 'MB', 'gigabyte': 'GB', 'terabyte': 'TB', 'petabyte': 'PB',
'exabyte': 'EB', 'zettabyte': 'ZB', 'yottabyte': 'YB', 'kibibyte': 'KiB', 'mebibyte': 'MiB',
'gibibyte': 'GiB', 'tebibyte': 'TiB', 'pebibyte': 'PiB', 'exbibyte': 'EiB', 'zebibyte': 'ZiB',
'yobibyte': 'YiB'
}
self.storage_units = [u for u in list(StorageUnits.__dict__.keys()) if not u.startswith('__')]
def get_conversion(self, value: float, from_type: str) -> StorageUnits:
if from_type in list(self.name_conversion_table.values()):
from_type_bit_value = self.bit_conversion_value_table[from_type]
elif from_type in list(self.name_conversion_table.keys()):
from_type = self.name_conversion_table[from_type]
from_type_bit_value = self.bit_conversion_value_table[from_type]
else:
raise KeyError(f'Invalid storage unit type "{from_type}"')
value = value * from_type_bit_value
for i in self.storage_units:
self.storage.__setattr__(i, value / self.bit_conversion_value_table[i])
return self.storage
if __name__ == '__main__':
c = DigitalStorageConverter()
s = c.get_conversion(5000, 'KiB')
print(s.KB, s.MB, s.TB) # , ..., ..., etc till whatever you may want
This program will give you answers in exponent form if the number is too big.
NOTE: Please correct the names of the storage values, if anywhere found incorrect
def resize(size: int | float, from_: str = "KB", to_: str = "B"):
sizes = ("PB", "TB", "GB", "MB", "KB", "B")
unit = sizes.index(to_.upper()) - sizes.index(from_.upper())
return size // (1024 ** abs(unit)) if unit < 0 else size ** (1024 * abs(unit))