Pandas FloatingPoint Error - python

I'm getting a floating point error on a simple time series in pandas. I'm trying to do shift operations... but this also happens with the window functions like rolling_mean.
EDIT: For some more info... I tried to actually build this from source yesterday prior to the error. I'm not sure if the error would've occurred prior the build attempt, as I'd never messed around w/ these functions.
EDIT2: I thought I'd fixed this, but when I run this inside python it works, but when it's in ipython I get the error.
EDIT3: Numpy 1.7.0, iPython 0.13, pandas 0.7.3
In [35]: ts = Series(np.arange(12), index=DateRange('1/1/2000', periods=12, freq='T'))
In [36]: ts.shift(0)
Out[36]:
2000-01-03 0
2000-01-04 1
2000-01-05 2
2000-01-06 3
2000-01-07 4
2000-01-10 5
2000-01-11 6
2000-01-12 7
2000-01-13 8
2000-01-14 9
2000-01-17 10
2000-01-18 11
In [37]: ts.shift(1)
Out[37]: ---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
/Users/trenthauck/Repository/work/SQS/analysis/campaign/tv2/data/<ipython-input-37-2b7cec97d440> in <module>()
----> 1 ts.shift(1)
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/displayhook.pyc in __call__(self, result)
236 self.start_displayhook()
237 self.write_output_prompt()
--> 238 format_dict = self.compute_format_data(result)
239 self.write_format_data(format_dict)
240 self.update_user_ns(result)
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/displayhook.pyc in compute_format_data(self, result)
148 MIME type representation of the object.
149 """
--> 150 return self.shell.display_formatter.format(result)
151
152 def write_format_data(self, format_dict):
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/formatters.pyc in format(self, obj, include, exclude)
124 continue
125 try:
--> 126 data = formatter(obj)
127 except:
128 # FIXME: log the exception
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/formatters.pyc in __call__(self, obj)
445 type_pprinters=self.type_printers,
446 deferred_pprinters=self.deferred_printers)
--> 447 printer.pretty(obj)
448 printer.flush()
449 return stream.getvalue()
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/lib/pretty.pyc in pretty(self, obj)
353 if callable(obj_class._repr_pretty_):
354 return obj_class._repr_pretty_(obj, self, cycle)
--> 355 return _default_pprint(obj, self, cycle)
356 finally:
357 self.end_group()
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
473 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
474 # A user-provided repr.
--> 475 p.text(repr(obj))
476 return
477 p.begin_group(1, '<')
/Library/Python/2.7/site-packages/pandas/core/series.pyc in __repr__(self)
696 result = self._get_repr(print_header=True,
697 length=len(self) > 50,
--> 698 name=True)
699 else:
700 result = '%s' % ndarray.__repr__(self)
/Library/Python/2.7/site-packages/pandas/core/series.pyc in _get_repr(self, name, print_header, length, na_rep, float_format)
756 length=length, na_rep=na_rep,
757 float_format=float_format)
--> 758 return formatter.to_string()
759
760 def __str__(self):
/Library/Python/2.7/site-packages/pandas/core/format.pyc in to_string(self)
99
100 fmt_index, have_header = self._get_formatted_index()
--> 101 fmt_values = self._get_formatted_values()
102
103 maxlen = max(len(x) for x in fmt_index)
/Library/Python/2.7/site-packages/pandas/core/format.pyc in _get_formatted_values(self)
90 return format_array(self.series.values, None,
91 float_format=self.float_format,
---> 92 na_rep=self.na_rep)
93
94 def to_string(self):
/Library/Python/2.7/site-packages/pandas/core/format.pyc in format_array(values, formatter, float_format, na_rep, digits, space, justify)
431 justify=justify)
432
--> 433 return fmt_obj.get_result()
434
435
/Library/Python/2.7/site-packages/pandas/core/format.pyc in get_result(self)
528
529 # this is pretty arbitrary for now
--> 530 has_large_values = (np.abs(self.values) > 1e8).any()
531
532 if too_long and has_large_values:
FloatingPointError: invalid value encountered in absolute
In [38]: ts.shift(-1)
Out[38]: ---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
/Users/myusername/Repository/work/SQS/analysis/campaign/tv2/data/<ipython-input-38-314ec815a7c5> in <module>()
----> 1 ts.shift(-1)
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/displayhook.pyc in __call__(self, result)
236 self.start_displayhook()
237 self.write_output_prompt()
--> 238 format_dict = self.compute_format_data(result)
239 self.write_format_data(format_dict)
240 self.update_user_ns(result)
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/displayhook.pyc in compute_format_data(self, result)
148 MIME type representation of the object.
149 """
--> 150 return self.shell.display_formatter.format(result)
151
152 def write_format_data(self, format_dict):
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/formatters.pyc in format(self, obj, include, exclude)
124 continue
125 try:
--> 126 data = formatter(obj)
127 except:
128 # FIXME: log the exception
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/formatters.pyc in __call__(self, obj)
445 type_pprinters=self.type_printers,
446 deferred_pprinters=self.deferred_printers)
--> 447 printer.pretty(obj)
448 printer.flush()
449 return stream.getvalue()
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/lib/pretty.pyc in pretty(self, obj)
353 if callable(obj_class._repr_pretty_):
354 return obj_class._repr_pretty_(obj, self, cycle)
--> 355 return _default_pprint(obj, self, cycle)
356 finally:
357 self.end_group()
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
473 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
474 # A user-provided repr.
--> 475 p.text(repr(obj))
476 return
477 p.begin_group(1, '<')
/Library/Python/2.7/site-packages/pandas/core/series.pyc in __repr__(self)
696 result = self._get_repr(print_header=True,
697 length=len(self) > 50,
--> 698 name=True)
699 else:
700 result = '%s' % ndarray.__repr__(self)
/Library/Python/2.7/site-packages/pandas/core/series.pyc in _get_repr(self, name, print_header, length, na_rep, float_format)
756 length=length, na_rep=na_rep,
757 float_format=float_format)
--> 758 return formatter.to_string()
759
760 def __str__(self):
/Library/Python/2.7/site-packages/pandas/core/format.pyc in to_string(self)
99
100 fmt_index, have_header = self._get_formatted_index()
--> 101 fmt_values = self._get_formatted_values()
102
103 maxlen = max(len(x) for x in fmt_index)
/Library/Python/2.7/site-packages/pandas/core/format.pyc in _get_formatted_values(self)
90 return format_array(self.series.values, None,
91 float_format=self.float_format,
---> 92 na_rep=self.na_rep)
93
94 def to_string(self):
/Library/Python/2.7/site-packages/pandas/core/format.pyc in format_array(values, formatter, float_format, na_rep, digits, space, justify)
431 justify=justify)
432
--> 433 return fmt_obj.get_result()
434
435
/Library/Python/2.7/site-packages/pandas/core/format.pyc in get_result(self)
528
529 # this is pretty arbitrary for now
--> 530 has_large_values = (np.abs(self.values) > 1e8).any()
531
532 if too_long and has_large_values:
FloatingPointError: invalid value encountered in absolute

I would add this as a comment, but I don't have the privilege to do that yet :)
It works for me in python and iPython 0.12; iPython 0.13 is still in development (see http://ipython.org/ ), and, since the errors you're getting seem to involve formatting in the iPython 0.13 egg, I suspect that might be the cause. Try with iPython 0.12 instead-- if it works, file a bug report with iPython and then probably stick with 0.12 until 0.13 is (more) stable.

Related

Cannot open eps file after saving figure

Normally, opening an eps file is no problem but with this current code in Python that I am working on, the exported eps file is loading when opened but never appearing. I have tried exporting the same figure as a png and that works fine. Also I have tried exporting a really simple figure as eps and that opens without any flaws. I have included some of the relevant code concerning the plot/figure. Any help would be much appreciated.
#%% plot section
plt.close('all')
plt.figure()
plt.errorbar(r,omega,yerr=omega_err,fmt='mo')
plt.xlabel('xlabel')
plt.ylabel('ylabel')
plt.title('profile averaged from {} ms to {} ms \n shot {}'.format(tidsinterval[0],tidsinterval[1],skud_numre[0]),y=1.05)
plt.grid()
plt.axhline(y=2.45,color='Red')
plt.text(39,2.43,'txt block for horizontal line',backgroundcolor='white')
plt.axvline(x=37.5,color='Purple')
plt.text(37.5,1.2,'txt block for vertical line',ha='center',va="center",rotation='vertical',backgroundcolor='white')
plt.savefig('directory/plot.eps', format='eps')
plt.show()
The variables r, omega, omega_err are vectors of float of small sizes (6 perhaps).
Update: The program I use for opening eps-files is Evince, furthermore, one can download the eps file here https://filedropper.com/d/s/z7lxUCtANeox7tDMQ6dI6HZUpcTfHn. As far as I can see, it is fine sharing files over filedropper via community guidelines, but if I'm wrong please say so.
Found out that it is possible to open the file as long as there is no text contained in the plot (for example x-label,y-label, title and so on), so the problem has to be related to the text.
The short answer is it's your font. The /e glyph is throwing an error on setcachedevice (your PostScript interpreter should have told you this).
The actual problem is that the font program is careless (at least) about it's use of function name. The program contains this:
/mpldict 11 dict def
mpldict begin
/d { bind def } bind def
That creates a new dictionary called mpldict, begins that dictionary (makes it the topmost entry in the dictionary stack) and defines a function called 'd' in that dictionary
We then move on to the font definition, there's a lot of boiler plate in here, but each character shape is defined by an entry in the font's CharStrings dictionary, we'll pick that up with the definition of the function called 'd' in the font's CharStrings dictionary.
/d{1300 0 113 -29 1114 1556 sc
930 950 m
930 1556 l
ce} d
(2.60) == flush
/e{1260 0 113 -29 1151 1147 sc
1151 606 m
1151 516 l
305 516 l
313 389 351 293 419 226 c
488 160 583 127 705 127 c
776 127 844 136 910 153 c
977 170 1043 196 1108 231 c
1108 57 l
1042 29 974 8 905 -7 c
836 -22 765 -29 694 -29 c
515 -29 374 23 269 127 c
165 231 113 372 113 549 c
113 732 162 878 261 985 c
360 1093 494 1147 662 1147 c
813 1147 932 1098 1019 1001 c
1107 904 1151 773 1151 606 c
967 660 m
966 761 937 841 882 901 c
827 961 755 991 664 991 c
561 991 479 962 417 904 c
356 846 320 764 311 659 c
967 660 l
ce} d
Notice that what this does is create a new definition of a function named 'd' in the current dictionary. That's not a problem in itself. We now have two functions named 'd'; one in the current dictionary (the font's CharStrings dictionary) and one in 'mpldict'.
Then we define the next character:
/e{1260 0 113 -29 1151 1147 sc
1151 606 m
1151 516 l
305 516 l
313 389 351 293 419 226 c
488 160 583 127 705 127 c
776 127 844 136 910 153 c
977 170 1043 196 1108 231 c
1108 57 l
1042 29 974 8 905 -7 c
836 -22 765 -29 694 -29 c
515 -29 374 23 269 127 c
165 231 113 372 113 549 c
113 732 162 878 261 985 c
360 1093 494 1147 662 1147 c
813 1147 932 1098 1019 1001 c
1107 904 1151 773 1151 606 c
967 660 m
966 761 937 841 882 901 c
827 961 755 991 664 991 c
561 991 479 962 417 904 c
356 846 320 764 311 659 c
967 660 l
ce} d
Now, the last thing we do at the end of defining that character shape (for the character named 'e') is call a function named 'd'. But there are two, which one do we call ? The answer is that we work backwards down the dictionary stack looking in each dictionary to see if it has a function called 'd' and we use the first one we find. The current dictionary is the font's CharStrings dictionary, and it has a function called 'd' (which defines the 'd' character) so we call that.
And that function then tries to use setcachedevice. That operator is not legal except when executing a character description, which we are not doing, so it throws an undefined error.
Now your PostScript interpreter should tell you there is an error (Ghostscript, for example, does so). Because there is an error the interpreter stops and doesn't draw anything further, which is why you get a blank page.
What can you do about this ? Well you could raise a bug report with the creating application (apparently Matplotlib created the font too). This is not a good way to define a font!
Other than that, well frankly the only thing you can do is search and replace through the file. If you look for occurrences of ce} d and replace them with ce}bind def it'll probably work. This time.

Problem using make_column_transformer in Sklearn

This is my code/model that I'm trying to implement:
kf = KFold(n_splits=10,shuffle=True,random_state=2652124)
transf = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}',max_features=1500, min_df=5, max_df=0.7, stop_words=stop)
scaler = MinMaxScaler(feature_range=(0, 1))
metadados = ['F13','F14','F19','F21','F22']
cls = RandomForestClassifier(n_estimators=1000,random_state=0)
features = make_column_transformer(
(transf,'textimage'),(transf,'subtitle'),
(scaler, metadata),(scaler,'F3'),remainder ='drop')
X = features.fit_transform(data)
y = data['classification']
for train_index, test_index in kf.split(X):
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
cls.fit(X_train,y_train)
y_score = cls.fit(X_train, y_train).predict_proba(X_test)
roc = roc_auc_score(y_test, y_score[:,1])
pred = cls.predict(X_test)
acs = accuracy_score(y_test,pred)
clr = classification_report(y_test,pred)
The error:
TypeError Traceback (most recent call last)
TypeError: only size-1 arrays can be converted to Python scalars
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-13-6bdcb91ff478> in <module>
14
15 kfnum = 1
---> 16 X = features.fit_transform(data)
17 y = data['classe']
18 catr = 'timagem + metadados + legenda'
~/.local/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
529 self._validate_remainder(X)
530
--> 531 result = self._fit_transform(X, y, _fit_transform_one)
532
533 if not result:
~/.local/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
456 self._iter(fitted=fitted, replace_strings=True))
457 try:
--> 458 return Parallel(n_jobs=self.n_jobs)(
459 delayed(func)(
460 transformer=clone(trans) if not fitted else trans,
~/.local/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
1049 self._iterating = self._original_iterator is not None
1050
-> 1051 while self.dispatch_one_batch(iterator):
1052 pass
1053
~/.local/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
864 return False
865 else:
--> 866 self._dispatch(tasks)
867 return True
868
~/.local/lib/python3.8/site-packages/joblib/parallel.py in _dispatch(self, batch)
782 with self._lock:
783 job_idx = len(self._jobs)
--> 784 job = self._backend.apply_async(batch, callback=cb)
785 # A job can complete so quickly than its callback is
786 # called before we get here, causing self._jobs to
~/.local/lib/python3.8/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~/.local/lib/python3.8/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~/.local/lib/python3.8/site-packages/joblib/parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/.local/lib/python3.8/site-packages/joblib/parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/.local/lib/python3.8/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
738 with _print_elapsed_time(message_clsname, message):
739 if hasattr(transformer, 'fit_transform'):
--> 740 res = transformer.fit_transform(X, y, **fit_params)
741 else:
742 res = transformer.fit(X, y, **fit_params).transform(X)
~/.local/lib/python3.8/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
688 if y is None:
689 # fit method of arity 1 (unsupervised transformation)
--> 690 return self.fit(X, **fit_params).transform(X)
691 else:
692 # fit method of arity 2 (supervised transformation)
~/.local/lib/python3.8/site-packages/sklearn/preprocessing/_data.py in fit(self, X, y)
334 # Reset internal state before fitting
335 self._reset()
--> 336 return self.partial_fit(X, y)
337
338 def partial_fit(self, X, y=None):
~/.local/lib/python3.8/site-packages/sklearn/preprocessing/_data.py in partial_fit(self, X, y)
367
368 first_pass = not hasattr(self, 'n_samples_seen_')
--> 369 X = self._validate_data(X, reset=first_pass,
370 estimator=self, dtype=FLOAT_DTYPES,
371 force_all_finite="allow-nan")
~/.local/lib/python3.8/site-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
418 f"requires y to be passed, but the target y is None."
419 )
--> 420 X = check_array(X, **check_params)
421 out = X
422 else:
~/.local/lib/python3.8/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~/.local/lib/python3.8/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
596 array = array.astype(dtype, casting="unsafe", copy=False)
597 else:
--> 598 array = np.asarray(array, order=order, dtype=dtype)
599 except ComplexWarning:
600 raise ValueError("Complex data not supported\n"
~/.local/lib/python3.8/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
~/.local/lib/python3.8/site-packages/pandas/core/series.py in __array__(self, dtype)
795 dtype='datetime64[ns]')
796 """
--> 797 return np.asarray(self.array, dtype)
798
799 # ----------------------------------------------------------------------
~/.local/lib/python3.8/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
~/.local/lib/python3.8/site-packages/pandas/core/arrays/numpy_.py in __array__(self, dtype)
209
210 def __array__(self, dtype=None) -> np.ndarray:
--> 211 return np.asarray(self._ndarray, dtype=dtype)
212
213 _HANDLED_TYPES = (np.ndarray, numbers.Number)
~/.local/lib/python3.8/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
ValueError: setting an array element with a sequence.
I have no problemas using only:
features = make_column_transformer(
(transf,'textimage'),(transf,'subtitle'),
(scaler, metadata),remainder ='drop')
So my problem is the column 'F3' in my dataframe, which is an array in each row:
0 [0.0026778684, 0.003117677, 0.00040434036, 0.0...
1 [0.061992627, 0.047432333, 0.012270351, 0.0102...
2 [0.0, 0.0, 0.0, 4.3830705e-06, 1.3149212e-05, ...
3 [0.30314153, 0.04477268, 0.01840577, 0.0319251...
4 [0.2563626, 0.03259786, 0.018686974, 0.0198365...
...
1287 [0.11471527, 0.032394826, 0.012400794, 0.01131...
1288 [0.002138354, 0.001044489, 0.0007786191, 0.001...
1289 [0.056204572, 0.026556363, 0.02082041, 0.01966...
1290 [0.051759016, 0.0058623934, 0.0054726205, 0.00...
1291 [0.0, 5.4140626e-05, 4.4114586e-05, 4.8125003e...
Name: F3, Length: 1292, dtype: object
Can anyone help me with that? How can I change a column into a list into a pipeline, or how can I concatenate the tranform with a list? Any suggestions?

Python - Print updating counter not working

This is my code. The goal is to print a counter that updates the number of the page that's being checked within the same lane, replacing the old one:
import time
start_page = 500
stop_page = 400
print 'Checking page ',
for n in range(start_page,stop_page,-1):
print str(n),
time.sleep(5) # This to simulate the execution of my code
print '\r',
This doesn't print anything:
$ python test.py
$
I'm using Python 2.7.10, the line that causes problems is probably this print '\r', because if I run this:
import time
start_page = 500
stop_page = 400
print 'Checking page ',
for n in range(start_page,stop_page,-1):
print str(n),
time.sleep(5) # This to simulate the execution of my code
#print '\r',
I have this output:
$ python test.py
Checking page 500 499 498 497 496 495 494 493 492 491 490 489 488 487 486 485 484 483 482 481 480 479 478 477 476 475 474 473 472 471 470 469 468 467 466 465 464 463 462 461 460 459 458 457 456 455 454 453 452 451 450 449 448 447 446 445 444 443 442 441 440 439 438 437 436 435 434 433 432 431 430 429 428 427 426 425 424 423 422 421 420 419 418 417 416 415 414 413 412 411 410 409 408 407 406 405 404 403 402 401
$
Remove the comas after the print expressions:
print 'Checking page ',
print str(n),
print '\r',
PS: Since I got asked, the first thing to notice is that print is not a function it is a statement, hence it is not interpreted in the same way.
In the print case in particular, adding a ',' after the print will make it print the content without a newline.
In the case of your program, in particular, what it was doing is:
printing 'Checking page' -> NO \n here
printing n -> no \n here
printing '\r' -> again no '\n' here
Since you were not sending any new lines to the output, your OS didn't flush the data. You can add a sys.stdout.flush() after the print('\r') and see it changing if you want.
More on the print statement here.
https://docs.python.org/2/reference/simple_stmts.html#grammar-token-print_stmt
Why the hell I got downvoted? o.O

pandas: FloatingPointError with np.seterr(all='raise') and missing data

I'm getting a FloatingPointError when I want to look at data involving missing data.
import numpy as np
import pandas as pd
np.seterr(all='raise')
s = pd.Series([np.nan,np.nan,np.nan],index=[1,2,3]); print(s); print(s.head())
I'm on the newest version of pandas, installed via
conda install -f pandas
after pkill python and conda remove pandas.
Here's the trace back:
Out[4]: ---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj)
695 type_pprinters=self.type_printers,
696 deferred_pprinters=self.deferred_printers)
--> 697 printer.pretty(obj)
698 printer.flush()
699 return stream.getvalue()
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
381 if callable(meth):
382 return meth(obj, self, cycle)
--> 383 return _default_pprint(obj, self, cycle)
384 finally:
385 self.end_group()
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
501 if _safe_getattr(klass, '__repr__', None) not in _baseclass_reprs:
502 # A user-provided repr. Find newlines and replace them with p.break_()
--> 503 _repr_pprint(obj, p, cycle)
504 return
505 p.begin_group(1, '<')
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _repr_pprint(obj, p, cycle)
683 """A pprint that just redirects to the normal repr function."""
684 # Find newlines and replace them with p.break_()
--> 685 output = repr(obj)
686 for idx,output_line in enumerate(output.splitlines()):
687 if idx:
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/base.pyc in __repr__(self)
61 Yields Bytestring in Py2, Unicode String in py3.
62 """
---> 63 return str(self)
64
65
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/base.pyc in __str__(self)
41 if compat.PY3:
42 return self.__unicode__()
---> 43 return self.__bytes__()
44
45 def __bytes__(self):
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/base.pyc in __bytes__(self)
53
54 encoding = get_option("display.encoding")
---> 55 return self.__unicode__().encode(encoding, 'replace')
56
57 def __repr__(self):
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/series.pyc in __unicode__(self)
954
955 self.to_string(buf=buf, name=self.name, dtype=self.dtype,
--> 956 max_rows=max_rows)
957 result = buf.getvalue()
958
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/series.pyc in to_string(self, buf, na_rep, float_format, header, length, dtype, name, max_rows)
992 the_repr = self._get_repr(float_format=float_format, na_rep=na_rep,
993 header=header, length=length, dtype=dtype,
--> 994 name=name, max_rows=max_rows)
995
996 # catch contract violations
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/series.pyc in _get_repr(self, name, header, length, dtype, na_rep, float_format, max_rows)
1022 float_format=float_format,
1023 max_rows=max_rows)
-> 1024 result = formatter.to_string()
1025
1026 # TODO: following check prob. not neces.
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in to_string(self)
223
224 fmt_index, have_header = self._get_formatted_index()
--> 225 fmt_values = self._get_formatted_values()
226
227 maxlen = max(self.adj.len(x) for x in fmt_index) # max index len
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in _get_formatted_values(self)
213 return format_array(self.tr_series._values, None,
214 float_format=self.float_format,
--> 215 na_rep=self.na_rep)
216
217 def to_string(self):
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in format_array(values, formatter, float_format, na_rep, digits, space, justify)
1974 justify=justify)
1975
-> 1976 return fmt_obj.get_result()
1977
1978
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in get_result(self)
1990
1991 def get_result(self):
-> 1992 fmt_values = self._format_strings()
1993 return _make_fixed_width(fmt_values, self.justify)
1994
/home/xxx/.conda/envs/myenv2/lib/python2.7/site-packages/pandas/core/format.pyc in _format_strings(self)
2085
2086 # this is pretty arbitrary for now
-> 2087 has_large_values = (abs_vals > 1e8).any()
2088 has_small_values = ((abs_vals < 10 ** (-self.digits)) &
2089 (abs_vals > 0)).any()
FloatingPointError: invalid value encountered in greater
Whenever you import pandas, all numpy errors are set to be ignore. This is currently undocumented behavior.
This is done in pandas/compat/numpy_compat.py
# TODO: HACK for NumPy 1.5.1 to suppress warnings
# is this necessary?
try:
np.seterr(all='ignore')
except Exception: # pragma: no cover
pass
Here's how that plays out
In [1]: import numpy as np
In [2]: np.geterr()
Out[2]: {'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}
In [3]: import pandas as pd
In [4]: np.geterr()
Out[4]: {'divide': 'ignore', 'invalid': 'ignore', 'over': 'ignore', 'under': 'ignore'}
In [5]: s = pd.Series([np.nan,np.nan,np.nan],index=[1,2,3]); print(s); print(s.head())
1 NaN
2 NaN
3 NaN
dtype: float64
1 NaN
2 NaN
3 NaN
dtype: float64
In [6]: np.seterr(invalid='raise')
Out[6]: {'divide': 'ignore', 'invalid': 'ignore', 'over': 'ignore', 'under': 'ignore'}
In [7]: s = pd.Series([np.nan,np.nan,np.nan],index=[1,2,3]); print(s); print(s.head())
FloatingPointError: invalid value encountered in greater
The "solution" is hence to not np.seterr(invalid'raise'), whenever you use pandas (especially when working with missing data).

how to print only lines which contain a substring

i have a file with strings:
REALSTEP 12342 {2012-7-20 15:10:39};[{416 369 338};{423 432 349};{383 380 357};{399 401 242};{0 454 285};{457 433 115};{419 455 314};{495 534 498};][{428 377 336};{433 456 345};{386 380 363};{384 411
REALSTEP 7191 {2012-7-20 15:10:41};[{416 370 361};{406 417 376};{377 368 359};{431 387 251};{0 461 366};{438 409 134};{429 411 349};{424 505 364};][{423 372 353};{420 433 374};{379 365 356};{431 387 2
REALSTEP 12123 {2012-7-20 15:10:42};[{375 382 329};{386 402 347};{374 378 357};{382 384 259};{0 397 357};{442 424 188};{398 384 356};{392 420 355};][{404 405 359};{420 432 372};{405 408 383};{413 407
REALSTEP 27237 {2012-7-20 15:10:44};[{431 375 329};{416 453 334};{387 382 349};{397 403 248};{0 451 300};{453 422 131};{433 401 317};{434 505 326};][{443 384 328};{427 467 336};{391 386 344};{394 413
FAKE 32290 {2012-7-20 15:10:48};[{424 399 364};{408 446 366};{397 394 389};{415 409 261};{0 430 374};{445 428 162};{432 416 375};{431 473 380};][{424 398 376};{412 436 372};{401 400 390};{417 409 261}
FAKE 32296 {2012-7-20 15:10:53};[{409 326 394};{445 425 353};{401 402 357};{390 424 250};{0 420 353};{447 423 143};{404 436 351};{421 527 420};][{410 332 400};{450 429 356};{402 403 356};{391 425 250}
FAKE 32296 {2012-7-20 15:10:59};[{381 312 387};{413 405 328};{320 387 376};{388 387 262};{0 402 326};{417 418 177};{407 409 335};{443 502 413};][{412 336 417};{446 437 353};{343 417 403};{417 418 258}
FAKE 32295 {2012-7-20 15:11:4};[{377 314 392};{416 403 329};{322 388 375};{385 391 261};{0 403 329};{425 420 168};{414 393 330};{458 502 397};][{408 338 421};{449 435 355};{345 418 403};{413 420 257};
FAKE 32295 {2012-7-20 15:11:9};[{371 318 411};{422 385 333};{342 379 352};{394 395 258};{0 440 338};{418 414 158};{420 445 346};{442 516 439};][{401 342 441};{456 415 358};{367 407 377};{420 420 255};
FAKE 32296 {2012-7-20 15:11:15};[{373 319 412};{423 386 333};{344 384 358};{402 402 257};{0 447 342};{423 416 151};{422 450 348};{447 520 442};][{403 342 442};{456 416 358};{366 409 379};{422 421 255}
REALSTEP 7191 {2012-7-20 15:10:41};[{416 370 361};{406 417 376};{377 368 359};{431 387 251};{0 461 366};{438 409 134};{429 411 349};{424 505 364};][{423 372 353};{420 433 374};{379 365 356};{431 387 2
REALSTEP 12123 {2012-7-20 15:10:42};[{375 382 329};{386 402 347};{374 378 357};{382 384 259};{0 397 357};{442 424 188};{398 384 356};{392 420 355};][{404 405 359};{420 432 372};{405 408 383};{413 407
REALSTEP 27237 {2012-7-20 15:10:44};[{431 375 329};{416 453 334};{387 382 349};{397 403 248};{0 451 300};{453 422 131};{433 401 317};{434 505 326};][{443 384 328};{427 467 336};{391 386 344};{394 413
I read the file with readlines() and want to then loop over the lines and print only when there is a consecutive block of lines larger than 3, containing the string "REALSTEP". So in the example the expected result is:
REALSTEP 12342 {2012-7-20 15:10:39};[{416 369 338};{423 432 349};{383 380 357};{399 401 242};{0 454 285};{457 433 115};{419 455 314};{495 534 498};][{428 377 336};{433 456 345};{386 380 363};{384 411
REALSTEP 7191 {2012-7-20 15:10:41};[{416 370 361};{406 417 376};{377 368 359};{431 387 251};{0 461 366};{438 409 134};{429 411 349};{424 505 364};][{423 372 353};{420 433 374};{379 365 356};{431 387 2
REALSTEP 12123 {2012-7-20 15:10:42};[{375 382 329};{386 402 347};{374 378 357};{382 384 259};{0 397 357};{442 424 188};{398 384 356};{392 420 355};][{404 405 359};{420 432 372};{405 408 383};{413 407
REALSTEP 27237 {2012-7-20 15:10:44};[{431 375 329};{416 453 334};{387 382 349};{397 403 248};{0 451 300};{453 422 131};{433 401 317};{434 505 326};][{443 384 328};{427 467 336};{391 386 344};{394 413
I tried this:
lines = f.readlines()
idx = -1
#loop trough all lines in the file
for i, line in enumerate(lines):
if idx > i:
continue
else:
if "REALSTEP" in line:
steps = lines[i:i+3]
#check for blokc of realsteps
if is_block(steps, "REALSTEP") == 3:
#prints the block up to the first next "FAKE STEP"
lst = get_block(lines[i:-1])
for l in lst:
print l[:200]
idx = i + len(lst)
print "next block============"
where the function is_block is this:
def is_block(lines, check):
#print len(lines)
bool = 1
for l in lines:
if check in l:
bool = 1
else:
bool = 0
bool = bool + bool
return bool
and the function get_block:
def get_block(lines):
lst = []
for l in lines:
if "REALSTEP" in l:
#print l[:200]
lst.append(l)
else:
break
return lst
While this works, it prints all lines containing the string "REALSTEPS". The print len(lines) in is_block(lines) is always 10 when the function is called so that is not it.
I am confused, please help me out here!
Here's a simple solution containing the logic you need:
to_print = []
count = 0
started = False
for line in f.readlines():
if "REALSTEP" in line:
if not started:
started = True
to_print.append(line)
count += 1
else:
if count > 3: print('\n'.join(to_print))
started = False
count = 0
to_print = []
It counts any line that has the string "REALSTEP" in it as valid. Produces the desired output.
This part:
...
if "REALSTEP" in line:
steps = lines[i:i+3]
for s in steps:
print s[:200] # <- right here
...
Whenever you find "REALSTEP" in a line, you retrieve the following three lines and print them right away. That's probably not what you wanted.

Categories