Reading a file remotely using paramiko (or not) in Python - python

I would like to read a file in a remote machine. I can do it using paramiko.
The file is constantly updated by newlines. I have tried to implement a python script for reading it. Here the interesting part of the code:
import glob
import sys
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import os
import pandas as pd
from scipy.linalg import norm
import time
import paramiko
import select
if __name__ == "__main__":
print("...starting")
# a lot of stuff here in the middle
ssh_client = paramiko.SSHClient()
ssh_client.load_system_host_keys()
ssh_client.connect(hostname='xxx.xx.xx.xxx',username='user',password='pass')
print("...starting transport:")
transport = ssh_client.get_transport()
channel = transport.open_session()
channel.exec_command("cat /tmp/ciao.txt")
while True:
rl, wl, xl = select.select([channel],[],[],0.0)
#print(rl.readlines())
if len(rl) > 0:
#print("printing")
string_in_file = channel.recv(1024)
if len(string_in_file) > 0:
#print("printing")
print(string_in_file)
Problem: the file is correctly read at the beginning and after, every newly written line is completely ignored or, at least, it does not produce any effect on the output of the proposed script. Any suggestions on how to read new lines when written?
Any other idea on how to achieve the same result (even without paramiko) is more than welcome. The only restriction is the use of python.

tail -f will keep following the file giving you more output as you go.
import glob
import sys
import os
import time
import paramiko
import select
if __name__ == "__main__":
print("...starting")
# a lot of stuff here in the middle
ssh_client = paramiko.SSHClient()
ssh_client.load_system_host_keys()
# for test get "user,pw" in ./test.pw
user, pw = open('test.pw').readline().strip().split(",")
ssh_client.connect(hostname='localhost',username=user,password=pw)
print("...starting transport:")
transport = ssh_client.get_transport()
channel = transport.open_session()
# 1GB is include first Gig - just a way to get all of the
# file instead of the last few lines
# include --follow=name instead of -f if you want to keep
# following files that are log rotated
channel.exec_command("tail -f --lines=1GB /tmp/test.txt")
while True:
# (don't melt cpu's with a zero timeout)
rl, wl, xl = select.select([channel],[],[])
#rl, wl, xl = select.select([channel],[],[],0.0)
if rl:
string_in_file = channel.recv(1024)
if len(string_in_file) > 0:
print(string_in_file)
else:
print("channel disconnected")
break

Related

script has a huge delay at the beginning

I have multiple python scripts in various complexity I am running from another application as part of data load steps.
Some are in production for more than 2 years some new.
I have observed lately a script starts and does nothing for 10+ minutes.
Example : The application has the start timestamp as 2022-05-17 15:48:42
The first entry in the log file for this script is 2022-05-17 16:01:04
Script logs the start time as a first step.
The list of libraries imported for the example run above is a short one since this is one of the areas I looked at
import snowflake.connector
import json
import os
import datetime
import threading
from pathlib import Path
Any ideas what could be causing this behavior?
How can I further debug this?
Update : Here is the minimal example of the script
import snowflake.connector
import json
import os
import datetime
import threading
from pathlib import Path
def to_log(msg, include_timestamp=True):
    """Append *msg* to today's log file and echo it to stdout.

    Relies on the module-level ``work_dir`` (a ``pathlib.Path``) for the
    log location; a new file is started every day.

    Parameters
    ----------
    msg : str
        The text to log.
    include_timestamp : bool, optional
        When True (default), prefix the message with the current
        date/time.
    """
    # BUG FIX: sample "now" exactly once so the date embedded in the file
    # name and the message timestamp can never disagree (e.g. when the
    # call straddles midnight).
    now = datetime.datetime.now()
    # add the current date to the log file name in order to create a file for everyday
    # (work_dir / str already yields a Path; no extra Path(...) wrapper needed)
    log_file = work_dir / f"log/kafka_loader_{now:%Y%m%d}.log"
    if include_timestamp:
        msg = f"{now:%Y-%m-%d %H:%M:%S} {msg}"
    print(msg)
    with open(log_file, "a") as l_f:
        l_f.write(msg + "\n")
def process():
    """Main data-loading work (elided in this minimal example).

    BUG FIX: renamed from ``proces`` -- the ``__main__`` guard calls
    ``process()``, so the misspelled definition raised NameError.
    """
    ...
if __name__ == "__main__":
to_log("Starting a new session")
process()
to_log("Session completed")
The application used to invoke the script is called WhereScape.
In the UI I see the start timestamp shown above.

Using stdout and stdin in a loop in python leading to errors

I am using stdout and stdin to communicate information between two python programs. tester.py should pass telemetry data into helper.py and helper.py should return some command to tester.py.
This seems to work when run without a loop, but when I put the code in tester.py inside a loop that updates the telemetry data, helper.py no longer seems able to pass back the correct command. The console print out is as follows:
b'\x00\x00\x00\x00\x01\x00\x00\x00'
0.0
b''
Traceback (most recent call last):
File "/Users/Advay/Documents/PyCharm/zip_sim/tester.py", line 44, in <module>
varr = COMMAND_STRUCT.unpack(cmd)
struct.error: unpack requires a buffer of 8 bytes
The tester.py:
import sys
import subprocess
import struct
# Wire formats shared with helper.py: big-endian float, unsigned byte,
# three raw bytes (8 bytes per record in each direction).
TELEMETRY_STRUCT = struct.Struct(">fB3s")
COMMAND_STRUCT = struct.Struct(">fB3s")

# Launch the helper with both ends of its stdio piped to this process.
helper = subprocess.Popen(['python3', 'helper.py'],
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)

drop = 1
padding = bytes(3)
for speed in range(5):
    # Send one telemetry record, then block until the helper replies.
    helper.stdin.write(TELEMETRY_STRUCT.pack(speed, drop, padding))
    helper.stdin.flush()
    cmd = helper.stdout.read(COMMAND_STRUCT.size)
    print(cmd)
    varr = COMMAND_STRUCT.unpack(cmd)
    print(varr[0])
and the helper.py:
import os
import random
import sys
import struct
# Wire formats shared with tester.py: big-endian float, unsigned byte,
# three raw bytes (8 bytes per record).
TELEMETRY_STRUCT = struct.Struct(">fB3s")
COMMAND_STRUCT = struct.Struct(">fB3s")
# NOTE(review): this reads exactly ONE telemetry record and then the
# process exits, so a caller that keeps writing records sees EOF (b'')
# on its next read of this program's stdout.
telemetry = sys.stdin.buffer.read(TELEMETRY_STRUCT.size)
a = TELEMETRY_STRUCT.unpack(telemetry)
# Echo the received speed back with command flag 1 and zero padding.
command = COMMAND_STRUCT.pack(a[0], 1, bytes(3))
sys.stdout.buffer.write(command)
sys.stdout.buffer.flush()
Any help would be appreciated a lot; I am at a complete loss as to why it does not work in the loop.
You're trying to send multiple commands from tester.py to helper.py, but helper.py only reads a single command and then exits -- there is no loop that would allow it to continue receiving additional commands from tester.py.
When you run tester.py, the first loop iteration succeeds, but the subsequent iteration fails because the helper.stdout.read() returns an empty value (because the helper has exited).
You need to structure your helper.py so that it can receive multiple commands.
For example:
import os
import random
import sys
import struct
# Wire formats shared with tester.py: big-endian float, unsigned byte,
# three raw bytes (8 bytes per record).
TELEMETRY_STRUCT = struct.Struct(">fB3s")
COMMAND_STRUCT = struct.Struct(">fB3s")

# Keep serving requests until the peer closes our stdin, at which point
# read() returns the b"" sentinel and the loop ends.
for telemetry in iter(lambda: sys.stdin.buffer.read(TELEMETRY_STRUCT.size), b""):
    fields = TELEMETRY_STRUCT.unpack(telemetry)
    reply = COMMAND_STRUCT.pack(fields[0], 1, bytes(3))
    sys.stdout.buffer.write(reply)
    sys.stdout.buffer.flush()
With this change, running tester.py results in:
b'\x00\x00\x00\x00\x01\x00\x00\x00'
0.0
b'?\x80\x00\x00\x01\x00\x00\x00'
1.0
b'#\x00\x00\x00\x01\x00\x00\x00'
2.0
b'##\x00\x00\x01\x00\x00\x00'
3.0
b'#\x80\x00\x00\x01\x00\x00\x00'
4.0

How to fix delays/lag in python script

I have a python script that plays an mp3 file when the current time matches the time specified in a text file. Everything works well, but I notice a lag of around 18 seconds before mplayer plays the mp3 file.
Is there anyway of making my python script better in order to get rid of the 18 seconds lag and make the mp3 file play instantaneously?
Here is my python script:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# import libraries
import json
import urllib2
from bs4 import BeautifulSoup
import requests
import datetime
import playsound
import os
import subprocess
# Current wall-clock time truncated to the minute, e.g. "14:05".
dateSTR = datetime.datetime.now().strftime('%H:%M')

# The schedule file contains a single HH:MM entry; drop any newlines.
f = open('/home/pi/test.txt','r')
newnametest = f.read().replace("\n","")
f.close()

# Play the alarm when the current minute matches the scheduled one.
if dateSTR == newnametest:
    os.system("mplayer -ao alsa:device=bluealsa /home/pi/test.mp3")
Try starting mplayer in a subprocess before you actually need it as:
# Start mplayer in slave mode, idling until told what to play; its stdin
# is piped so commands can be sent to it later.
p = subprocess.Popen('mplayer -slave -idle -ao alsa:device=bluealsa', shell=True, stdin=subprocess.PIPE)
That should start up mplayer and have it waiting for when you need it. Then, when you want to play a file, do:
# Tell the already-running mplayer (slave-mode command) to play the file.
p.communicate(input=b'loadfile /home/pi/test.mp3\n')
I'd create a loop, something like:
from time import sleep
from datetime import datetime
...
# Remember which minutes have already fired so the alarm plays at most
# once per matching minute.
played = []
while True:
    dateSTR = datetime.now().strftime('%H:%M')
    if dateSTR == newnametest and dateSTR not in played:
        played.append(dateSTR)
        os.system("mplayer -ao alsa:device=bluealsa /home/pi/test.mp3")
    # Re-check roughly once a second.
    sleep(1)

Problems using the Python interface of the Berkeley Parser

I am using the berkeley parser's interface in Python. I want to use the parser by having the input as a string and not a file. In this document, the usage is explained: https://github.com/emcnany/berkeleyinterface/blob/master/examples/example.py
Here is the documentation for the interface
https://github.com/emcnany/berkeleyinterface/blob/master/BerkeleyInterface/berkeleyinterface.py
I am following that guide but when I'm running the code below, nothing happens after reaching the last line and the code never finishes.
import os
from BerkeleyInterface import *
from StringIO import StringIO
# Default locations of the parser jar and grammar; both can be
# overridden through the environment below.
JAR_PATH = r'C:\berkeleyparser\parser.jar'
GRM_PATH = r'C:\berkeleyparser\english.gr'

cp = os.environ.get("BERKELEY_PARSER_JAR", JAR_PATH)
gr = os.environ.get("BERKELEY_PARSER_GRM", GRM_PATH)

# Boot the JVM and load the grammar once up front.
startup(cp)
args = {"gr": gr, "tokenize": True}
opts = getOpts(dictToArgs(args))
parser = loadGrammar(opts)
print("parser loaded")

strIn = StringIO("hello world how are you today")
strOut = StringIO()
# BUG FIX: strIn was created but never handed to parseInput, so the
# parser fell back to reading its default input (stdin) and blocked
# forever -- which is why the script "never finishes".
parseInput(parser, opts, inputFile=strIn, outputFile=strOut)

How to display a heatmap created in python using rpy2?

I am currently trying to generate a heatmap in python from a text file, using R commands (with rpy2). It works fine in R, but when I take it to python, the Quartz interface displays quickly and then closes. I would like either to be able to save the quartz display to a file, or directly save my heatmap to a file without displaying it.
Here is the code I have been using:
import rpy2.robjects as robjects
# R script executed verbatim through rpy2; it draws the heatmap on the
# default device (Quartz on OS X), which closes when the script ends.
r_script = '''
library("gplots")
data = read.csv("/Users/.../Heatmap_data.txt")
DF = data.frame(data)
MD = data.matrix(DF,rownames.force=NA)
heatmap.2(MD, scale="none", col=redgreen(100), cexRow=0.1, key=FALSE, symkey=FALSE, trace="none", Colv=FALSE)
'''
robjects.r(r_script)
I'm using python 2.7, on OS X Yosemite.
Thank you for any help.
import numpy as np
import rpy2.robjects as ro
import rpy2.robjects.numpy2ri
# Route numpy arrays straight into R calls.
ro.numpy2ri.activate()
r = ro.r

heat_data = np.random.random((10, 10))
# Open a PNG device first so heatmap() renders into the file rather
# than an on-screen window.
r.png(file='/tmp/out.png')
r.heatmap(heat_data)
r("dev.off()")
writes to the file /tmp/out.png without displaying the image:
.
Preventing the displayed image from immediately closing can be done like this:
script.py:
import numpy as np
import rpy2.robjects as ro
import rpy2.robjects.numpy2ri
import rpy2.rinterface as rinterface
import time
import threading
# Route numpy arrays straight into R calls.
ro.numpy2ri.activate()
r = ro.r

def ion():
    """Pump R GUI events in the background so the plot window stays live."""
    def pump_events(interval=0.03):
        # Handle pending R window events (resize, repaint, close) forever.
        while True:
            rinterface.process_revents()
            time.sleep(interval)
    worker = threading.Thread(target=pump_events)
    worker.daemon = True
    worker.start()

ion()

matrix = np.random.random((10, 10))
r.heatmap(matrix)
# Copy the already-displayed plot to a PNG, then close the copy device.
r("dev.copy(png,'/tmp/out2.png')")
r("dev.off()")

# Keep the interpreter (and therefore the window) alive until Enter.
try:
    # for Python2
    raw_input()
except NameError:
    # for Python3
    input()
The raw_input or input call prevents the Python interpreter from exiting, thus allowing the window to stay open, until the user presses Enter.
The ion function calls rinterface.process_revents() periodically so the
displayed window will react to GUI events such as resizing or being closed.
dev.copy(png,'/tmp/out2.png') saves the already-displayed image to a
file.

Categories