Read line by line using timer - python

I am trying to read a txt file line by line at a specific interval, using a Python timer,
but it only ever reads the first line, which it prints continuously.
My code is:
def read():
    try:
        while True:
            fo = open("foo.txt", "r")
            threading.Timer(1.0, read).start()
            line = fo.readline()
            print line
            if len(line) == 0:
                break
    except:
        pass

read()

The issue is that you open the file again on every iteration, which starts reading from the first line; you read that line, print it, and continue the loop, which makes the loop infinite.
Also, this is not how you use threading.Timer(): threading.Timer(1.0, read) starts the read function in a new thread after 1 second, so after some time you would have loads of threads all running read() indefinitely.
What you are trying to do can easily be accomplished (without threading.Timer()) by using time.sleep(), which makes your program sleep for a specific number of seconds. Example -
import time

def read():
    with open("foo.txt", "r") as fo:
        for line in fo:
            print line
            time.sleep(1)

read()
If you really want to use threading.Timer(), then you do not need the while loop, and you should pass the file object as an argument to the function. Example -
def read(fo):
    line = fo.readline()
    print line
    if len(line) == 0:
        return
    t = threading.Timer(1.0, read, args=(fo,))
    t.start()
    t.join()
And then call the function initially as -
with open("foo.txt", "r") as fo:
read(fo)
Example/Demo -
>>> import threading
>>> def a(x):
...     if x > 50:
...         return
...     print(x)
...     t = threading.Timer(1.0, a, args=(x+1,))
...     t.start()
...     t.join()
...
>>> a(1)
1
2
3
4
5
6
7
8

Related

Pandas Read Continuously Growing CSV File

I have a continuously growing CSV file that I want to read periodically, and I am only interested in new values.
I was hoping to do something like:
file_chunks = pd.read_csv('file.csv', chunksize=1)
while True:
    do_something(next(file_chunks))
    time.sleep(0.1)
at a frequency that is faster than the .csv file grows.
However, as soon as the iterator fails to return a value once, it "breaks" and stops returning values, even if the .csv file has grown in the meantime.
Is there a way to read a continuously growing .csv file line by line?
You could build a try/except around it, or add an if statement that first checks whether file_chunks returned anything.
That way it shouldn't break anymore, and it only sleeps when there are no more chunks left:
while True:
    file_chunks = pd.read_csv('file.csv', chunksize=1)
    while True:
        try:
            do_something(next(file_chunks))
        except StopIteration:
            time.sleep(0.1)
            break  # re-open the file so the iterator picks up appended rows
This is easier to do with the standard csv module, where you can write your own line iterator that knows how to read an updating file. The generator below reads in binary mode so that it can track the file position, closes the file at EOF, and polls the file's size for appended data. It can fail if the reader sees a partial update because the writer hasn't flushed yet, or if a CSV cell contains an embedded newline, which invalidates the reader's assumption that a binary-mode newline always terminates a row.
import csv
import time
import os
import threading
import random

def rolling_reader(filename, poll_period=.1, encoding="utf-8"):
    pos = 0
    while True:
        # wait until the file exists and has grown past our last position
        while True:
            try:
                if os.stat(filename).st_size > pos:
                    break
            except FileNotFoundError:
                pass
            time.sleep(poll_period)
        fp = open(filename, "rb")
        fp.seek(pos)
        for line in fp:
            if line.strip():
                yield line.decode(encoding)
        pos = fp.tell()
        fp.close()

# ---- TEST - thread updates test.csv periodically

class GenCSVThread(threading.Thread):
    def __init__(self, csv_name):
        super().__init__(daemon=True)
        self.csv_name = csv_name
        self.start()

    def run(self):
        val = 1
        while True:
            with open(self.csv_name, "a") as fp:
                for _ in range(random.randrange(4)):
                    fp.write(",".join(str(val) for _ in range(4)) + "\n")
                    val += 1
            time.sleep(random.random())

if os.path.exists("test.csv"):
    os.remove("test.csv")

test_gen = GenCSVThread("test.csv")

reader = csv.reader(rolling_reader("test.csv"))
for row in reader:
    print(row)
A platform-dependent improvement would be to use a facility such as inotify to trigger reads on file-close events, reducing the risk of reading partial data.
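As a rough sketch of that idea, the third-party watchdog package (which wraps inotify on Linux) can invoke a callback when the file changes; the filename and the tail-printing body here are illustrative assumptions, not part of the original answer:
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

class TailHandler(FileSystemEventHandler):
    """Prints any bytes appended to one file since the last event."""
    def __init__(self, filename):
        self.filename = filename
        self.pos = 0

    def on_modified(self, event):
        if event.src_path.endswith(self.filename):
            with open(self.filename, "rb") as fp:
                fp.seek(self.pos)         # skip what we already saw
                for line in fp:
                    print(line.decode("utf-8").rstrip())
                self.pos = fp.tell()      # remember where we stopped

observer = Observer()
observer.schedule(TailHandler("test.csv"), path=".")
observer.start()
try:
    while True:
        time.sleep(1)                     # main thread just idles
finally:
    observer.stop()
    observer.join()
The same partial-write caveat applies: an event can fire before the writer has flushed a complete row.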

Python: Read a line from a file then remove it with threading

I have a program that uses Python threading to read different lines in a file; if it reads a duplicate line it reads another one, and once it has read a line it removes it from the file. The problem is that when it reads the file, the file doesn't seem to get updated, or I'm not quite sure what's happening: it can sometimes read the same line as before, which breaks it. Is my code the most effective way to do this?
def read_tokens_list():
    tokens = []
    with open('inputTokens.txt', 'r', encoding='UTF-8') as file:
        lines = file.readlines()
        for line in lines:
            tokens.append(line.replace('\n', ''))
    return tokens

def worker(token_list):
    while True:
        token = random.choice(token_list)
        print(token)
        ver = open("Fullyverified.txt", "a+")
        ver.write(token + "\n")
        with open("inputTokens.txt", "r") as f:
            lines = f.readlines()
        with open("inputTokens.txt", "w") as f:
            for line in lines:
                if line.strip("\n") != token:
                    f.write(line)
        time.sleep(1)

def main():
    threads = []
    num_thread = input('Number of Threads: ')
    num_thread = int(num_thread)
    token_list = read_tokens_list()  # read in the inputTokens.txt file
    random.shuffle(token_list)  # shuffle the list into random order
    tokens_per_worker = len(token_list) // num_thread  # how many tokens from the list each worker will get (roughly)
    for i in range(num_thread):
        if (i + 1) < num_thread:
            num_tokens_for_this_worker = tokens_per_worker  # give each worker an even share of the list
        else:
            num_tokens_for_this_worker = len(token_list)  # except the last worker gets whatever is left
        # we'll give the first (num_tokens_for_this_worker) tokens in the list to this worker
        tokens_for_this_worker = token_list[0:num_tokens_for_this_worker]
        # and remove those tokens from the list so that they won't get used by anyone else
        token_list = token_list[num_tokens_for_this_worker:]
        t = threading.Thread(target=worker, args=(tokens_for_this_worker,))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()

if __name__ == '__main__':
    main()
Use a lock.
Something like:
from threading import Lock
# ...
lock = Lock()
# ...

def worker(token_list, lock=lock):
    # ...
    with lock:
        with open("inputTokens.txt", "r") as f:
            lines = f.readlines()
        with open("inputTokens.txt", "w") as f:
            for line in lines:
                if line.strip("\n") != token:
                    f.write(line)
    # ...
The idea of the lock is to protect resources from being accessed by various threads simultaneously. So while one thread is working with the file, the others are waiting.
The next question is whether this approach still makes sense, because depending on the size of your file, threads might be stuck waiting for the lock most of the time.
What about a database instead of a file? Then you don't have to rewrite the full file, you just delete/update an entry.
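As a rough sketch of that database idea using the standard-library sqlite3 module (the tokens.db file and tokens table are made-up names for illustration), each worker can atomically claim and delete one token, with SQLite serializing the writers:
import sqlite3

def claim_token(db_path="tokens.db"):
    conn = sqlite3.connect(db_path, isolation_level=None)  # autocommit mode
    try:
        conn.execute("BEGIN IMMEDIATE")  # take the write lock before we read
        row = conn.execute("SELECT rowid, token FROM tokens LIMIT 1").fetchone()
        if row is not None:
            conn.execute("DELETE FROM tokens WHERE rowid = ?", (row[0],))
        conn.execute("COMMIT")
        return None if row is None else row[1]
    finally:
        conn.close()  # closing without commit rolls back, so no token is lost
Because BEGIN IMMEDIATE is taken before the SELECT, two workers can never claim the same row, which removes the duplicate-read problem entirely.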

Writing a line in a file when finding a keyword

I am trying to make a function which writes a line when it finds some text inside a file.
Example: it finds "hello" in a .txt file, so it then writes "Hi!" on the following line. One more thing: I want it to write "Hi!" not the first time it finds "hello" but the second.
Here is what I have been trying, but I don't know if the idea is right. Any help?
def line_replace(namefilein):
    print namefilein
    filein = open(namefilein, "rw")
    tag = "intro"
    filein.read()
    for line in filein:
        if tag == "second" or tag == "coord":
            try:
                filein.write("\n\n %s" % (text-to-be-added))
                print line
            except:
                if tag == "coord":
                    tag = "end"
                else:
                    tag = "coord"
        if " text-to-find" in line:
            if tag == "intro":
                tag = "first"
            elif tag == "first":
                tag = "second"
    filein.close()
You can use this code:
def line_replace(namefilein):
    new_content = ''
    first_time = False
    with open(namefilein, 'r') as f:
        for line in f:
            new_content += line
            if 'hello' in line:
                if first_time:
                    new_content += 'Hi!' + '\n'
                else:
                    first_time = True
    with open(namefilein, 'w') as f:
        f.write(new_content)
Note that I am using the with statement, which in Python is a context manager; in this case it means the file will be closed automatically once the block of code has executed.
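For illustration, the first with block above behaves roughly like this explicit try/finally:
f = open(namefilein, 'r')
try:
    for line in f:
        new_content += line  # ... same body as inside the with block
finally:
    f.close()  # runs even if the body raises an exception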
Let's suppose you have a file my_file.txt whose contents are:
hello
friend
this
is
hello
And let's say your file is in the same directory as the Python file that has your code, so calling:
line_replace('my_file.txt')
will leave my_file.txt with the following contents:
hello
friend
this
is
hello
Hi!

How to Return to the First Line in a Urlopen Object

I am iterating over a .dat file served on an http website, using
import urllib2
test_file = urllib2.urlopen('http://~/file.dat')
And then I have a function which iterates over the file:
def f(file):
    while True:
        iter = file.readline()
        if iter == "":
            break
        print iter
If I want to call this function twice without opening the test_file again:
f(test_file)
f(test_file)
then what should I add to the f function?
Update:
Since I am not allowed to change anything outside the function, I finally came up with a silly but effective solution:
def f(file):
    while True:
        iter = file.readline()
        if iter == "":
            break
        print iter
    global test_file
    test_file = urllib2.urlopen('http://~/file.dat')
Thanks to the guys who answered my question!
f.seek(0)
returns you to the start of the file; the argument is the byte position in the file. (Note that this works for ordinary file objects, but a urllib2 response is not seekable.)
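A quick illustrative sketch with an ordinary local file (foo.txt is a made-up name):
fo = open("foo.txt", "r")
print fo.readline()  # first line
fo.seek(0)           # jump back to byte 0, the start of the file
print fo.readline()  # first line again
fo.close()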
So the best thing for you to do is to save the output of your f.read() to a variable and then push it through StringIO.StringIO, whose readline() method works like f.readline() but operates entirely in memory.
import urllib2
import StringIO

t_fh = urllib2.urlopen('http://ftp.cs.stanford.edu/pub/sgb/test.dat')
test_data = t_fh.read()

def f(data):
    buf = StringIO.StringIO(data)
    while True:
        line = buf.readline()
        if line == "":
            break
        print line

f(test_data)
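For what it's worth, under Python 3 the same trick would use the io module and urllib.request; the decode step below assumes the file is plain single-byte text, which may not hold for an arbitrary .dat file:
import io
import urllib.request

t_fh = urllib.request.urlopen('http://ftp.cs.stanford.edu/pub/sgb/test.dat')
test_data = t_fh.read()  # bytes

def f(data):
    buf = io.StringIO(data.decode('latin-1'))  # assumption: single-byte encoding
    for line in buf:
        print(line.rstrip('\n'))

f(test_data)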

Python reload textfile into string every x seconds in a loop

I am trying to read a text file into a string, do something with the string, then every X seconds
re-read the text file (in case it has changed) to update the string and do the same thing again, over and over in a loop, without spawning an infinite number of processes.
So, something like:
in an infinite loop:
    open 'MyTextFile' and read it into a string
    do stuff with the string of text it reads from the file
    close the file (if I need to, to allow another script to write to it)
    wait X seconds before clearing the string and re-reading the same 'MyTextFile' file
    to update the string and do it all again (in an infinite loop until I tell it to stop)
What would be a good, sturdy (reliable to run for a long time) way to do that?
Thanks!
import os
import time

myDataFile = "MyData.txt"
data = ""

def ReadFile(data):
    # only need to read it if data has changed
    # detect 'modified' date of myDataFile and only PrintToPrinter IF it has changed
    # and if it has NOT changed, wait 10 seconds and check again
    # IF CHANGED
    with open(myDataFile, "r") as myFile:
        try:
            data = myFile.read().replace('\n', ' ')
        finally:
            myFile.close()
    ShowOnScreen(data)
    PrintToPrinter(data)
    # ELSE
    # data has not changed..
    # no need to print it so I shouldn't send back to PrintToPrinter
    # but I still want to ShowOnScreen(data) on screen
    # and keep the program running to check for changes
    ShowOnScreen(data)
    sleep(10)
    ReadFile(data)

def ShowOnScreen(data):
    print(time + ' ' + data)

def PrintToPrinter(data):
    # send DateTimeStamp+data to printer here
    # go back to checking for changes
    ReadFile(data)

# kick off the program to start looking for changes
# data starts out as "" so it should always read the first time
# and send to printer and screen
ReadFile(data)
This can easily be done using time.sleep from the time module. Sample code is pasted below:
import time

def file_handler(filename):
    with open(filename) as fh:
        line = fh.read()
        print line
        print "Length of string: %s" % len(line)

while True:
    file_handler("test.txt")
    time.sleep(10)
Ended up doing something like this:
#!/usr/bin/env python
import sys
import datetime
import time
from time import sleep
# probably should not import both time and datetime

MyFile = "MyFile.txt"
MyText = ""
MyDate = ""

def GetDateTime():
    MyDate = time.strftime("%x %I:%M %p")
    return MyDate + " "

def ReadFile(MyText):
    with open(MyFile, "r") as myfile:
        try:
            MyText2 = myfile.read().replace('\n', ' ')
        finally:
            myfile.close()
    CurDateTime = GetDateTime()
    if MyText == MyText2:
        # file hasn't changed...print to screen only
        print CurDateTime
        sleep(5)
        ReadFile(MyText2)
    else:
        # print to screen and printer
        print CurDateTime + MyText2
        # send CurDateTime + MyText2 to printer
        sleep(5)
        ReadFile(MyText2)

ReadFile(MyText)
Hope this helps somebody
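For the change detection the comments above describe, a minimal polling sketch using os.path.getmtime might look like the following; it uses an iterative loop rather than recursion, since the recursive ReadFile calls above will eventually hit Python's recursion limit (the filename and 10-second period are just placeholders):
import os
import time

def watch(filename, period=10):
    last_mtime = None
    while True:
        try:
            mtime = os.path.getmtime(filename)  # modified date of the file
        except OSError:
            mtime = None  # file missing for the moment; keep polling
        if mtime is not None and mtime != last_mtime:
            last_mtime = mtime
            with open(filename, "r") as fh:
                data = fh.read().replace('\n', ' ')
            print time.strftime("%x %I:%M %p") + ' ' + data  # changed: show it
        time.sleep(period)

watch("MyData.txt")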
