Check if timestamp is between limits - python

I'm looking for a way, inside a log-file-reading process, to check whether the timestamp on the current line falls within a given time range.
currently I have :
variables: datetime_ed and datetime_sd hold the end date and start date respectively, both parsed with the format fd_fmt = '%Y-%m-%d-%H-%M'
the current existing code block looks like
# Scan the log once: a cheap ID pre-check first, then extract the ID proper.
with open(f, "r") as fi:  # , open(output_file, "w") as fo:
    for line in fi:
        # for pattern in REGEXES:
        if REGEXES[0].search(line):  # search by ID instead of the wordslist at first
            found = REGEXES[1].search(line)
            if found:
                line_id = found.group(1)
                # doing stuff here
so I would do something like
# Same scan, with the desired extra condition marked where it has to go.
with open(f, "r") as fi:  # , open(output_file, "w") as fo:
    for line in fi:
        # for pattern in REGEXES:
        if REGEXES[0].search(line):  # search by ID instead of the wordslist at first
            found = REGEXES[1].search(line)
            # ONLY IF LINE CONTAINS A DATE BETWEEN datetime_sd AND datetime_ed
            if found:
                line_id = found.group(1)
                # doing stuff here
how to do that ?
I didn't find a function for this in the datetime documentation.
as temporary solution I made that :
# Filter log lines to the (datetime_sd, datetime_ed) window before running the
# more expensive regex matching.
#
# Bug fix: the original `except ValueError: pass` fell through to the range
# test, so on a line without a leading timestamp `ts` was either unbound (very
# first line) or stale from the previous dated line, silently mis-filtering
# undated lines.  Skipping the line with `continue` is the correct behaviour.
with open(f, "r") as fi:  # , open(output_file, "w") as fo:
    for line in fi:
        try:
            # timestamps occupy the first 16 characters: '%Y-%m-%d-%H-%M'
            ts = datetime.datetime.strptime(line[:16], fd_fmt)
        except ValueError:
            continue  # not a dated line -- skip instead of reusing a stale ts
        # print("ts ", ts)
        if datetime_sd < ts < datetime_ed:
            # for pattern in REGEXES:
            if REGEXES[0].search(line):  # search by ID instead of the wordslist at first
                match = REGEXES[1].search(line)
                if match:
                    # etc...
                    pass
Is this good / correct ?

Related

Python - Search a list of a group strings in text file

I want to search a list of group of strings inside a text file (.txt or .log).
it must include group A or B (or CDE..).
For group A or B, all of the group's words must appear on the same line, though not necessarily adjacent (e.g. ["123456", "Login"] or ["123457", "Login"]); if they all appear on one line, save that line to a new txt file.
Some of example output line:
20221110,1668057560.965,AE111,123457,0,"Action=Account Login,XXX,XXX",XXX,XXX
20221110,1668057560.965,AE112,123458,0,"Action=Account Login,XXX,XXX",XXX,XXX
20221111,1668057560.965,AE113,123458,0,"Action=Order,XXX,XXX",XXX,XXX
below is my code:
import os, re

path = "Log\\"
file_list = [path + fname for fname in os.listdir(path) if fname.endswith('.log')]

keep_phrases1 = ["123456", "Login"]
keep_phrases2 = ["123457", "Login"]

# One regex per phrase group: every word must appear on the line, in order,
# each delimited by word boundaries.
pat = re.compile(r"\b" + r"\b.*?\b".join(re.escape(word) for word in keep_phrases1) + r"\b")
pat2 = re.compile(r"\b" + r"\b.*?\b".join(re.escape(word) for word in keep_phrases2) + r"\b")
print(pat2, pat)

if len(file_list) != 0:
    for infile in sorted(file_list):
        with open(infile, encoding="latin-1") as f:
            f = f.readlines()
        for line in f:
            found1 = pat.search(line)
            found2 = pat2.search(line)
            if found1 or found2:
                # keep any line matching either group
                with open(outfile, "a") as wf:
                    wf.write(line)
It works for me, but it is not easy to add more groups of words, and I think the code is hard to understand.
My questions: how can I simplify the code?
How can I make it easier to add other groups to search for? e.g. ["123458", "Login"] ["123456", "order"] ["123457", "order"]
import os, re

# Collect every .log file under the Log directory.
path = "Log\\"
file_list = [path + entry for entry in os.listdir(path) if entry.endswith('.log')]
All keep_phrases in a container, I choose a dictionary but since they are identified by order, it could have been a list:
# Named word groups: a line is kept when ALL words of at least one group match.
keep_phrases = dict(keep_phrases1=["123456", "Login"],
                    keep_phrases2=["123457", "Login"])
# Alternative, a list would work:
# keep_phrases = [["123456", "Login"], ["123457", "Login"]]
Now let's generate a list with the compiled patterns:
def compile_pattern(keep_phrase):
    """Compile a regex that requires every word of *keep_phrase*, in order,
    on a single line, each word bounded by \\b word boundaries."""
    body = r"\b.*?\b".join(re.escape(word) for word in keep_phrase)
    return re.compile(r"\b" + body + r"\b")
# One compiled pattern per group; dict iteration preserves insertion order.
patterns = [compile_pattern(group_words) for group_words in keep_phrases.values()]
# if keep_phrases had been a list, we would do
# patterns = [compile_pattern(keep_phrase) for keep_phrase in keep_phrases]
Finally, we look for matches for every pattern and if we get any finding, we write to file.
# Append to the output file every line that matches at least one group pattern.
if file_list:
    for infile in sorted(file_list):
        with open(infile, encoding="latin-1") as f:
            f = f.readlines()
        for line in f:
            if any(pat.search(line) for pat in patterns):
                with open(outfile, "a") as wf:
                    wf.write(line)
Try, this. I read the whole file in a string to make code fast and readable, findall will return a list with all matching lines for the file.
If memory is a problem the pattern also works on individual lines:
import re

file_list = ["sit.txt"]
keep_phrases = [["123456", "Login"], ["123457", "Login"]]

# Build one alternation branch per word pair: lazily reach p1, then p2, then
# consume the rest of the line (up to a newline or end of string).
branches = [r"(?:.*?(?:" + p1 + r"\b.*?" + p2 + r".*?(?:\n|$)))" for p1, p2 in keep_phrases]
pat = r"|".join(branches)

for infile in sorted(file_list):
    with open(infile, encoding="latin-1") as f:
        text = f.read()
    # findall returns every matching line for this file in one pass
    print(re.findall(pat, text))
Without regex
def match_words(line, words):
    """Return True when every word in *words* occurs among the fields of *line*.

    line  -- a log line already split into fields (e.g. line.split(","))
    words -- one phrase group, e.g. ["123456", "Login"]

    Bug fix: the original wrote `all(word in words for word in line)`, i.e. it
    required every FIELD of the line to be one of the phrase words -- almost
    always False for real log lines.  The quantifier belongs on *words*: every
    phrase word must be present among the line's fields.
    """
    return all(word in line for word in words)
with open(infile, encoding="latin-1") as f:
    f = f.readlines()
for line in f:
    fields = line.split(",")
    # keep the line when all words of at least one group appear in its fields
    if any(match_words(fields, group) for group in [keep_phrases1, keep_phrases2]):
        with open(outfile, "a") as wf:
            wf.write(line)

How to replace two different lines of text?

I need to create a file that changes the date and name of a .txt, but I can only change one or the other with this code I found on the internet, can anyone give me any tips?
Print
import os
from ast import Str  # NOTE(review): unused in this snippet, kept from the original

data = "02/07/2022"
name = "Alan"

# Read example.txt, substituting the date placeholder on every line, and write
# the result to final.txt.  `with` guarantees both files are closed even if an
# exception occurs mid-read (the original only closed on the happy path).
# NOTE(review): `name` is never applied -- the snippet shows no name
# placeholder to replace; add a second .replace() once one is defined.
replacement = ""
with open("example.txt", "r") as file:
    for line in file:
        line = line.strip()
        changes = line.replace("__/__/____", data)
        replacement = replacement + changes + "\n"

with open("final.txt", "w") as fout:
    fout.write(replacement)
You don't need to do this a line a time. You can replace that entire program with this:
data = "02/07/2022"
name = "Alan"

# Whole-file replacement in one pass -- no per-line loop needed.
# Fix: the original chained open(...).read()/open(...).write(...) without
# closing either handle; `with` closes them deterministically (CPython's
# refcounting happened to clean up, but that is implementation-specific).
with open("example.txt", "r") as src:
    text = src.read().replace("__/__/____", data)
with open("final.txt", "w") as dst:
    dst.write(text)

What is the correct way to list a specific string in txt file then place it in a csv file? [python]

I have this sample_vsdt.txt file containing SHA-1 and description like this inside of my txt file:
Scanning samples_extracted\02b809d4edee752d9286677ea30e8a76114aa324->(Microsoft RTF 6008-0)
->Found Virus [Possible_SCRDL]
Scanning samples_extracted\0349e0101d8458b6d05860fbee2b4a6d7fa2038d->(Adobe Portable Document Format(PDF) 6015-0)
->Found Virus [TROJ_FRS.VSN11I18]
Example:
SHA-1: 02b809d4edee752d9286677ea30e8a76114aa324
Description:(Microsoft RTF 6008-0)
Problem:
My task is to extract those SHA-1 hashes and descriptions from my txt file and write them to a csv file. I was able to do that using a regex, a prefix and a delimiter. However, this example is what makes it hard for me:
Scanning samples_extracted\0191a23ee122bdb0c69008971e365ec530bf03f5
- Invoice_No_94497.doc->Found Virus [Trojan.4FEC5F36]->(MIME 6010-0)
- Found 1/3 Viruses in samples_extracted\0191a23ee122bdb0c69008971e365ec530bf03f5
It has different line pattern and I only want to get the SHA-1 in the first line not the 4th line and get the description in the second line.
Output:
The output went wrong because the description (MIME 6010-0) was put in the SHA-1 column.
0191a23ee122bdb0c69008971e365ec530bf03f5
(MIME 6010-0)
02b809d4edee752d9286677ea30e8a76114aa324 (Microsoft RTF 6008-0)
0349e0101d8458b6d05860fbee2b4a6d7fa2038d (Adobe Portable Document Format(PDF) 6015-0)
035a7afca8b72cf1c05f6062814836ee31091559 (Adobe Portable Document Format(PDF) 6015-0)
Code
import csv
import re
# I/O locations and the markers used to carve each scanner-report line apart.
INPUTFILE = 'samples_vsdt.txt'   # scanner report to parse
OUTPUTFILE = 'output.csv'        # destination CSV (SHA-1, description rows)
PREFIX = '\\'                    # path separator that precedes the SHA-1 hash
DELIMITER = '->'                 # separates the hash from its description
DELIMITER2 = ']->'               # NOTE(review): defined but unused in this snippet
PREFIX2 = ' - '                  # NOTE(review): defined but unused in this snippet
def read_text_file(inputfile):
    """Parse a scanner report and return a list of extracted rows.

    A "Scanning ...\\<sha>->(<desc>)" line yields ['<sha>', '(<desc>)'];
    a "->Found Virus" line containing "->(" yields its parenthesised part.
    """
    rows = []
    with open(inputfile, 'r') as fh:
        for raw in fh.readlines():
            text = raw.rstrip('\n')
            has_hash = re.search(r'[a-zA-Z0-9]{40}', text)
            if has_hash and "Found" not in text:
                # hash-and-description line: drop the path prefix, then split
                # the hash from the description on the arrow delimiter
                rows.append(text.split(PREFIX, 1)[-1].split(DELIMITER))
            elif "->(" in text and "Found" in text:
                # result line: keep only the first parenthesised group
                paren = re.search(r'\(.*?\)', text)
                sha = re.search(r'[a-zA-Z0-9]{40}', text)  # computed but unused, as in the original
                if paren is not None:
                    rows.append(paren.group().split("]->"))
    return rows
def write_csv_file(data, outputfile):
    """Write *data* (a list of rows) to *outputfile* as comma-separated CSV.

    Bug fix: the original opened the file in 'wb' (a Python 2 idiom).  Under
    Python 3, csv.writer requires a text-mode handle, and the csv docs mandate
    newline='' so the writer controls the line endings itself.
    """
    with open(outputfile, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"')
        for row in data:
            csvwriter.writerow(row)
def main():
    """Parse the report file and dump the extracted rows to CSV."""
    write_csv_file(read_text_file(INPUTFILE), OUTPUTFILE)


if __name__ == '__main__':
    main()
Here is the full content of my text file:
sample_vsdt.txt
I changed some logic, maybe I can give you some different ideas.
Basically it checks if the string Scanning samples_extracted is present with (, which means that the description is on the same line of the sha.
Otherwise with only Scanning samples_extracted means that the desc is on the following line ( in your example there are some blank line, I had to add a while cycle )
Prints the result, cherry-pick logic and put in your program.
import re

with open("vCjjGQxe.txt") as f:
    for line in f:
        # Case 1: hash and description on the same line:
        #   "Scanning samples_extracted\<sha>->(<desc>)"
        if "Scanning samples_extracted" in line and "(" in line:
            sha = re.search('\\\(.*)->', line).group(1)
            desc = re.search('->\((.*)\)', line).group(1)
            print("SHA-1:", sha)
            print("Description:", desc)
            continue
        # Case 2: hash alone; the description sits on a later "(...)" line
        # (blank lines may intervene, hence the inner advance-until loop).
        if "Scanning samples_extracted" in line:
            sha = re.search('\\\(.*)$', line).group(1)
            while True:
                follow = next(f)
                if "(" in follow:
                    desc = re.search('->\((.*)\)', follow).group(1)
                    break
            print("SHA-1:", sha)
            print("Description:", desc)

How to split text file by id in python

I have a bunch of text files containing tab separated tables. The second column contains an id number, and each file is already sorted by that id number. I want to separate each file into multiple files by the id number in column 2. Here's what I have.
# Question's original version, kept verbatim.  Known problems (stated in the
# question's own EDIT):
#   * `if int(thisid) <> lastid` uses the Python 2-only `<>` operator;
#   * the input files used bare '\r' line endings, so without universal-newline
#     mode readlines() returned the whole file as a single "line" and the
#     per-id split only ever happened once.
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
with open(readpath+filename, 'r') as fh:
lines = fh.readlines()
lastid = 0
f = open(writepath+'checkme.txt', 'w')
f.write(filename)
for line in lines:
# the id lives in the second tab-separated column
thisid = line.split("\t")[1]
if int(thisid) <> lastid:
# id changed: close the current chunk and open '<id>-<filename>'
f.close()
f = open(writepath+thisid+'-'+filename,'w')
lastid = int(thisid)
f.write(line)
f.close()
What I get is simply a copy of all the read files with the first id number from each file in front of the new filenames. It is as if
thisid = line.split("\t")[1]
is only done once in the loop. Any clue to what is going on?
EDIT
The problem was my files used \r rather than \r\n to terminate lines. Corrected code (simply adding 'rU' when opening the read file and swapping != for <>):
# Split each tab-separated input file into one output file per id (column 2);
# the inputs are already sorted by that id.
#
# Fix over the question's EDIT: mode 'rU' was deprecated since Python 3.4 and
# removed in 3.11.  In Python 3, plain 'r' already applies universal newlines,
# so files terminated with bare '\r' still split into lines correctly.
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
    with open(readpath + filename, 'r') as fh:
        lines = fh.readlines()
    lastid = 0
    # 'checkme.txt' records the current input file and receives any rows seen
    # before the first id change.
    f = open(writepath + 'checkme.txt', 'w')
    f.write(filename)
    for line in lines:
        thisid = line.split("\t")[1]
        if int(thisid) != lastid:
            # id changed: close the current chunk, start '<id>-<filename>'
            f.close()
            f = open(writepath + thisid + '-' + filename, 'w')
            lastid = int(thisid)
        f.write(line)
    f.close()
If you're dealing with tab delimited files, then you can use the csv module, and take advantage of the fact that itertools.groupby will do the previous/current tracking of the id for you. Also utilise os.path.join to make sure your filenames end up joining correctly.
Untested:
import os
import csv
from itertools import groupby

readpath = 'path-to-read-file'
writepath = 'path-to-write-file'

for filename in os.listdir(readpath):
    with open(os.path.join(readpath, filename)) as fin:
        reader = csv.reader(fin, delimiter='\t')
        # groupby yields one (id, rows) run per block of consecutive ids;
        # the inputs are pre-sorted by that column, so each id is one run.
        for file_id, rows in groupby(reader, lambda row: row[1]):
            outname = os.path.join(writepath, file_id + '-' + filename)
            with open(outname, 'w') as fout:
                csv.writer(fout, delimiter='\t').writerows(rows)

extracting lines in a text file between two lines of strings

I have the following example text file (it is in the format as indicated below). I want to extract everything between the lines "Generating configuration...." and "`show accounting log all`", this is the beginning and end of what I am interested in.
some lines
some more line
Generating configuration....
interested config
interested config
interested config
`show accounting log all`
some lines
some more line
I wrote the following code, but its does not stop appending the lines to the textfile after it has found `show accounting log all`.
# Question's original attempt, kept verbatim.  Issues worth noting:
#   * the file is opened in 'rb', so under Python 3 each line is bytes while
#     the patterns are str -- re.match() then raises TypeError;
#   * the unescaped dots in "Generating configuration...." match ANY four
#     characters, so the start pattern is looser than intended;
#   * the start-marker line itself gets appended, because config_found is
#     already True when the `if config_found:` test runs on that iteration.
config_found = False
with open(filename, 'rb') as f:
textfile_temp = f.readlines()
for line in textfile_temp:
if re.match("Generating configuration....", line):
config_found = True
if re.match("`show accounting log all`", line):
config_found = False
if config_found:
i = line.rstrip()
textfile.append(i)
what am i doing wrong with my statements?
Instead of single quotes, you have to use backquotes in your comparison, and you can use if/elif to extract the in-between strings. I have modified it as below and it is working:
# Test the end marker first, collect second, and test the start marker last --
# this ordering keeps both marker lines themselves out of the result.
with open('file.txt', 'rb') as f:
    textfile_temp = f.readlines()

config_found = False
textfile = []
for line in textfile_temp:
    if re.match("`show accounting log all`", line):
        config_found = False   # reached the end marker: stop collecting
    elif config_found:
        textfile.append(line.rstrip())
    elif re.match("Generating configuration....", line):
        config_found = True    # start collecting from the NEXT line
print(textfile)
Output:
['interested config', 'interested config', 'interested config']
Instead you can use split as below:
# Slice-based alternative: take everything after the start marker and before
# the end marker in one expression.
with open('file.txt', 'rb') as f:
    textfile_temp = f.read()
inner = textfile_temp.split('Generating configuration....')[1]
print(inner.split("`show accounting log all`")[0])
Output:
interested config
interested config
interested config
config_found appears to have no scope outside of the loop.
Put config_found = False before the loop and it should work fine.

Categories