How to check whether a Windows path matches a partial Linux path string - Python

I am trying to check which files present in my full_list_files are also present in required_list.
The thing here is they are not exactly equal to one another, but they match on the filename and the last subdirectory.
Example:
'C:\Users\Documents\Updated\Build\Output\M\Application_1.bin' matches "M/Application_1.bin", except that the slashes are different.
So I am trying to make both uniform by using the function convert_fslash_2_bslash.
But still, I see the output below; none of the files are matched.
full_list_files = set(['C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Report.tar.gz', 'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\Application_2.bin', 'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Testing.txt', 'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\masking.tar.gz', 'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\Application_1.bin', 'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Application_1.bin', 'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\History.zip', 'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Challenge.tar.gz', 'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Application_2.bin', 'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\porting.tar.gz', 'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Booting.tar.gz'])
original required_list = set(['N/Application_2.bin', 'M/masking.tar.gz', 'N/Application_1.bin', 'O/Challenge.tar.gz', 'M/Application_1.bin', 'O/Testing.txt', 'M/rooting.tar.gz', 'M/Application_2.bin', 'O/History.zip', 'N/porting.tar.gz', 'O/Report.tar.gz'])
modified required_list = ['N\\Application_2.bin', 'M\\masking.tar.gz', 'N\\Application_1.bin', 'O\\Challenge.tar.gz', 'M\\Application_1.bin', 'O\\Testing.txt', 'M\\rooting.tar.gz', 'M\\Application_2.bin', 'O\\History.zip', 'N\\porting.tar.gz', 'O\\Report.tar.gz']
'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Report.tar.gz' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\Application_2.bin' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Testing.txt' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\masking.tar.gz' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\Application_1.bin' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Application_1.bin' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\History.zip' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Challenge.tar.gz' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Application_2.bin' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\porting.tar.gz' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Booting.tar.gz' not present
How can I get it working correctly?
import os
import sys
import re

full_list_files = {
    #These are actually real paths parsed from listdir
    #Just for convenience used as strings
    'C:\Users\Documents\Updated\Build\Output\M\Application_1.bin',
    'C:\Users\Documents\Updated\Build\Output\M\Application_2.bin',
    'C:\Users\Documents\Updated\Build\Output\M\masking.tar.gz',
    'C:\Users\Documents\Updated\Build\Output\M\Booting.tar.gz',
    'C:\Users\Documents\Updated\Build\Output\N\Application_1.bin',
    'C:\Users\Documents\Updated\Build\Output\N\Application_2.bin',
    'C:\Users\Documents\Updated\Build\Output\N\porting.tar.gz',
    'C:\Users\Documents\Updated\Build\Output\O\Challenge.tar.gz',
    'C:\Users\Documents\Updated\Build\Output\O\History.zip',
    'C:\Users\Documents\Updated\Build\Output\O\Testing.txt',
    'C:\Users\Documents\Updated\Build\Output\O\Report.tar.gz'
}

required_list = {
    "M/Application_1.bin",
    "M/Application_2.bin",
    "M/masking.tar.gz",
    "M/rooting.tar.gz",
    "N/Application_1.bin",
    "N/Application_2.bin",
    "N/porting.tar.gz",
    "O/Challenge.tar.gz",
    "O/History.zip",
    "O/Testing.txt",
    "O/Report.tar.gz"
}

def convert_fslash_2_bslash(required_file_list):
    required_config_file_list = []
    i = 0
    for entry in required_file_list:
        entry = entry.strip()
        entry = entry.replace('"', "")
        entry = entry.replace('/', '\\')
        required_config_file_list.insert(i, entry)
        i = i + 1
    return required_config_file_list

if __name__ == "__main__":
    print
    print "full_list_files = ", full_list_files
    print
    print "original required_list = ", required_list
    print
    required_config_file_list = convert_fslash_2_bslash(required_list)
    print "modified required_list = ", required_config_file_list
    print
    for f_entry in full_list_files:
        f_entry = repr(f_entry)
        #for r_entry in required_config_file_list:
        #    if ( f_entry.find(r_entry) != -1):
        if f_entry in required_config_file_list:
            print f_entry, " present"
        else:
            print f_entry, " not present"

Here is the logic you need at the bottom:
for f_entry in full_list_files:
    for r_entry in required_config_file_list:
        if f_entry.endswith(r_entry):
            print f_entry, " present"
You need to loop over both collections, then check whether the longer path ends with the shorter path. One of your mistakes was calling repr(), which returns a quoted version of the string with every backslash doubled, so the comparison can never match.
I'll leave it up to you to decide how you'll handle printing paths that are not present at all.
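If you do want to report them, one option (a minimal sketch in the same Python 2 style as the script above) is a for/else, where the else branch runs only when the inner loop finishes without hitting break:
for f_entry in full_list_files:
    for r_entry in required_config_file_list:
        if f_entry.endswith(r_entry):
            print f_entry, " present"
            break
    else:
        # no required entry matched this file
        print f_entry, " not present"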

Related

Generate 8.3 filenames with standard library

I would like to generate an 8.3 filename (as used on DOS/FAT) without the modules win32api or ctypes (neither works with my configuration).
Currently, the code is this:
def short_names(names):
    names2 = []
    for i in names:
        append_tilde = True
        b = set(".\"/\\[]:;=, ")  # ."/\[]:;=,[space] (forbidden chars)
        old = i
        for char in b:
            i = i.replace(char, "")
        if i == old: append_tilde = False
        name_parts = i.split(sep=".")
        name = ''.join(name_parts[0:len(name_parts)-1])
        extension = name_parts[-1][0:3]
        if len(name) > 6:
            name = name[0:6]
            append_tilde = True
        if append_tilde:
            for j in range(1,10):
                if name.upper()+"~"+str(j) not in names2:
                    names2.append(name.upper() + "~" + str(j))
                    break
    return names2
But it returns the "~1" part only, not the 6-character part plus "~1".
For the example input:
["Program Files", "ProgramData", "Programme", "Documents and Settings", "Dokumente und Einstellungen"]
it returns
['~1', '~2', '~3']
Intended return value:
["PROGRA~1", "PROGRA~2", "PROGRA~3", "DOCUME~1", "DOKUME~1"]
Python version: Python 3.10.1 (v3.10.1:2cd268a3a9, Dec 6 2021, 14:28:59) [Clang 13.0.0 (clang-1300.0.29.3)] on darwin
The problem is in the way you try to split a filename into a base part and an extension.
If you call split('.') on a string that doesn't have a . in it, you get back a list with a single element - your original string. This means that name_parts[0:len(name_parts)-1] is the same as name_parts[0:0] which is an empty list. You're setting name to an empty string, while extension is set to the first 3 characters of the entire file name.
You need to detect the case where there was no . in the filename and treat it differently.
name_parts = i.split(sep=".")
if len(name_parts) <= 1:
    name = i
    extension = ''
else:
    name = ''.join(name_parts[0:len(name_parts)-1])
    extension = name_parts[-1][0:3]
P.S. Python has some facilities to make this easier. Check out os.path or pathlib.
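For example, os.path.splitext already handles the no-dot case for you, returning an empty extension:
import os.path

name, extension = os.path.splitext("Program Files")   # ('Program Files', '')
name, extension = os.path.splitext("archive.tar.gz")  # ('archive.tar', '.gz')
Note that splitext keeps the leading dot on the extension, so strip it if you only want the bare three characters.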

Python script to alert on empty/missing logs

I am working on a project to check a file directory and automatically add log files as they are created. A file is being generated every five minutes, but some of the files are being created with a "0" filesize and I would like to alert when this happens.
So the sequence of steps I would like to have are essentially:
Get time (MM:DD:YY HH:MM:SS) *Not sure if I need to do this...
CD to Folder Directory /Netflow/YY/MM/DD
Search for filename "nfcapd.YYYYMMDDHHMM" where MM increments by 5.
If filesize is 0, then email Johnny, Sally and Jimmy
Wait 6 minutes and repeat
This is what I have pieced together thus far. How can I get the desired functionality?
import os
import time

def is_non_zero_file(fpath):
    return True if os.path.isfile(fpath) and os.path.getsize(fpath) > 0 else False

# I need to check storage/Netflow for files named by time, e.g. 13_56_05.txt
while True:
    time.sleep(360)
In addition to enumerating the files in a given path and filtering down to the zero-length ones, you probably want to maintain some state so that you aren't notified about the same zero-length file multiple times. That is, you probably don't want to keep getting a notification that the same file is zero-length indefinitely (although you can modify the example below if you want that behavior).
You may optionally want to do things like verify that the file name strictly meets your naming convention. You may also want to validate that the date-stamp string included in the file name is a valid datetime.
The example below uses the glob module (itself leveraging os.listdir() and fnmatch.fnmatch()) to build up a set of candidate files for inclusion. [1]
The example is intentionally simple and uses a single class to store log sample 'state'. KEEP_SAMPLES samples (instances of logState()) are maintained in the log_states list by using list slicing.
A single alert(msg) function is supplied as a stub for something that might send mail, etc.
References:
[1] https://docs.python.org/3.2/library/glob.html
#!/usr/bin/python3
import os
import glob
import re
from datetime import datetime, timezone
import time
from pprint import pprint

class logState():
    def __init__(self, log_path, glob_patt, re_patt, dt_fmt):
        self.dt = datetime.now(timezone.utc)
        self.log_path = log_path
        self.glob_patt = glob_patt
        self.re_patt = re_patt
        self.dt_fmt = dt_fmt
        self.empty_logs = []
        self.nonempty_logs = []
        # Retrieve only files from glob
        self.files = [f for f in
                      glob.glob(self.log_path + self.glob_patt)
                      if os.path.isfile(f)]
        for f in self.files:
            unq_fname = f.split('/')[-1]
            if unq_fname == None:
                continue
            # Tighter pattern matching
            if re.match(re_patt, unq_fname) == None:
                continue
            # Get the datetime portion of the file name
            f_dtstamp = unq_fname.split('.')[-1]
            # Make sure the datetime stamp represents
            # a valid date
            if datetime.strptime(f_dtstamp, self.dt_fmt) == None:
                continue
            # Check file size, add to the appropriate
            # list
            if os.path.getsize(f) <= 0:
                self.empty_logs.append(f)
            else:
                self.nonempty_logs.append(f)

def alert(msg):
    print("ALERT!: {0}".format(msg))

if __name__ == "__main__":
    # How long to sleep
    SLEEP_SECS = 5
    # How many samples to keep
    KEEP_SAMPLES = 5
    log_states = []
    # Definition for what log states we'll look for
    log_path = './'
    glob_patt = 'nfcapd.[0-9]*'
    re_patt = 'nfcapd.([0-9]{12})'
    dt_fmt = "%Y%m%d%H%M"
    print("-- Setup --")
    print("Sample files in '{0}'".format(log_path))
    print("\t{0} samples kept:".format(KEEP_SAMPLES))
    print("\tglob pattern: '{0}'".format(glob_patt))
    print("\tregex pattern: '{0}'".format(re_patt))
    print("\tdatetime string: '{0}'".format(dt_fmt))
    print("")
    # Collect the initial state
    log_states.append(logState(log_path,
                               glob_patt,
                               re_patt, dt_fmt))
    while True:
        # Print state inventory and current state detail
        print("-- Log States Stored --")
        for i, log_state in enumerate(log_states):
            print("Log state {0} # {1}".format(i, log_state.dt))
        print(" -- Logs size > 0 --")
        pprint(log_states[-1].nonempty_logs)
        print(" -- Logs size <= 0 --")
        pprint(log_states[-1].empty_logs)
        print("")
        time.sleep(SLEEP_SECS)
        log_states = log_states[-KEEP_SAMPLES+1:]
        log_states.append(logState(log_path,
                                   glob_patt,
                                   re_patt,
                                   dt_fmt))
        # p = previous sample, c = current
        p = set(log_states[-2].empty_logs)
        c = set(log_states[-1].empty_logs)
        # only report the items in the current sample
        # not in the last
        if len(c.difference(p)) > 0:
            alert("\nNew zero length logs: " + str(c.difference(p)) + "\n")

Unable to display all the information except for first selection

I am using the following code to process a list of images found in my scene, before the gathered information, namely the tifPath and texPath, is used in another function.
However, in my scene, for example, there are 3 textures, so I should be seeing 3 sets of tifPath and texPath, but I am only seeing 1 of them; whereas if I check surShaderOut or surShaderTex I am able to see all 3 textures' info.
For example, the 3 texture file paths are as follows (in surShaderTex): /user_data/testShader/textureTGA_01.tga, /user_data/testShader/textureTGA_02.tga, /user_data/testShader/textureTGA_03.tga
I guess what I am trying to say is: why is my for statement able to print out all 3 results, yet anything past it only prints a single result?
Any advice?
surShader = cmds.ls(type = 'surfaceShader')
for con in surShader:
    surShaderOut = cmds.listConnections('%s.outColor' % con)
    surShaderTex = cmds.getAttr("%s.fileTextureName" % surShaderOut[0])

path = os.path.dirname(surShaderTex)
f = surShaderTex.split("/")[-1]
tifName = os.path.splitext(f)[0] + ".tif"
texName = os.path.splitext(f)[0] + ".tex"
tifPath = os.path.join(path, tifName)
texPath = os.path.join(path, texName)

convertText(surShaderTex, tifPath, texPath)
Only two lines are part of your for loop. The rest only execute once.
So first this runs:
surShader = cmds.ls(type = 'surfaceShader')
for con in surShader:
    surShaderOut = cmds.listConnections('%s.outColor' % con)
    surShaderTex = cmds.getAttr("%s.fileTextureName" % surShaderOut[0])
Then, after that loop has finished, with surShaderOut and surShaderTex holding only the values from the last iteration, the following is executed once:
path = os.path.dirname(surShaderTex)
f = surShaderTex.split("/")[-1]
tifName = os.path.splitext(f)[0] + ".tif"
texName = os.path.splitext(f)[0] + ".tex"
tifPath = os.path.join(path, tifName)
texPath = os.path.join(path, texName)
Indent that the same as the lines above it, and it'll be run for each element of surShader instead of only once.
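For reference, a sketch of the fully indented version (assuming the same cmds calls and your existing convertText function):
surShader = cmds.ls(type='surfaceShader')
for con in surShader:
    surShaderOut = cmds.listConnections('%s.outColor' % con)
    surShaderTex = cmds.getAttr("%s.fileTextureName" % surShaderOut[0])

    # everything below is now inside the loop, so it runs once per shader
    path = os.path.dirname(surShaderTex)
    f = surShaderTex.split("/")[-1]
    tifName = os.path.splitext(f)[0] + ".tif"
    texName = os.path.splitext(f)[0] + ".tex"
    tifPath = os.path.join(path, tifName)
    texPath = os.path.join(path, texName)

    convertText(surShaderTex, tifPath, texPath)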

Unpack ValueError in Python

I was making a site component scanner with Python. Unfortunately, something went wrong when I added another value to my script. This is my script:
#!/usr/bin/python
import sys
import urllib2
import re
import time
import httplib
import random

# Color Console
W = '\033[0m'    # white (default)
R = '\033[31m'   # red
G = '\033[1;32m' # green bold
O = '\033[33m'   # orange
B = '\033[34m'   # blue
P = '\033[35m'   # purple
C = '\033[36m'   # cyan
GR = '\033[37m'  # gray

#Bad HTTP Responses
BAD_RESP = [400,401,404]

def main(path):
    print "[+] Testing:",host.split("/",1)[1]+path
    try:
        h = httplib.HTTP(host.split("/",1)[0])
        h.putrequest("HEAD", "/"+host.split("/",1)[1]+path)
        h.putheader("Host", host.split("/",1)[0])
        h.endheaders()
        resp, reason, headers = h.getreply()
        return resp, reason, headers.get("Server")
    except(), msg:
        print "Error Occurred:",msg
        pass

def timer():
    now = time.localtime(time.time())
    return time.asctime(now)

def slowprint(s):
    for c in s + '\n':
        sys.stdout.write(c)
        sys.stdout.flush() # defeat buffering
        time.sleep(8./90)

print G+"\n\t Whats My Site Component Scanner"

coms = { "index.php?option=com_artforms" : "com_artforms" + "link1","index.php?option=com_fabrik" : "com_fabrik" + "ink"}

if len(sys.argv) != 2:
    print "\nUsage: python jx.py <site>"
    print "Example: python jx.py www.site.com/\n"
    sys.exit(1)

host = sys.argv[1].replace("http://","").rsplit("/",1)[0]
if host[-1] != "/":
    host = host+"/"

print "\n[+] Site:",host
print "[+] Loaded:",len(coms)
print "\n[+] Scanning Components\n"

for com,nme,expl in coms.items():
    resp,reason,server = main(com)
    if resp not in BAD_RESP:
        print ""
        print G+"\t[+] Result:",resp, reason
        print G+"\t[+] Com:",nme
        print G+"\t[+] Link:",expl
        print W
    else:
        print ""
        print R+"\t[-] Result:",resp, reason
        print W

print "\n[-] Done\n"
And this is the error message that comes up:
Traceback (most recent call last):
  File "jscan.py", line 69, in <module>
    for com,nme,expl in xpls.items():
ValueError: need more than 2 values to unpack
I already tried changing the number of values to 3 or 1, but it doesn't seem to work.
xpls.items() returns tuples of two items each, but you're trying to unpack each one into three. You initialize the dict yourself with two key:value pairs:
coms = { "index.php?option=com_artforms" : "com_artforms" + "link1","index.php?option=com_fabrik" : "com_fabrik" + "ink"}
Besides, the traceback seems to be from another script - the dict is called xpls there, and coms in the code you posted...
You can try
for (xpl, poc) in xpls.items():
    ...
    ...
because dict.items() returns tuples with 2 values each (the key and the value).
You have all the information you need. As with any bug, the best place to start is the traceback. Let's read it:
    for com,nme,expl in xpls.items():
ValueError: need more than 2 values to unpack
Python raises ValueError when a given object is of the correct type but has an inappropriate value. In this case, it tells us that the items yielded by xpls.items() are iterables and thus can be unpacked, but the unpacking attempt failed.
The description of the exception narrows down the problem: each item has 2 values, but more were required. By looking at the quoted line, we can see that "more" is 3.
In short: each item was supposed to unpack into 3 values, but it only has 2.
Note that I never read the rest of the code. Debugging this was possible using only those 2 lines.
Learning to read tracebacks is vital. When you encounter an error such as this one again, devote at least 10 minutes to working with this information. You'll be repaid tenfold for your effort.
As already mentioned, dict.items() yields tuples with two values. If you use a list of strings as the dictionary values instead of a single concatenated string (which you would otherwise have to split apart afterwards), you can go with this syntax:
coms = { "index.php?option=com_artforms" : ["com_artforms", "link1"],
         "index.php?option=com_fabrik" : ["com_fabrik", "ink"]}

for com, (name, expl) in coms.items():
    print com, name, expl

>>> index.php?option=com_artforms com_artforms link1
>>> index.php?option=com_fabrik com_fabrik ink

How to extract the key from the log in Python

I wrote this Python code to extract keys from a log. Using the same log, it worked well on one machine, but when I run it on Hadoop it fails. I guess there is some bug in the way I use the regex. Can anyone give me some comments? Is it that the regex isn't supported on Hadoop?
This Python code aims to extract qry and rc, sum the values of rc, and then print them as qry query_count rc_count. When I run it on Hadoop, it reports:
java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 1.
I searched Google, which suggests there may be some bug in my mapper code. So how can I fix it?
The log format is like this:
NOTICE: 01-03 23:57:23: [a.cpp][b][222] show_ver=11 sid=ae1d esid=6WVj uid=D1 a=20 qry=cars qid0=293 loc_src=4 phn=0 mid=0 wvar=c op=0 qry_src=0 op_type=1 src=110|120|111 at=60942 rc=3|1|1 discount=20 indv_type=0 rep_query=
And my Python code is this:
import sys
import re

for line in sys.stdin:
    count_result = 0
    line = line.strip()
    match = re.search('.*qry=(.*?)qid0.*rc=(.*?)discount', line).groups()
    if (len(match) < 2):
        continue
    counts_tmp = match[1].strip()
    counts = counts_tmp.split('|')
    for count in counts:
        if count.isdigit():
            count_result += int(count)
    key_tmp = match[0].strip()
    if key_tmp.strip():
        key = key_tmp.split('\t')
        key = ' '.join(key)
        print '%s\t%s\t%s' % (key, 1, count_result)
Most likely your regular expression catches more than you expect. I would suggest splitting it into simpler parts like:
(?<= qry=).*(?= qid0)
and
(?<= rc=).*(?= discount)
Making a lot of assumptions and hazarding an educated guess, you might be able to parse your log like this:
from collections import defaultdict

input = """NOTICE: 01-03 23:57:23: [a.cpp][b][222] show_ver=11 sid=ae1d esid=6WVj uid=D1 a=20 qry=cars qid0=293 loc_src=4 phn=0 mid=0 wvar=c op=0 qry_src=0 op_type=1 src=110|120|111 at=60942 rc=3|1|1 discount=20 indv_type=0 rep_query=
NOTICE: 01-03 23:57:23: [a.cpp][b][222] show_ver=11 sid=ae1d esid=6WVj uid=D1 a=20 qry=boats qid0=293 loc_src=4 phn=0 mid=0 wvar=c op=0 qry_src=0 op_type=1 src=110|120|111 at=60942 rc=3|5|2 discount=20 indv_type=0 rep_query=
NOTICE: 01-03 23:57:23: [a.cpp][b][222] show_ver=11 sid=ae1d esid=6WVj uid=D1 a=20 qry=cars qid0=293 loc_src=4 phn=0 mid=0 wvar=c op=0 qry_src=0 op_type=1 src=110|120|111 at=60942 rc=3|somestring|12 discount=20 indv_type=0 rep_query="""

d = defaultdict(lambda: 0)
for line in input.split("\n"):
    tokens = line.split(" ")
    count = 0
    qry = None
    for token in tokens:
        pair = token.split("=")
        if len(pair) != 2: continue
        key, value = pair
        if key == "qry":
            qry = value
        if key == "rc":
            values = value.split("|")
            for value in values:
                try: count += int(value)
                except: pass
    if qry: d[qry] += count

print(d)
This assumes that (a) key-value pairs are separated by spaces, and (b) there are no spaces inside keys or values.
