I'm looking for some help with the logic; the code is not very Pythonic, I'm still learning. We map the Z: drive to different locations all the time. Here is what I'm trying to accomplish:
1: Check for an old mapping on Z:, say \\192.168.1.100\old
2: Map the new location to Z:, say \\192.168.1.200\new
3: Make sure the new Z: mapping exists and is still connected
4: If it gets disconnected or unmapped, reconnect it and log it
90% of the code works. If I run it as is, it unmaps the old drive and maps the new drive, but the label of the old drive sticks around even though Z: is mapped to the new location and I can browse it. The other problem is that I only want to run checkOldDrive one time and then just let checkDrive run. Any advice is appreciated.
#!/usr/bin/python
import pywintypes
import win32com.client
import os.path
import sys
import string
import fileinput
import time
import win32net

##################################################################
# Check for old Z: map and remove it
# Map the new instance of Z:
# Check if the Z: drive exists
# if the drive exists report to status.log we are working
# if the drive DOES NOT exist map it and report errors to the log
###################################################################

def checkDrive():
    if os.path.exists('z:'):
        saveout = sys.stdout
        fsock = open('status.log', 'a')
        sys.stdout = fsock
        print os.getenv("COMPUTERNAME"), " - ", time.ctime(), " - Connected"
        sys.stdout = saveout
        fsock.close()
    else:
        ivvinetwork = win32com.client.Dispatch('Wscript.Network')
        network_drives = ivvinetwork.EnumNetworkDrives()
        for mapped_drive in [network_drives.Item(i)
                             for i in range(0, network_drives.Count() - 1, 2)
                             if network_drives.Item(i)]:
            ivvinetwork.RemoveNetworkDrive(mapped_drive, True, True)
        drive_mapping = [
            ('z:', '\\\\192.168.1.100\\newmap', 'someuser', 'somepass')]
        for drive_letter, network_path, user_name, user_pass in drive_mapping:
            try:
                ivvinetwork.MapNetworkDrive(drive_letter, network_path, True,
                                            user_name, user_pass)
                saveout = sys.stdout
                fsock = open('status.log', 'a')
                sys.stdout = fsock
                print os.getenv("COMPUTERNAME"), " - ", time.ctime(), " - ", drive_mapping, "Drive Has Been Mapped"
                sys.stdout = saveout
                fsock.close()
            except Exception, err:
                saveout = sys.stdout
                fsock = open('status.log', 'a')
                sys.stdout = fsock
                print os.getenv("COMPUTERNAME"), " - ", time.ctime(), " - ", err
                sys.stdout = saveout
                fsock.close()

def checkOldDrive():
    if os.path.exists('z:'):
        ivvinetwork = win32com.client.Dispatch('Wscript.Network')
        network_drives = ivvinetwork.EnumNetworkDrives()
        for mapped_drive in [network_drives.Item(i)
                             for i in range(0, network_drives.Count() - 1, 2)
                             if network_drives.Item(i)]:
            ivvinetwork.RemoveNetworkDrive(mapped_drive, True, True)

checkOldDrive()
checkDrive()
I've put together a script based on the one you laid out which I believe accomplishes what you've described.
I've tried to do it in a way that's both Pythonic and follows good programming principles.
In particular, I've done the following:
- modularized much of the functionality into reusable functions
- avoided repetition as much as possible. I did not factor out the hard-coded 'Z:' drive; I leave that to you as an exercise (as you see fit).
- factored the logging definition into one location (so the format, etc. are consistent and not repeated). The logging module made this easy.
- moved all code out of the top-level scope (except for some global constants). This allows the script to be run directly or imported by another script as a module.
- added some documentation strings to help document what each function does.
- kept each function short and succinct, so it can be read more easily on a single screen and in an isolated context.
Surely there is still room for some improvement, but I have tested this script and it is functional. It should provide some good lessons while also helping you accomplish your task. Enjoy.
#!/usr/bin/env python
import os
import time
import win32com.client
import logging

old_mappings = [
    r'\\192.168.1.100\old',
    ]
new_mapping = r'\\192.168.1.200\new'

LOG_FILENAME = 'status.log'

def main():
    """
    Check to see if Z: is mapped to the old server; if so, remove it and
    map Z: to the new server.

    Then, repeatedly monitor the Z: mapping. If the Z: drive exists,
    report to status.log that we are working. Otherwise, re-map it and
    report errors to the log.
    """
    setupLogging()
    replaceMapping()
    monitorMapping()

def replaceMapping():
    if removeMapping():
        createNewMapping()

def setupLogging():
    format = os.environ['COMPUTERNAME'] + " - %(asctime)s - %(message)s"
    logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG, format=format)

def getCredentials():
    """
    Return one of three things:
    - an empty tuple
    - a tuple containing just a username (if a password is not required)
    - a tuple containing username and password
    """
    return ('someuser', 'somepass')

def createNewMapping():
    network = win32com.client.Dispatch('WScript.Network')
    params = (
        'Z:',          # drive letter
        new_mapping,   # UNC path
        True,          # update profile
        )
    params += getCredentials()
    try:
        network.MapNetworkDrive(*params)
        msg = '{params} - Drive has been mapped'
        logging.getLogger().info(msg.format(**vars()))
    except Exception as e:
        msg = 'error mapping {params}'
        logging.getLogger().exception(msg.format(**vars()))

def monitorMapping():
    while True:
        # only check once a minute
        time.sleep(60)
        checkMapping()

def checkMapping():
    # .get avoids a KeyError when Z: has vanished entirely, so the
    # monitor loop re-maps instead of crashing
    if getDriveMappings().get('Z:') == new_mapping:
        msg = 'Drive is still mapped'
        logging.getLogger().info(msg.format(**vars()))
    else:
        replaceMapping()

# From the Python 2.6.4 docs
from itertools import izip_longest
def grouper(n, iterable, fillvalue=None):
    "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)

def getDriveMappings():
    """
    Return a dictionary of drive letter to UNC paths as mapped on the
    system.
    """
    network = win32com.client.Dispatch('WScript.Network')
    # http://msdn.microsoft.com/en-us/library/t9zt39at%28VS.85%29.aspx
    drives = network.EnumNetworkDrives()
    # EnumNetworkDrives returns an even-length array of drive/UNC pairs.
    # Use grouper to convert this to a dictionary.
    result = dict(grouper(2, drives))
    # Potentially several UNC paths will be connected but not assigned
    # to any drive letter. Since only the last will be in the
    # dictionary, remove it.
    if '' in result:
        del result['']
    return result

def getUNCForDrive(drive):
    """
    Get the UNC path for a mapped drive.
    Throws a KeyError if no mapping exists.
    """
    return getDriveMappings()[drive.upper()]

def removeMapping():
    """
    Remove the old drive mapping. If it is removed, or was not present,
    return True.
    Otherwise, return False or None.
    """
    mapped_drives = getDriveMappings()
    drive_letter = 'Z:'
    if drive_letter not in mapped_drives:
        return True
    if mapped_drives[drive_letter] in old_mappings:
        network = win32com.client.Dispatch('WScript.Network')
        force = True
        update_profile = True
        network.RemoveNetworkDrive(drive_letter, force, update_profile)
        return True
    # otherwise, fall through and return None

if __name__ == '__main__':
    main()
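Because nothing except the constants runs at import time, you can also drive this from another script. A minimal sketch of that, assuming you save the script above as mapdrive.py (the file name is my invention):

import mapdrive  # hypothetical module name for the script above

mapdrive.setupLogging()     # configure status.log once
mapdrive.replaceMapping()   # one-shot: drop the old Z: mapping, create the new one
mapdrive.monitorMapping()   # then loop forever, re-mapping and logging as needed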
My code was working just fine before adding the hash function: I was getting a list of all the folders and files in my directory in the PrettyTable. Once I added the hash function, only about 5 of the files in that directory appeared with hashes in the table. I am not sure where I have gone wrong. Please forgive me, I am new to this; we are not learning to code from scratch, but have to modify existing code to function the way we need it to.
# Python Standard Libraries
import os        # file system methods
import hashlib   # hashing functions
import sys       # system methods
import time      # time conversions

# Python 3rd Party Libraries
from prettytable import PrettyTable  # pip install prettytable

# Local Functions
def GetFileMetaData(fileName):
    # obtain file system metadata
    try:
        metaData = os.stat(fileName)   # Use the stat method to obtain metadata
        fileSize = metaData.st_size    # Extract fileSize and MAC times
        timeLastAccess = metaData.st_atime
        timeLastModified = metaData.st_mtime
        timeCreated = metaData.st_ctime
        macTimeList = [timeLastModified, timeCreated, timeLastAccess]  # Group the MAC times in a list
        return True, None, fileSize, macTimeList
    except Exception as err:
        return False, str(err), None, None

# Pseudo Constants

# Start of the Script
tbl = PrettyTable(['FilePath', 'FileSize', 'UTC-Modified', 'UTC-Accessed', 'UTC-Created', 'SHA-256 HASH'])

# file check
while True:
    targetFolder = input("Enter Target Folder: ")
    if os.path.isdir(targetFolder):
        break
    else:
        print("\nInvalid Folder ... Please Try Again")

print("Walking: ", targetFolder, "\n")
print()

for currentRoot, dirList, fileList in os.walk(targetFolder):
    for nextFile in fileList:
        fullPath = os.path.join(currentRoot, nextFile)
        absPath = os.path.abspath(fullPath)
        fileSize = os.path.getsize(absPath)
        success, errInfo, fileSize, macList = GetFileMetaData(absPath)
        if success:
            # convert to readable Greenwich time
            # (macList holds [modified, created, accessed], so index accordingly)
            modTime = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(macList[0]))
            creTime = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(macList[1]))
            accTime = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(macList[2]))
            # hashing function
            with open(absPath, 'rb') as target:
                fileContents = target.read()
                sha256Obj = hashlib.sha256()
                sha256Obj.update(fileContents)
                hexDigest = sha256Obj.hexdigest()
            tbl.add_row([absPath, fileSize, modTime, accTime, creTime, hexDigest])

tbl.align = "l"  # align the columns left justified
# display the table
print(tbl.get_string(sortby="FileSize", reversesort=True))
print("\nScript-End\n")
I'm trying to compare two files with filecmp. The problem is that the result is always "No, the files are NOT the same" (i.e. False), even though the files are the same.
I'm writing the same content to two different files. First I write to revision_1.txt:
original_stdout = sys.stdout
with open('revision_1.txt', 'w') as rev1:
    sys.stdout = rev1
    print(revision)  # revision is output from command i took before
    sys.stdout = original_stdout

if filecmp.cmp('revision_1.txt', 'revision_2.txt'):
    # revision_2.txt is file I c
    print("Both the files are same")
else:
    # Do whatever you want if the files are NOT the same
    print("No, the files are NOT the same")
    original_stdout = sys.stdout
    with open('revision_2.txt', 'w') as rev2:
        sys.stdout = rev2
        print(revision)  # revision is output from command i took before
        sys.stdout = original_stdout
My goal: if the files are equal, stop the script. If they are not, rewrite revision_2.txt and then send mail (I already wrote the code for the mail).
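For reference, here is the order of operations I'm aiming for (just a sketch of the logic, untested):

import sys
import filecmp

# revision is assumed to already hold the command output, as in my snippet
with open('revision_1.txt', 'w') as rev1:
    rev1.write(str(revision))        # write directly instead of redirecting stdout

if filecmp.cmp('revision_1.txt', 'revision_2.txt'):
    sys.exit()                       # files equal: stop the script
else:
    with open('revision_2.txt', 'w') as rev2:
        rev2.write(str(revision))    # remember this revision for the next run
    # ... send mail here ...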
Your usage of files is unusual:
import filecmp

revision = "08/15"

with open('revision_1.txt', 'w') as rev1:
    rev1.write(revision)
with open('revision_2.txt', 'w') as rev2:
    rev2.write(revision)
with open('revision_3.txt', 'w') as rev3:
    rev3.write(revision + "-42")

# should compare equal
if filecmp.cmp('revision_1.txt', 'revision_2.txt'):
    print("Identical")
else:
    print("No, the files are NOT the same")

# should NOT compare equal
if filecmp.cmp('revision_1.txt', 'revision_3.txt'):
    print("Identical")
else:
    print("No, the files are NOT the same")
prints
Identical
No, the files are NOT the same
Try setting shallow to False (the default is True), i.e.
if filecmp.cmp('revision_1.txt', 'revision_2.txt', shallow=False):
From the documentation:
If shallow is true, files with identical os.stat() signatures are taken to be equal. Otherwise, the contents of the files are compared.
https://docs.python.org/3/library/filecmp.html#filecmp.cmp
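To see what that means in practice, here is a small self-contained demo (the file names are throwaway examples): two files with the same size and the same timestamps but different contents pass the default shallow check, because their contents are never read.

import filecmp
import os

with open('a.txt', 'w') as f:
    f.write('AAAA')
with open('b.txt', 'w') as f:
    f.write('BBBB')

# give b.txt the same stat signature as a.txt: both are 4 bytes,
# and os.utime copies over the timestamps
st = os.stat('a.txt')
os.utime('b.txt', (st.st_atime, st.st_mtime))

print(filecmp.cmp('a.txt', 'b.txt'))                 # True: signatures match, contents never read
print(filecmp.cmp('a.txt', 'b.txt', shallow=False))  # False: contents actually compared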
Thank you all for the replies.
As I said, I'm very new to Python.
Following your recommendations I changed the code; this time I'm posting the full script and explaining.
I succeeded in comparing 'revision' with 'd'; my problem is that I'm getting a different rpc-reply message-id each time.
How can I ignore the message-id (I only need the Revision value)?
See script output:
Not equal
Revision: fpc1-1603878922-228
FFFFFFF
Revision: fpc1-1603878922-228
FFFFFFF
Script:
import smtplib
import email.message
from email.mime.text import MIMEText
from ncclient import manager
from ncclient.xml_ import *
import sys
import time
import filecmp
# Connecting to juniper cc-vc-leg
conn = manager.connect(
host='10.1.1.1',
port='830',
username='test',
password='test',
timeout=10,
device_params={'name':'junos'},
hostkey_verify=False)
# Take juniper commands
resault = conn.command('show version | match Hostname', format='text')
revision = conn.command('show system commit revision', format='text')
compare_config = conn.compare_configuration(rollback=1)
# Open & read file vc-lg_rev.text
f = open('vc-lg_rev.text', 'r')
d = f.read()
# Check if revision output is equal to file "vc-lg_rev.text"
# If equal exit the script
if (revision == d):
print('equal')
exit()
print('I hop script stopped')
else:
print('Not equal')
print(revision)
print('FFFFFFF')
print(d)
print('FFFFFFF')
# To save last revision number to "vc-lg_rev.text"
with open('vc-lg_rev.text', 'w', buffering=1) as rev1:
rev1.write(str(revision))
rev1.flush()
rev1.close()
# This is how i copy "compare_config" output to file "vc-lg_compare.text"
original_stdout = sys.stdout
with open('vc-lg_compare.text', 'w') as a:
sys.stdout = a
print(compare_config)
sys.stdout = original_stdout
def send_email(compare):
server = smtplib.SMTP('techunix.technion.ac.il', 25)
email_reciver = 'rafish#technion.ac.il', 'rafi1shemesh#gmail.com'
message = f"'Subject': mail_subject \n\n {compare}"
ID = 'Juniper_Compare'
server.sendmail(ID, email_reciver, message)
with open('vc-lg_compare.text', 'r') as compare: # "as" means file object called compare
text = str(compare.read()) # I want to recive the output as string to look specific word in the file
if (text.find('+') > -1) or (text.find('- ') > -1):
send_email(text)
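To ignore the message-id, compare just the revision value rather than the whole rpc-reply string. A minimal sketch, assuming the reply text always contains a line like "Revision: fpc1-1603878922-228" as in the output above (extract_revision is a hypothetical helper, not part of ncclient):

import re

def extract_revision(reply):
    """Pull just the revision value out of an rpc-reply string, ignoring
    the per-session message-id attribute on the wrapper element."""
    match = re.search(r'Revision:\s*(\S+)', str(reply))
    return match.group(1) if match else None

# compare values, not raw XML text
if extract_revision(revision) == extract_revision(d):
    print('equal')
    sys.exit()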
I need a function to check whether a directory is empty, and it should be as fast as possible, because I use it on thousands of directories that can have up to 100k files. I implemented the one below, but it looks like something is wrong with the kernel32 module in Python 3 (I get OSError: exception: access violation writing 0xFFFFFFFFCE4A9500 on FindNextFileW, right from the first call):
import os
import ctypes
from ctypes.wintypes import WIN32_FIND_DATAW

def is_empty(fpath):
    ret = True
    loop = True
    fpath = os.path.join(fpath, '*')
    wfd = WIN32_FIND_DATAW()
    handle = ctypes.windll.kernel32.FindFirstFileW(fpath, ctypes.byref(wfd))
    if handle == -1:
        return ret
    while loop:
        if wfd.cFileName not in ('.', '..'):
            ret = False
            break
        loop = ctypes.windll.kernel32.FindNextFileW(handle, ctypes.byref(wfd))
    ctypes.windll.kernel32.FindClose(handle)
    return ret

print(is_empty(r'C:\\Users'))
You can use os.scandir, the iterator version of listdir, and simply return upon "iterating" the first entry, like this:
import os

def is_empty(path):
    with os.scandir(path) as scanner:
        for entry in scanner:  # this loop will have at most 1 iteration
            return False       # found an entry, so not empty
    return True                # if we reached here, the directory is empty
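As for the access violation in the ctypes version: on 64-bit Python, ctypes assumes a foreign function returns a 32-bit int unless told otherwise, so the HANDLE returned by FindFirstFileW gets truncated and the next call faults. Declaring argtypes and restype should fix it. Here is a sketch of the same function with the declarations added (untested on your setup):

import os
import ctypes
from ctypes import wintypes

kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
kernel32.FindFirstFileW.argtypes = (wintypes.LPCWSTR,
                                    ctypes.POINTER(wintypes.WIN32_FIND_DATAW))
kernel32.FindFirstFileW.restype = wintypes.HANDLE
kernel32.FindNextFileW.argtypes = (wintypes.HANDLE,
                                   ctypes.POINTER(wintypes.WIN32_FIND_DATAW))
kernel32.FindNextFileW.restype = wintypes.BOOL
kernel32.FindClose.argtypes = (wintypes.HANDLE,)
kernel32.FindClose.restype = wintypes.BOOL

INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value  # full-width -1, not 32-bit -1

def is_empty(fpath):
    wfd = wintypes.WIN32_FIND_DATAW()
    handle = kernel32.FindFirstFileW(os.path.join(fpath, '*'), ctypes.byref(wfd))
    if handle == INVALID_HANDLE_VALUE:
        return True
    try:
        while True:
            if wfd.cFileName not in ('.', '..'):
                return False  # found a real entry
            if not kernel32.FindNextFileW(handle, ctypes.byref(wfd)):
                return True   # exhausted: only '.' and '..' were present
    finally:
        kernel32.FindClose(handle)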
I am using the Python multiprocessing module to scrape a website. This website has over 100,000 pages. What I am trying to do is put every 500 pages I retrieve into a separate folder. The problem is that although I successfully create a new folder, my script only populates the previous folder. Here is the code:
a = 1     # module-level, i.e. already global
b = 500

def fetchAfter(y):
    global a
    global b
    strfile = "E:\\A\\B\\" + str(a) + "-" + str(b) + "\\" + str(y) + ".html"
    if (os.path.exists(os.path.join("E:\\A\\B\\" + str(a) + "-" + str(b) + "\\", str(y) + ".html")) == 0):
        f = open(strfile, "w")

if __name__ == '__main__':
    start = time.time()
    for i in range(1, 3):
        os.makedirs("E:\\Results\\Class 9\\" + str(a) + "-" + str(b))
        pool = Pool(processes=12)
        pool.map(fetchAfter, range(a, b))
        pool.close()
        pool.join()
        a = b
        b = b + 500
    print time.time() - start
It is best for the worker function to only rely on the single argument it gets for determining what to do. Because that is the only information it gets from the parent process every time it is called. This argument can be almost any Python object (including a tuple, dict, list) so you're not really limited in the amount of information you pass to a worker.
So make a list of 2-tuples. Each 2-tuple should consist of (1) the file to get and (2) the directory where to stash it. Feed that list of tuples to map(), and let it rip.
I'm not sure if it is useful to specify the number of processes you want to use. Pool generally uses as many processes as your CPU has cores. That is usually enough to max out all the cores. :-)
BTW, you should only call map() once. And since map() blocks until everything is done, there is no need to call join().
Edit: Added example code below.
import multiprocessing
import requests
import os

def processfile(arg):
    """Worker function to scrape the pages and write them to a file.

    Keyword arguments:
    arg -- 2-tuple containing the URL of the page and the directory
           where to save it.
    """
    # Unpack the arguments
    url, savedir = arg

    # It might be a good idea to put a random delay of a few seconds here,
    # so we don't hammer the webserver!

    # Scrape the page. Requests rules ;-)
    r = requests.get(url)
    # Write it, keep the original HTML file name.
    fname = url.split('/')[-1]
    with open(savedir + '/' + fname, 'w+') as outfile:
        outfile.write(r.text)

def main():
    """Main program.
    """
    # This list of tuples should hold all the pages...
    # Up to you how to generate it, this is just an example.
    worklist = [('http://www.foo.org/page1.html', 'dir1'),
                ('http://www.foo.org/page2.html', 'dir1'),
                ('http://www.foo.org/page3.html', 'dir2'),
                ('http://www.foo.org/page4.html', 'dir2')]
    # Create output directories
    dirlist = ['dir1', 'dir2']
    for d in dirlist:
        os.makedirs(d)
    p = multiprocessing.Pool()  # only the module is imported, so qualify Pool
    # Let 'er rip!
    p.map(processfile, worklist)
    p.close()

if __name__ == '__main__':
    main()
Multiprocessing, as the name implies, uses separate processes. The processes you create with your Pool do not have access to the original values of a and b that you are adding 500 to in the main program. See this previous question.
The easiest solution is to just refactor your code so that you pass a and b to fetchAfter (in addition to passing y).
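A minimal sketch of that refactor, reusing the names from the question (untested):

import os

def fetchAfter(args):
    # everything the worker needs arrives in the one argument
    y, a, b = args
    strfile = "E:\\A\\B\\" + str(a) + "-" + str(b) + "\\" + str(y) + ".html"
    if not os.path.exists(strfile):
        open(strfile, "w").close()

# in the parent, build the argument tuples up front:
# pool.map(fetchAfter, [(y, a, b) for y in range(a, b)])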
Here's one way to implement it:
#!/usr/bin/env python
import logging
import multiprocessing as mp
import os
import urllib

def download_page(url_path):
    try:
        urllib.urlretrieve(*url_path)
        mp.get_logger().info('done %s' % (url_path,))
    except Exception as e:
        mp.get_logger().error('failed %s: %s' % (url_path, e))

def generate_url_path(rootdir, urls_per_dir=500):
    for i in xrange(100*1000):
        if i % urls_per_dir == 0:  # make new dir
            dirpath = os.path.join(rootdir, '%d-%d' % (i, i + urls_per_dir))
            if not os.path.isdir(dirpath):
                os.makedirs(dirpath)  # stop if it fails
        url = 'http://example.com/page?' + urllib.urlencode(dict(number=i))
        path = os.path.join(dirpath, '%d.html' % (i,))
        yield url, path

def main():
    mp.log_to_stderr().setLevel(logging.INFO)
    pool = mp.Pool(4)  # number of processes is unrelated to the number of
                       # CPUs, because the task is IO-bound
    for _ in pool.imap_unordered(download_page, generate_url_path(r'E:\A\B')):
        pass

if __name__ == '__main__':
    main()
See also: Python multiprocessing pool.map for multiple arguments, and the brute-force basic HTTP authorization code (httplib + multiprocessing) from How to make HTTP in Python faster?
I was just looking at the logging docs in Python and came across funcName as a parameter in the log formatter.
While it looks handy (a great way to see exactly where a log call comes from), someone raised a concern that it might need to generate a stack trace, which would be a performance hit.
I assume it uses something like sys._getframe() and not the inspect module, which would have a bigger performance impact.
Is funcName something we can use in a production environment, or should we stay away?
Resist the temptation to guess; the source for logging is available to you as part of your Python distribution.
How it finds the function name (logging/__init__.py):
#
# _srcfile is used when walking the stack to check when we've got the first
# caller stack frame.
#
if hasattr(sys, 'frozen'):  # support for py2exe
    _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:])
elif __file__[-4:].lower() in ['.pyc', '.pyo']:
    _srcfile = __file__[:-4] + '.py'
else:
    _srcfile = __file__
_srcfile = os.path.normcase(_srcfile)

# next bit filched from 1.5.2's inspect.py
def currentframe():
    """Return the frame object for the caller's stack frame."""
    try:
        raise Exception
    except:
        return sys.exc_info()[2].tb_frame.f_back

if hasattr(sys, '_getframe'): currentframe = lambda: sys._getframe(3)
# done filching
and then later:
def findCaller(self):
    """
    Find the stack frame of the caller so that we can note the source
    file name, line number and function name.
    """
    f = currentframe()
    # On some versions of IronPython, currentframe() returns None if
    # IronPython isn't run with -X:Frames.
    if f is not None:
        f = f.f_back
    rv = "(unknown file)", 0, "(unknown function)"
    while hasattr(f, "f_code"):
        co = f.f_code
        filename = os.path.normcase(co.co_filename)
        if filename == _srcfile:
            f = f.f_back
            continue
        rv = (filename, f.f_lineno, co.co_name)
        break
    return rv
Also, no need to worry about the overhead: logging figures out the function name before it works out whether or not you need it, so you might as well use it.
Here is a test app showing that looking up and writing the filename and line number to a file costs around 1 second per 500,000 calls on my local machine:
#!/usr/bin/env python
import traceback, sys, time

def writeinfo(f, on=True):
    # give the function something to do
    s = sum(range(1000))
    if on:
        fr = sys._getframe(1)
        s = "%s (line %s) " % (fr.f_code.co_filename, fr.f_lineno)
        f.write(s)

cnt = 50000

t1 = time.time()
f = open('tempfile.log', 'w')
for i in range(cnt):
    writeinfo(f)
f.close()
t2 = time.time()
for i in range(cnt):
    writeinfo(f, on=False)
t3 = time.time()

print "Test time with file write: %s" % (t2-t1)
print "Test time without file write: %s" % (t3-t2)
Results:
Test time with file write: 1.17307782173
Test time without file write: 1.08166718483