I am using the following code to process the list of images found in my scene, before the gathered information, namely the tifPath and texPath, is used in another function.
However, in my scene there are 3 textures, for example, so I should be seeing 3 sets of tifPath and texPath, but I am only seeing 1 of them, whereas if I check surShaderOut or surShaderTex I am able to see the info for all 3 textures.
For example, the file paths of the 3 textures (in surShaderTex) are as follows: /user_data/testShader/textureTGA_01.tga, /user_data/testShader/textureTGA_02.tga, /user_data/testShader/textureTGA_03.tga
I guess what I am trying to ask is why my for statement is able to print out all 3 results, yet anything past it only prints a single result.
Any advice?
import os
import maya.cmds as cmds

surShader = cmds.ls(type = 'surfaceShader')
for con in surShader:
    surShaderOut = cmds.listConnections('%s.outColor' % con)
    surShaderTex = cmds.getAttr("%s.fileTextureName" % surShaderOut[0])

path = os.path.dirname(surShaderTex)
f = surShaderTex.split("/")[-1]
tifName = os.path.splitext(f)[0] + ".tif"
texName = os.path.splitext(f)[0] + ".tex"
tifPath = os.path.join(path, tifName)
texPath = os.path.join(path, texName)

convertText(surShaderTex, tifPath, texPath)
Only two lines are part of your for loop. The rest only execute once.
So first this runs:
surShader = cmds.ls(type = 'surfaceShader')
for con in surShader:
    surShaderOut = cmds.listConnections('%s.outColor' % con)
    surShaderTex = cmds.getAttr("%s.fileTextureName" % surShaderOut[0])
Then after that loop, only the last surShaderOut and surShaderTex are left, and the following is executed once:
path = os.path.dirname(surShaderTex)
f = surShaderTex.split("/")[-1]
tifName = os.path.splitext(f)[0] + ".tif"
texName = os.path.splitext(f)[0] + ".tex"
tifPath = os.path.join(path, tifName)
texPath = os.path.join(path, texName)
Indent that the same as the lines above it, and it'll be run for each element of surShader instead of only once.
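For reference, here is a sketch of the corrected loop with everything moved into the body (convertText is assumed to be the function you already have defined elsewhere):

import os
import maya.cmds as cmds

surShader = cmds.ls(type = 'surfaceShader')
for con in surShader:
    surShaderOut = cmds.listConnections('%s.outColor' % con)
    surShaderTex = cmds.getAttr("%s.fileTextureName" % surShaderOut[0])
    path = os.path.dirname(surShaderTex)
    f = surShaderTex.split("/")[-1]
    tifPath = os.path.join(path, os.path.splitext(f)[0] + ".tif")
    texPath = os.path.join(path, os.path.splitext(f)[0] + ".tex")
    # This now runs once per shader instead of once in total
    convertText(surShaderTex, tifPath, texPath)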
I wrote some code that uses OCR to extract text from screenshots of follower lists and then transfers them into a data frame.
The reason I have to go through the hassle with "name" / "display name" and removing blank lines is that the initial text extraction looks something like this:
Screenname 1
name 1
Screenname 2
name 2
(and so on)
So I know in which order each extraction will be.
My code works well for 1-30 images, but if I take more than that it gets a bit slow. My goal is to run around 5-10k screenshots through it at once. I'm pretty new to programming, so any ideas/tips on how to optimize the speed would be very appreciated! Thank you all in advance :)
from PIL import Image
from pytesseract import pytesseract
import os
import pandas as pd
from itertools import chain

list_final = [""]
list_name = [""]
liste_anzeigename = [""]
list_raw = [""]
anzeigename = [""]
name = [""]
sort = [""]

f = r'/Users/PycharmProjects/pythonProject/images'
myconfig = r"--psm 4 --oem 3"

for file in os.listdir(f):
    f_img = f + "/" + file
    img = Image.open(f_img)
    img = img.crop((240, 400, 800, 2400))
    img.save(f_img)

for file in os.listdir(f):
    f_img = f + "/" + file
    test = pytesseract.image_to_string(Image.open(f_img), config=myconfig)
    lines = test.split("\n")
    list_raw = [line for line in lines if line.strip() != ""]
    sort.append(list_raw)
    name = {list_raw[0], list_raw[2], list_raw[4],
            list_raw[6], list_raw[8], list_raw[10],
            list_raw[12], list_raw[14], list_raw[16]}
    list_name.append(name)
    anzeigename = {list_raw[1], list_raw[3], list_raw[5],
                   list_raw[7], list_raw[9], list_raw[11],
                   list_raw[13], list_raw[15], list_raw[17]}
    liste_anzeigename.append(anzeigename)

reihenfolge_name = list(chain.from_iterable(list_name))
index_anzeigename = list(chain.from_iterable(liste_anzeigename))
sortieren = list(chain.from_iterable(sort))
print(list_raw)

sort_name = sorted(reihenfolge_name, key=sortieren.index)
sort_anzeigename = sorted(index_anzeigename, key=sortieren.index)

final = pd.DataFrame(zip(sort_name, sort_anzeigename), columns=['name', 'anzeigename'])
print(final)
Use a multiprocessing.Pool.
Combine the code under the for-loops and put it into a function process_file.
This function should accept a single argument: the name of a file to process.
Next, using listdir, create a list of files to process.
Then create a Pool and use its map method to process the list:
import os
import multiprocessing as mp

def process_file(name):
    # your code goes here.
    return anzeigename  # Or whatever the result should be.

if __name__ == "__main__":
    f = r'/Users/PycharmProjects/pythonProject/images'
    p = mp.Pool()
    liste_anzeigename = p.map(process_file, os.listdir(f))
This will run your code in parallel on as many cores as your CPU has.
For an N-core CPU this will take approximately 1/N times as long as doing it without multiprocessing.
Note that the return value of the worker function should be pickleable; it has to be returned from the worker process to the parent process.
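As a rough sketch, the combined worker could look something like this (the crop box, config string, and directory are taken from your code; the return value is a placeholder for whatever you actually need back):

import os
import multiprocessing as mp
from PIL import Image
from pytesseract import pytesseract

F = r'/Users/PycharmProjects/pythonProject/images'
MYCONFIG = r"--psm 4 --oem 3"

def process_file(file):
    # Crop one screenshot in memory and OCR it; return the non-empty lines.
    img = Image.open(os.path.join(F, file)).crop((240, 400, 800, 2400))
    text = pytesseract.image_to_string(img, config=MYCONFIG)
    return [line for line in text.split("\n") if line.strip() != ""]

if __name__ == "__main__":
    with mp.Pool() as p:
        results = p.map(process_file, os.listdir(F))
    # results is a list of per-image line lists, in input order

As a bonus, cropping in memory and skipping the img.save() round trip avoids rewriting every file on disk, which is a speedup by itself.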
I am trying to check which files present in my full_list_files are also present in required_list.
The thing here is that they are not exactly equal to one another, but they match on the filename and the last subdirectory.
Example:
'C:\Users\Documents\Updated\Build\Output\M\Application_1.bin' matches "M/Application_1.bin", except the slashes are different.
So I am trying to make both uniform by using the function convert_fslash_2_bslash.
But still, I see the output below; none of the files are matched.
full_list_files = set(['C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Report.tar.gz', 'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\Application_2.bin', 'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Testing.txt', 'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\masking.tar.gz', 'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\Application_1.bin', 'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Application_1.bin', 'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\History.zip', 'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Challenge.tar.gz', 'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Application_2.bin', 'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\porting.tar.gz', 'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Booting.tar.gz'])
original required_list = set(['N/Application_2.bin', 'M/masking.tar.gz', 'N/Application_1.bin', 'O/Challenge.tar.gz', 'M/Application_1.bin', 'O/Testing.txt', 'M/rooting.tar.gz', 'M/Application_2.bin', 'O/History.zip', 'N/porting.tar.gz', 'O/Report.tar.gz'])
modified required_list = ['N\\Application_2.bin', 'M\\masking.tar.gz', 'N\\Application_1.bin', 'O\\Challenge.tar.gz', 'M\\Application_1.bin', 'O\\Testing.txt', 'M\\rooting.tar.gz', 'M\\Application_2.bin', 'O\\History.zip', 'N\\porting.tar.gz', 'O\\Report.tar.gz']
'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Report.tar.gz' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\Application_2.bin' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Testing.txt' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\masking.tar.gz' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\Application_1.bin' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Application_1.bin' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\History.zip' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\O\\Challenge.tar.gz' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Application_2.bin' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\N\\porting.tar.gz' not present
'C:\\Users\\Documents\\Updated\\Build\\Output\\M\\Booting.tar.gz' not present
How can I get it working correctly?
import os
import sys
import re

full_list_files = {
    # These are actually real paths parsed from listdir,
    # just used as strings here for convenience
    'C:\Users\Documents\Updated\Build\Output\M\Application_1.bin',
    'C:\Users\Documents\Updated\Build\Output\M\Application_2.bin',
    'C:\Users\Documents\Updated\Build\Output\M\masking.tar.gz',
    'C:\Users\Documents\Updated\Build\Output\M\Booting.tar.gz',
    'C:\Users\Documents\Updated\Build\Output\N\Application_1.bin',
    'C:\Users\Documents\Updated\Build\Output\N\Application_2.bin',
    'C:\Users\Documents\Updated\Build\Output\N\porting.tar.gz',
    'C:\Users\Documents\Updated\Build\Output\O\Challenge.tar.gz',
    'C:\Users\Documents\Updated\Build\Output\O\History.zip',
    'C:\Users\Documents\Updated\Build\Output\O\Testing.txt',
    'C:\Users\Documents\Updated\Build\Output\O\Report.tar.gz'
}

required_list = {
    "M/Application_1.bin",
    "M/Application_2.bin",
    "M/masking.tar.gz",
    "M/rooting.tar.gz",
    "N/Application_1.bin",
    "N/Application_2.bin",
    "N/porting.tar.gz",
    "O/Challenge.tar.gz",
    "O/History.zip",
    "O/Testing.txt",
    "O/Report.tar.gz"
}

def convert_fslash_2_bslash(required_file_list):
    required_config_file_list = []
    i = 0
    for entry in required_file_list:
        entry = entry.strip()
        entry = entry.replace('"', "")
        entry = entry.replace('/', '\\')
        required_config_file_list.insert(i, entry)
        i = i + 1
    return required_config_file_list

if __name__ == "__main__":
    print
    print "full_list_files = ", full_list_files
    print
    print "original required_list = ", required_list
    print
    required_config_file_list = convert_fslash_2_bslash(required_list)
    print "modified required_list = ", required_config_file_list
    print
    for f_entry in full_list_files:
        f_entry = repr(f_entry)
        #for r_entry in required_config_file_list:
            #if ( f_entry.find(r_entry) != -1):
        if f_entry in required_config_file_list:
            print f_entry, " present"
        else:
            print f_entry, " not present"
Here is the logic you need at the bottom:
for f_entry in full_list_files:
    for r_entry in required_config_file_list:
        if f_entry.endswith(r_entry):
            print f_entry, " present"
You need to loop over both collections, then check to see if the longer path ends with the shorter path. One of your mistakes was calling repr(), which changes the double backslashes to quadruple ones.
I'll leave it up to you to decide how you'll handle printing paths that are not present at all.
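For example, one way to also report the unmatched files (a sketch in the same Python 2 style as your code, using any() to test whether some required entry matches):

for f_entry in full_list_files:
    if any(f_entry.endswith(r_entry) for r_entry in required_config_file_list):
        print f_entry, " present"
    else:
        print f_entry, " not present"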
I'm trying to write a script that makes it easier to create light AOVs using LPEs (Light Path Expressions).
But I just can't find a way to query the existence of previously created AOVs and skip those.
I'm following a tutorial I found from Arvid Schneider so some steps are from his video.
Here is what I have so far:
import maya.cmds as cmds
import maya.mel as mel
import pymel.core as pmc
import mtoa.aovs as aovs

# LightList is defined earlier (a list of lights in the scene)
custAovLst = []

for lightSel in range(len(LightList)):
    lgt_name = cmds.listRelatives(LightList, shapes = 1)
    aov = 'setAttr -type "string" {}.aiAov {};'.format(lgt_name[lightSel], 'lgt_' + lgt_name[lightSel].replace("Shape", ""))
    mel.eval(aov)

for light in LightList:
    aovName = cmds.getAttr(light + '.aiAov')
    custAovLst.append(aovName)

def aovCreate():
    for aovPass in custAovLst:
        if cmds.attributeQuery(aovPass, node = ".aiAov", ex = True):
            pass
        else:
            lightAov = aovs.AOVInterface().addAOV(aovPass, aovType='rgba')
            aiAov = pmc.PyNode(lightAov.node)
            aiAov.lightPathExpression.set("C.<L.'" + aovPass + "'>.*")

aovCreate()
Any help or advice on how to achieve this, or a better way to go about it, is more than welcome!
I ended up using a list with the referenceQuery command.
aovList = cmds.ls(type = "aiAOV")
deleteNode = [node for node in aovList if 'lgt' in str(node) if not cmds.referenceQuery(node, inr = 1)]
cmds.delete(deleteNode)
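For the original question of skipping AOVs that already exist, one approach (a sketch; I'm assuming the aiAOV nodes store the pass name on their 'name' attribute, which is how MtoA creates them) is to compare custAovLst against the names of existing aiAOV nodes:

import maya.cmds as cmds

# Names of AOVs already in the scene (assumption: the AOV name lives
# on the aiAOV node's 'name' attribute)
existing = set(cmds.getAttr(node + '.name') for node in cmds.ls(type='aiAOV'))

for aovPass in custAovLst:
    if aovPass in existing:
        continue  # skip AOVs created on a previous run
    lightAov = aovs.AOVInterface().addAOV(aovPass, aovType='rgba')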
I am using Windows 10 and running the code in Jupyter Notebook (in Chrome).
This is my code:
if __name__ == '__main__':
    import itertools
    import os
    import fnmatch
    import datetime
    import numpy as np
    from jdcal import gcal2jd          # assumed source of gcal2jd
    from pyspark.sql import SparkSession

    MOD03_path = r"C:\Users\saviosebastian\MYD03.A2008001.0000.006.2012066122450.hdf"
    MOD06_path = r"C:\Users\saviosebastian\MYD06_L2.A2008001.0000.006.2013341193524.hdf"
    satellite = 'Aqua'
    yr = [2008]
    mn = [1]  # np.arange(1,13)
    dy = [1]
    # latitude and longitude boundaries of level-3 grid
    lat_bnd = np.arange(-90, 91, 1)
    lon_bnd = np.arange(-180, 180, 1)
    nlat = 180
    nlon = 360
    TOT_pix = np.zeros(nlat*nlon)
    CLD_pix = np.zeros(nlat*nlon)

    ### To use Spark in Python
    spark = SparkSession\
        .builder\
        .appName("Aggregation")\
        .getOrCreate()

    filenames0 = ['']*500
    i = 0
    for y, m, d in itertools.product(yr, mn, dy):
        #-------------find the MODIS products--------------#
        date = datetime.datetime(y, m, d)
        JD01, JD02 = gcal2jd(y, 1, 1)
        JD1, JD2 = gcal2jd(y, m, d)
        JD = np.int((JD2+JD1)-(JD01+JD02) + 1)
        granule_time = datetime.datetime(y, m, d, 0, 0)
        while granule_time <= datetime.datetime(y, m, d, 23, 55):  # 23,55
            print('granule time:', granule_time)
            MOD03_fp = 'MYD03.A{:04d}{:03d}.{:02d}{:02d}.006.?????????????.hdf'.format(y, JD, granule_time.hour, granule_time.minute)
            MOD06_fp = 'MYD06_L2.A{:04d}{:03d}.{:02d}{:02d}.006.?????????????.hdf'.format(y, JD, granule_time.hour, granule_time.minute)
            MOD03_fn, MOD06_fn = [], []
            for MOD06_flist in os.listdir(MOD06_path):
                if fnmatch.fnmatch(MOD06_flist, MOD06_fp):
                    MOD06_fn = MOD06_flist
            for MOD03_flist in os.listdir(MOD03_path):
                if fnmatch.fnmatch(MOD03_flist, MOD03_fp):
                    MOD03_fn = MOD03_flist
            if MOD03_fn and MOD06_fn:  # if both MOD06 and MOD03 products are in the directory
I am getting the following error:
Do you know any solution to this problem?
I can't give you a specific answer without knowledge of the directory system on your computer, but for now it's obvious that there is something wrong with the name of the directory that you are referencing. Use File Explorer to make sure that the directory actually exists, and also make sure that you haven't misspelled the name of the file, which could easily happen given the filename.
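You can also check this from Python itself; a quick sketch, using the path from your code:

import os

p = r"C:\Users\saviosebastian\MYD03.A2008001.0000.006.2012066122450.hdf"
print(os.path.exists(p))  # does the path exist at all?
print(os.path.isdir(p))   # is it a directory (which os.listdir requires)?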
You are giving the full path along with the file name. The os.listdir(path) method in Python is used to get the list of all files and directories in the specified directory. If we don't specify any directory, then the list of files and directories in the current working directory will be returned.
You can just write "C:/Users/saviosebastian" in path.
Same goes for os.chdir("C:/Users/saviosebastian").
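In other words, keep the directory and the file name separate; a small sketch (assuming the .hdf files sit directly in that folder, and reusing the fnmatch pattern from your code):

import os
import fnmatch

MOD03_dir = r"C:/Users/saviosebastian"                     # directory for os.listdir
MOD03_fp = 'MYD03.A2008001.0000.006.?????????????.hdf'     # pattern for the file itself

for fname in os.listdir(MOD03_dir):   # listdir expects a directory, not a file path
    if fnmatch.fnmatch(fname, MOD03_fp):
        print(os.path.join(MOD03_dir, fname))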
I am working on a project to check a file directory and automatically add log files as they are created. A file is being generated every five minutes, but some of the files are being created with a "0" filesize and I would like to alert when this happens.
So the sequence of steps I would like to have are essentially:
Get time (MM:DD:YY HH:MM:SS) *Not sure if I need to do this...
CD to Folder Directory /Netflow/YY/MM/DD
Search for filename "nfcapd.YYYYMMDDHHMM" where MM increments by 5.
If filesize is 0, then email Johnny, Sally and Jimmy
Wait 6 minutes and repeat
This is what I have pieced together thus far. How can I get the desired functionality?
import os
import time

def is_non_zero_file(fpath):
    return True if os.path.isfile(fpath) and os.path.getsize(fpath) > 0 else False

# I need to check storage/Netflow for files named by time e.g 13_56_05.txt
while True:
    time.sleep(360)
In addition to enumerating the files in a given path, and subsequently filtering down to only the zero-length files, you probably want to maintain some type of state to ensure you aren't notified multiple times about the same zero-length file. That is, you probably don't want a notification that the same file is zero-length indefinitely (although you can modify the example below if you want that behavior).
You may optionally want to do things like verify that the file name strictly meets your naming convention. You may also want to validate that the date-stamp included in the file name is a valid datetime.
The example below uses the glob module (itself leveraging os.listdir() and fnmatch.fnmatch()) to build up a set of possible files for inclusion. [1]
The example is intentionally simple, and leverages a single class to store log sample 'state'. KEEP_SAMPLES samples are maintained (instances of logState() in the log_states list), achieved by using list slicing.
A single alert(msg) function is supplied as a stub to something that might send mail, etc...
References:
[1] https://docs.python.org/3.2/library/glob.html
#!/usr/bin/python3
import os
import glob
import re
from datetime import datetime, timezone
import time
from pprint import pprint

class logState():
    def __init__(self, log_path, glob_patt, re_patt, dt_fmt):
        self.dt = datetime.now(timezone.utc)
        self.log_path = log_path
        self.glob_patt = glob_patt
        self.re_patt = re_patt
        self.dt_fmt = dt_fmt
        self.empty_logs = []
        self.nonempty_logs = []
        # Retrieve only files from glob
        self.files = [f for f in
                      glob.glob(self.log_path + self.glob_patt)
                      if os.path.isfile(f)]
        for f in self.files:
            unq_fname = f.split('/')[-1]
            # Tighter pattern matching
            if re.match(re_patt, unq_fname) == None:
                continue
            # Get the datetime portion of the file name
            f_dtstamp = unq_fname.split('.')[-1]
            # Make sure the datetime stamp represents a valid date
            # (strptime raises ValueError when it doesn't)
            try:
                datetime.strptime(f_dtstamp, self.dt_fmt)
            except ValueError:
                continue
            # Check file size, add to the appropriate list
            if os.path.getsize(f) <= 0:
                self.empty_logs.append(f)
            else:
                self.nonempty_logs.append(f)

def alert(msg):
    print("ALERT!: {0}".format(msg))

if __name__ == "__main__":
    # How long to sleep
    SLEEP_SECS = 5
    # How many samples to keep
    KEEP_SAMPLES = 5
    log_states = []
    # Definition for what log states we'll look for
    log_path = './'
    glob_patt = 'nfcapd.[0-9]*'
    re_patt = 'nfcapd.([0-9]{12})'
    dt_fmt = "%Y%m%d%H%M"
    print("-- Setup --")
    print("Sample files in '{0}'".format(log_path))
    print("\t{0} samples kept:".format(KEEP_SAMPLES))
    print("\tglob pattern: '{0}'".format(glob_patt))
    print("\tregex pattern: '{0}'".format(re_patt))
    print("\tdatetime string: '{0}'".format(dt_fmt))
    print("")
    # Collect the initial state
    log_states.append(logState(log_path, glob_patt, re_patt, dt_fmt))
    while True:
        # Print state inventory and current state detail
        print("-- Log States Stored --")
        for i, log_state in enumerate(log_states):
            print("Log state {0} # {1}".format(i, log_state.dt))
        print(" -- Logs size > 0 --")
        pprint(log_states[-1].nonempty_logs)
        print(" -- Logs size <= 0 --")
        pprint(log_states[-1].empty_logs)
        print("")
        time.sleep(SLEEP_SECS)
        log_states = log_states[-KEEP_SAMPLES+1:]
        log_states.append(logState(log_path, glob_patt, re_patt, dt_fmt))
        # p = previous sample, c = current
        p = set(log_states[-2].empty_logs)
        c = set(log_states[-1].empty_logs)
        # only report the items in the current sample
        # not in the last
        if len(c.difference(p)) > 0:
            alert("\nNew zero length logs: " + str(c.difference(p)) + "\n")