My goal is to create a Python script that formats/modifies an XML file.
The path to the file and the file name are to be given as command-line arguments.
Here is my code:
import lxml.etree as etree
from argparse import ArgumentParser
import sys
import os
# Script entry point: declares the required -p/--path and -f/--file options,
# looks for the named file under the given directory with os.walk, and rewrites
# the XML file pretty-printed through lxml.
# NOTE(review): the original indentation of this listing is lost, so the
# nesting of the statements below cannot be confirmed from the text alone.
def main():
parser = ArgumentParser()
parser.add_argument('-p', '--path', help="path to file's directory", required=True)
parser.add_argument('-f', '--file', help="file name", required=True)
args = parser.parse_args()
# `args` is not read again below; the raw sys.argv entries are used instead.
# When the script is launched as `-p <dir> -f <name>`, sys.argv[1] is the
# literal "-p" and sys.argv[2] is <dir>, not the directory and the file name.
root_dir = sys.argv[1]
file_name = sys.argv[2]
# Plain string concatenation: no path separator is inserted between the parts.
path = sys.argv[1] + sys.argv[2]
for dirpath, dirnames, files in os.walk(root_dir):
for file in files:
if file == file_name:
print(os.path.join(dirpath, file_name))
# `myfile` is never read; etree.parse() below is given `path` directly.
with open(path, 'r', encoding="utf8") as myfile:
try:
# Printed before the parse/write calls run, so it does not confirm success.
print("DONE")
# Rebinds `parser`, which held the ArgumentParser until this point.
parser = etree.XMLParser(remove_blank_text = True)
tree = etree.parse(path, parser)
tree.write(path, pretty_print = True)
except IOError:
print("IO Exception Occured")
if __name__ == "__main__":
main()
When I run it from cmd, I get no errors, but the file is not formatted; even if I give a file name that doesn't exist, there are still no errors. In short, nothing happens when I run it from cmd.
When I try to debug it in Visual Studio, I get an error saying that two arguments need to be given.
Can anybody tell me how to fix my code? I have no idea where it goes wrong.
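You're mixing up sys.argv and ArgumentParser. The code gives you an unexpected result because your variables do not hold what you think they hold: when the script is launched with -p and -f, sys.argv[1] is the literal string '-p' and sys.argv[2] is the directory, not the directory and the file name.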
root_dir = sys.argv[1]
file_name = sys.argv[2]
# Add some print statements to examine these variables:
print(f'root_dir:{root_dir}')
print(f'file_name:{file_name}')
Look:
Do this instead:
root_dir = args.path
file_name = args.file
Here is code I used to test:
from argparse import ArgumentParser
import sys
def main():
    """Parse the -p/--path and -f/--file options and print their values.

    Both options are declared as required, so argparse exits with a usage
    message when either one is missing.
    """
    parser = ArgumentParser()
    parser.add_argument('-p', '--path', help="path to file's directory", required=True)
    parser.add_argument('-f', '--file', help="file name", required=True)
    args = parser.parse_args()

    # Read the values from the parsed namespace rather than from sys.argv,
    # so the option flags themselves are never mistaken for values.
    root_dir = args.path
    file_name = args.file
    print(f'root_dir:{root_dir}')
    print(f'file_name:{file_name}')


if __name__ == "__main__":
    main()
You are mixing two things!
METHOD 1
Launch with XmlFormat.py -p c:\User\Desktop\test\ -f test.xml
import lxml.etree as etree
from argparse import ArgumentParser
import sys
import os
def main():
    """Pretty-print an XML file selected with the -p/--path and -f/--file options.

    The directory given with ``-p`` is walked recursively. Every file whose
    name equals the ``-f`` value is printed and rewritten in place with lxml's
    pretty printer. Nothing is printed when no file matches.
    """
    arg_parser = ArgumentParser()
    arg_parser.add_argument('-p', '--path', help="path to file's directory", required=True)
    arg_parser.add_argument('-f', '--file', help="file name", required=True)
    args = arg_parser.parse_args()
    root_dir = args.path
    file_name = args.file

    for dirpath, _dirnames, files in os.walk(root_dir):
        if file_name not in files:
            continue
        # os.path.join inserts the separator, so the directory argument works
        # with or without a trailing slash, and the path always points at the
        # file that was actually found.
        path = os.path.join(dirpath, file_name)
        print(path)
        try:
            # Separate name so the ArgumentParser above is never rebound.
            xml_parser = etree.XMLParser(remove_blank_text=True)
            tree = etree.parse(path, xml_parser)
            tree.write(path, pretty_print=True)
        except IOError:
            print("IO Exception Occured")
        else:
            # Report success only after the file has been rewritten.
            print("DONE")


if __name__ == "__main__":
    main()
METHOD 2
Launch with XmlFormat.py c:\User\Desktop\test\ test.xml (do not use -p and -f)
import lxml.etree as etree
from argparse import ArgumentParser
import sys
import os
def main():
    """Pretty-print an XML file named by two positional command-line arguments.

    ``sys.argv[1]`` is the directory to search and ``sys.argv[2]`` is the file
    name. The directory is walked recursively; every file with that name is
    printed and rewritten in place with lxml's pretty printer.

    Exits with a usage message when the argument count is not exactly two.
    """
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <directory> <file name>")
    root_dir, file_name = sys.argv[1], sys.argv[2]

    for dirpath, _dirnames, files in os.walk(root_dir):
        if file_name not in files:
            continue
        # os.path.join inserts the separator, so the directory argument works
        # with or without a trailing slash.
        path = os.path.join(dirpath, file_name)
        print(path)
        try:
            xml_parser = etree.XMLParser(remove_blank_text=True)
            tree = etree.parse(path, xml_parser)
            tree.write(path, pretty_print=True)
        except IOError:
            print("IO Exception Occured")
        else:
            # Report success only after the file has been rewritten.
            print("DONE")


if __name__ == "__main__":
    main()
Related
I am tasked with creating an app using python to sort files in a given directory.
I want the input path to be passed as a command line argument using
cmd_parser.add_argument()
instead of being accepted as normal input after running the code.
The code does what it's supposed to, but I don't want the path to be received as an input.
Any help is greatly appreciated
import os
import shutil
from argparse import ArgumentParser
# Entry point of the file sorter: sets up the command-line parser, asks for a
# directory interactively, creates "Sorted Files" with four sub-folders inside
# it, moves the files of the directory into those sub-folders by extension and
# prints how many files with a known extension were found.
# NOTE(review): the original indentation of this listing is lost, so the
# nesting of the statements below cannot be confirmed from the text alone.
def main():
cmd_parser = ArgumentParser(description="Application")
cmd_parser.add_argument(
'-v', '--version',
action='version',
version='Application v0.0.1',
help='show version of the application'
)
# NOTE(review): ArgumentParser registers -h/--help itself unless
# add_help=False is passed, so this explicit registration presumably
# conflicts with the built-in one — confirm.
cmd_parser.add_argument(
'-h', '--help',
action='help',
help='Please enter a valid directory which contains the files to be sorted'
)
cmd_args = cmd_parser.parse_args()
try:
# No argument named "action" or "file" is declared on cmd_parser above.
globals()[cmd_args.action](cmd_args.file)
except Exception as ex:
print('[ERROR]', str(ex))
# NOTE(review): `sys` is not among the imports shown with this listing.
sys.exit(1)
# The directory is requested interactively until an existing one is entered;
# nothing from cmd_args is used to obtain it.
while True:
directory = input("Please input the directory including the files: ")
if not os.path.isdir(directory):
print("Please input a valid directory")
else:
break
path = directory
os.chdir(path)
new_folder = "Sorted Files"
os.makedirs(new_folder)
path_2 = path+"/"+new_folder
# The working directory becomes "Sorted Files", so the relative folder names
# used by makedirs() and shutil.move() below resolve inside it.
os.chdir(path_2)
new_folder_doc = "Documents"
new_folder_texts = "Texts"
new_folder_images = "Images"
new_folder_other = "Other"
os.makedirs(new_folder_doc)
os.makedirs(new_folder_texts)
os.makedirs(new_folder_images)
os.makedirs(new_folder_other)
for file in os.listdir(path):
file_path = os.path.join(path, file)
if os.path.isfile(file_path):
# `file_name` is not used again in this function.
file_name = os.path.basename(file_path)
# In each chain below, `is True` / `is not True` binds only to the last
# endswith() call, because `is` has higher precedence than `or`.
if file_path.endswith('.png') or file_path.endswith('.gif') or file_path.endswith('.bmp') or\
file_path.endswith('.jpg') or file_path.endswith('.jpeg') is True:
shutil.move(file_path, new_folder_images)
continue
if file_path.endswith('.txt') or file_path.endswith('.ini') or file_path.endswith('.log') is True:
shutil.move(file_path, new_folder_texts)
continue
if file_path.endswith('.pdf') or file_path.endswith('.docx') or file_path.endswith('.doc') or\
file_path.endswith('.xls') or file_path.endswith('.xlsx') or file_path.endswith('.csv') is True:
shutil.move(file_path, new_folder_doc)
continue
# The .docx/.txt/.bmp/.png files have already been moved by the branches
# above, so the part of this condition that can still be true is the final
# "does not end with .png" test.
if file_path.endswith('.docx') or file_path.endswith('.txt') or file_path.endswith('.bmp') or \
file_path.endswith('.png') is not True:
shutil.move(file_path, new_folder_other)
continue
my_folder = path # your path here
count = 0
# Counts files with one of the listed extensions anywhere below `path`,
# including the sub-folders created above; other extensions are not counted.
for root, dirs, files in os.walk(my_folder):
count += len([fn for fn in files if fn.endswith(".pdf") or fn.endswith(".docx")
or fn.endswith(".doc") or fn.endswith(".xls") or fn.endswith(".xlsx") or fn.endswith(".csv")
or fn.endswith(".jpeg") or fn.endswith(".jpg") or fn.endswith(".bmp") or fn.endswith(".gif")
or fn.endswith(".png") or fn.endswith(".txt") or fn.endswith(".ini") or fn.endswith(".log")])
print(f"Organized {count} files")
Error resulting from fixed code (It's also not doing what it's supposed to anymore)
usage: StackTestSorter.py [-h] [-v] directory
StackTestSorter.py: error: the following arguments are required: directory
Rather than having:
def main():
cmd_parser = ArgumentParser(description="Application")
cmd_parser.add_argument(
'-v', '--version',
action='version',
version='Application v0.0.1',
help='show version of the application'
)
cmd_parser.add_argument(
'-h', '--help',
action='help',
help=''
)
cmd_args = cmd_parser.parse_args()
try:
globals()[cmd_args.action](cmd_args.file)
except Exception as ex:
print('[ERROR]', str(ex))
sys.exit(1)
while True:
directory = input("Please input the directory including the files: ")
if not os.path.isdir(directory):
print("Please input a valid directory")
else:
break
path = directory
use the following instead (the explicit --help argument is removed because argparse adds -h/--help automatically):
# Parser with a -v/--version option and one positional "directory" argument;
# the parsed directory is stored in `path`.
cmd_parser = ArgumentParser(description="Application")
cmd_parser.add_argument(
'-v', '--version',
action='version',
version='Application v0.0.1',
help='show version of the application'
)
cmd_parser.add_argument(
"directory", # this will be a positional argument
help="a valid directory which contains the files to be sorted",
)
cmd_args = cmd_parser.parse_args()
# The value typed on the command line replaces the interactive input() prompt.
path = cmd_args.directory
If I understood you correctly, all you need to do is add one more argument, something like:
cmd_parser.add_argument("-p", "--path", help="", required=True, default=False)
and then after cmd_args you can do:
path = cmd_args.path
Below is some code I'm working on that merges PDF files and creates a bookmark for each one, named after the file being merged. I now need to add nested bookmarks, which means I have to call PdfFileMerger.addBookmark explicitly instead of just passing the bookmark to merger.append. addBookmark needs a page number as an argument, whereas before the bookmarks were automatically placed on the right page. As can be seen below, I have tried to derive the page number from the previous PDF's page count, but it needs to be the cumulative total of the page counts of all the previous PDFs. Can anyone help me with this?
from argparse import ArgumentParser
from glob import glob
from PyPDF2 import PdfFileMerger, PdfFileReader
import os
from os import listdir
from os.path import isfile, join
# Keep regular files only; sub-directories of the source folder are skipped.
# 'filepath' is the placeholder directory used throughout this listing.
onlyfiles = [f for f in listdir(r'filepath') if isfile(join(r'filepath', f))]
os.chdir(r'filepath')

# Read the page count of every file. The handle is closed as soon as the
# count has been read, so no file stays open for the rest of the script.
for file in onlyfiles:
    with open(file, 'rb') as handle:
        fileReader = PdfFileReader(handle)
        totalpages = fileReader.numPages

source_dir = os.getcwd()
merger = PdfFileMerger(strict=False)

# Normalise upper-case ".PDF" extensions to ".pdf" and count the renames.
oldextension = '.PDF'
newextension = '.pdf'
file_counter = 0
with os.scandir(source_dir) as files_and_folders:
    for element in files_and_folders:
        if element.is_file():
            root, ext = os.path.splitext(element.path)
            if ext == oldextension:
                new_path = root + newextension
                os.rename(element.path, new_path)
                file_counter += 1
def merge(path, output_filename):
    """Merge every ``*.pdf`` file in *path* into *output_filename*.

    A top-level "contents" bookmark with a nested "moisture content" bookmark
    is created. One bookmark per input file, named after the file without its
    extension, is nested under "moisture content" and points at the first
    page that file occupies in the merged document.

    Args:
        path: Directory that is searched for ``*.pdf`` files.
        output_filename: Name of the merged PDF to write. A source file with
            this name is skipped so the output is not merged into itself.
    """
    merger = PdfFileMerger(strict=False)
    content = merger.addBookmark("contents", 0, parent=None)
    parent = merger.addBookmark("moisture content", 0, parent=content)

    # Index of the first page the next file will occupy in the merged output.
    first_page = 0
    # Sorted so bookmarks and pages are produced in one deterministic order.
    for pdffile in sorted(glob(os.path.join(path, '*.pdf'))):
        if os.path.basename(pdffile) == output_filename:
            continue
        with open(pdffile, 'rb') as handle:
            page_count = PdfFileReader(handle).numPages
        bookmark = os.path.splitext(os.path.basename(pdffile))[0]
        merger.addBookmark(bookmark, first_page, parent=parent)
        print(f"Appending: '{pdffile}'")
        # Append in the same loop that creates the bookmark, so the bookmark
        # order and the page order cannot drift apart.
        merger.append(pdffile)
        # Accumulate: the next file starts after ALL pages appended so far.
        first_page += page_count

    merger.write(output_filename)
    merger.close()
# First command-line entry point: parses -o/--output and -p/--path and passes
# both values to merge().
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("-o", "--output",
dest="output_filename",
default="moisturecontent.pdf",
help="write merged PDF to FILE",
metavar="FILE")
parser.add_argument("-p", "--path",
dest="path",
default=".",
help="path of source PDF files")
args = parser.parse_args()
merge(args.path, args.output_filename)
# NOTE(review): second entry point, identical to the first except for the
# default output name ("group1.pdf"). When the file is run as a script both
# guards are true, so merge() is called twice — confirm this is intended.
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("-o", "--output",
dest="output_filename",
default="group1.pdf",
help="write merged PDF to FILE",
metavar="FILE")
parser.add_argument("-p", "--path",
dest="path",
default=".",
help="path of source PDF files")
args = parser.parse_args()
merge(args.path, args.output_filename)
The above code creates a PDF file with the first bookmark on page 1 (index 0 in Python terms) and the next bookmark at the end of the first PDF, but after that it doesn't work as intended because pdfpages is not cumulative.
I have two python files, when I run "python downloader.py todownload" it downloads images from todownload.txt file, but when I run it indirectly (calling function of downloader.py via other python file) I don't get the images downloaded.
I tried debugging and printing what arguments are sent to function "download_all_images(args)" when I call it both ways, and the object args match.
downloader.py
import argparse
from concurrent import futures
import os
import re
import sys
import boto3
import botocore
import tqdm
BUCKET_NAME = 'open-images-dataset'
REGEX = r'(test|train|validation|challenge2018)/([a-fA-F0-9]*)'
def check_and_homogenize_one_image(image):
    """Yield the ``(split, image_id)`` pair parsed from one image string.

    *image* is matched against ``REGEX`` from the start of the string. When it
    does not match, ``re.match`` returns ``None`` and the ``.groups()`` call
    raises ``AttributeError``. Because this is a generator, that only happens
    once it is iterated.
    """
    split, image_id = re.match(REGEX, image).groups()
    yield split, image_id
def check_and_homogenize_image_list(image_list):
    """Yield a ``(split, image_id)`` pair for every entry of *image_list*.

    Raises:
        ValueError: When an entry is not recognised. The message carries the
            zero-based position of the entry and the offending string, and the
            original error is chained as the cause.
    """
    for line_number, image in enumerate(image_list):
        try:
            yield from check_and_homogenize_one_image(image)
        except (ValueError, AttributeError) as error:
            raise ValueError(
                f'ERROR in line {line_number} of the image list. The following image '
                f'string is not recognized: "{image}".') from error
def read_image_list_file(image_list_file):
    """Yield one image string per line of the text file *image_list_file*.

    Surrounding whitespace is stripped from every line and each ``.jpg``
    occurrence is removed. The file is read lazily and stays open until the
    generator is exhausted or closed.
    """
    with open(image_list_file, 'r') as f:
        for line in f:
            yield line.strip().replace('.jpg', '')
def download_one_image(bucket, split, image_id, download_folder):
    """Download ``{split}/{image_id}.jpg`` from *bucket* into *download_folder*.

    The local file is named ``{image_id}.jpg``. A botocore ``ClientError``
    is turned into ``sys.exit`` with a message naming the image.
    """
    try:
        bucket.download_file(f'{split}/{image_id}.jpg',
                             os.path.join(download_folder, f'{image_id}.jpg'))
    except botocore.exceptions.ClientError as exception:
        sys.exit(
            f'ERROR when downloading image `{split}/{image_id}`: {str(exception)}')
def download_all_images(args):
    """Downloads all images specified in the input file.

    Args:
        args: Mapping with the keys ``'image_list'`` (path of the list file),
            ``'num_processes'`` (size of the worker thread pool) and
            ``'download_folder'`` (target directory; a falsy value selects the
            current working directory).
    """
    # Debug output kept from the original listing.
    print(type(args))
    print(args)

    bucket = boto3.resource(
        's3', config=botocore.config.Config(
            signature_version=botocore.UNSIGNED)).Bucket(BUCKET_NAME)

    download_folder = args['download_folder'] or os.getcwd()
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(download_folder, exist_ok=True)

    try:
        image_list = list(
            check_and_homogenize_image_list(
                read_image_list_file(args['image_list'])))
    except ValueError as exception:
        sys.exit(exception)

    progress_bar = tqdm.tqdm(
        total=len(image_list), desc='Downloading images', leave=True)
    try:
        with futures.ThreadPoolExecutor(
                max_workers=args['num_processes']) as executor:
            all_futures = [
                executor.submit(download_one_image, bucket, split, image_id,
                                download_folder)
                for (split, image_id) in image_list
            ]
            for future in futures.as_completed(all_futures):
                # result() re-raises whatever the worker raised.
                future.result()
                progress_bar.update(1)
    finally:
        # Close the bar even when a download fails part-way through.
        progress_bar.close()
if __name__ == '__main__':
    # Command-line entry point: one positional list file plus two optional
    # settings. The parsed namespace is converted to a dict because
    # download_all_images() reads its argument by key.
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'image_list',
        type=str,
        default=None,
        help=('Filename that contains the split + image IDs of the images to '
              'download. Check the document'))
    parser.add_argument(
        '--num_processes',
        type=int,
        default=5,
        help='Number of parallel processes to use (default is 5).')
    parser.add_argument(
        '--download_folder',
        type=str,
        default=None,
        help='Folder where to download the images.')
    download_all_images(vars(parser.parse_args()))
imagegatherer.py
import pandas as pd
import argparse
from concurrent import futures
import os
import re
import sys
import boto3
import botocore
import tqdm
import downloader
# Class descriptions: column 0 is the label id, column 1 the readable name.
labels = pd.read_csv('class-descriptions-boxable.csv', header=None)
labels.columns = ['id', 'name']
classified = pd.read_csv('train-annotations-human-imagelabels-boxable.csv', nrows=2500)
classes = ['Apple', 'Motorcycle', 'Snowman']
ids = labels[labels['name'].isin(classes)]

for _, row in ids.iterrows():
    # `label_id` rather than `id`, so the builtin id() is not shadowed.
    label_id = row['id']
    good = classified.loc[classified['LabelName'] == label_id]['ImageID'].tolist()

    with open('todownload.txt', 'w') as list_file:
        for img in good:
            list_file.write('train/' + img + '\n')

    # Start the download only after the `with` block has closed the list
    # file. While the file is still open its content may sit in the write
    # buffer, and the downloader would then read an empty or partial list.
    args = {
        'image_list': 'todownload.txt',
        'num_processes': 5,
        'download_folder': None,
    }
    downloader.download_all_images(args)
todownload.txt content that should download 4 images:
train/0000048549557964
train/000023aa04ab09ed
train/00002f4ff380c64c
train/000037c2dd414b46
I'm new to Python and currently playing around with argparse. I'm trying to call a function with a directory path given as a command-line argument, followed by an argparse option (-name) and a regex; the script should go through all the files in the directory and print every file that matches the regex, like so:
./find.py ../seek -name '[a-z]*\.txt'
However, I'm getting an error that looks like this:
usage: find.py [-h] [--path PATH] [-name] [--regex REGEX]
find.py: error: unrecognized arguments: . . / s e e k / p r o g r a m . c
And without the -name its just printing all the files inside the path.
Here is what I have so far:
#!/usr/bin/python2.7
import os, sys, argparse,re
from stat import *
# Builds the option parser, parses `argv`, prints the resulting namespace and
# returns it.
def parse(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument('--path', help='path of directory', action='store')
# store_true makes -name/--name a flag without a value, so a pattern typed
# after -name is not consumed by this option.
parser.add_argument('-name', '--name', action='store_true')
parser.add_argument('--regex', default=r"[a-z0-9A-Z]")
args = parser.parse_args(argv)
print(args)
return args
# Walks the entries of the directory named by sys.argv[1] and prints regular
# files. The `argv` parameter is not read in the body.
# NOTE(review): the original indentation of this listing is lost, so the
# nesting of the statements below cannot be confirmed from the text alone.
def main(argv=None):
# The directory is taken from the raw command line, not from --path.
direc = sys.argv[1]
# `files` is not used again in this function.
files = []
for f in os.listdir(direc):
pathname = os.path.join(direc, f)
mode = os.stat(pathname).st_mode
if S_ISREG(mode):
# A path string, not a list of arguments, is passed as argv, once per file.
# NOTE(review): argparse presumably treats the string as a sequence of
# one-character arguments — confirm.
args = parse(pathname)
if args.name:
# re.match() returns a match object or None, and that object is what
# gets printed here.
dirls = [re.match(args.regex, pathname)]
print(dirls)
else:
print pathname
if __name__ == '__main__':main()
Any thoughts?
argparse path example: several argument types with custom type handlers. For the path, pass '-path' followed by the directory as its value.
import os
import argparse
from datetime import datetime
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Options:
        -path: a directory, validated by ``dir_path``.
        -e/--yearly: zero or more dates, each converted by ``date_year``.
        -a/--monthly: zero or more dates, each converted by ``date_month``.
    """
    parser = argparse.ArgumentParser(description='Process command line arguments.')
    parser.add_argument('-path', type=dir_path)
    parser.add_argument('-e', '--yearly', nargs='*', help='yearly date', type=date_year)
    parser.add_argument('-a', '--monthly', nargs='*', help='monthly date', type=date_month)
    return parser.parse_args()
def dir_path(path):
    """Return *path* unchanged when it names an existing directory.

    Raises:
        argparse.ArgumentTypeError: When *path* is not a directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path
def date_year(date):
    """Convert a ``YYYY`` string into a ``datetime``.

    Returns:
        The parsed ``datetime``, or ``None`` when *date* is empty.

    Raises:
        argparse.ArgumentTypeError: When *date* does not match ``%Y``.
    """
    if not date:
        return None
    try:
        return datetime.strptime(date, '%Y')
    except ValueError as error:
        raise argparse.ArgumentTypeError(f"Given Date({date}) not valid") from error
def date_month(date):
    """Convert a ``YYYY/MM`` string into a ``datetime``.

    Returns:
        The parsed ``datetime``, or ``None`` when *date* is empty.

    Raises:
        argparse.ArgumentTypeError: When *date* does not match ``%Y/%m``.
    """
    if not date:
        return None
    try:
        return datetime.strptime(date, '%Y/%m')
    except ValueError as error:
        raise argparse.ArgumentTypeError(f"Given Date({date}) not valid") from error
def main():
    """Parse the command line; the example stops once the arguments are parsed."""
    # Kept as a named local so the example can be extended from here.
    parsed_args = parse_arguments()


if __name__ == "__main__":
    main()
In order for your program to operate, you need a path, so the --path option must be mandatory. Modify your parse() function to change the line
parser.add_argument('--path', help='path of directory', action='store')
to
parser.add_argument('--path', help='path of directory', action='store', required=True)
You need to call parse_args() only once. Move the parse() invocation out of the loop, to the top of main().
And you needn't do
direc = sys.argv[1]
if you are using argparse.
re.match() returns a match object, which is probably not what you want to print.
You might want to take a look at match() versus search().
The match() function only checks if the RE matches at the beginning of the string while search() will scan forward through the string for a match.
If you wanted to print the file names matching the regex, you could do
if S_ISREG(mode):
#args = parse(pathname)
if args.name:
#dirls = re.match(args.regex, pathname)
dirls = re.search(args.regex, pathname)
if( dirls ):
print(pathname)
else:
print pathname
So main() should be something like
def main(argv=None):
    """Print the regular files found directly inside the ``--path`` directory.

    With ``-name`` only the files whose path matches ``--regex`` somewhere
    (``re.search``) are printed; without it every regular file is printed.

    Args:
        argv: Argument list to parse; ``sys.argv[1:]`` is used when omitted.
    """
    # Parse once, before the loop, and take the directory from --path.
    args = parse(sys.argv[1:] if argv is None else argv)
    print(args)
    direc = args.path
    for f in os.listdir(direc):
        pathname = os.path.join(direc, f)
        mode = os.stat(pathname).st_mode
        if not S_ISREG(mode):
            continue
        # search() scans the whole path; match() would only test its start.
        if not args.name or re.search(args.regex, pathname):
            # Parenthesised single-argument print works on Python 2 and 3.
            print(pathname)
In order to specify the regex matching the file names, you must specify the regex using the --regex option. By default, you've made it to match names having only numbers and (English) letters.
./find.py --path ../seek -name --regex [a-z]\*.txt
or
./find.py --path ../seek -name --regex '[a-z]*.txt'
When I specify the -a switch to add a line to the config file, read_config is for some reason called as well. For example, I use ./script.py -a newfile.txt and the file is added correctly, but the script then prints "Config File not found".
# Command-line options: config file to read, backup location, and a line to
# append to the config data.
parser = argparse.ArgumentParser(description='Copy multiple Files from a specified data file')
parser.add_argument('-c', '--configfile', default="config.dat", help='file to read the config from')
parser.add_argument('-l', '--location', default="/home/admin/Documents/backup/",help='Choose location to store files')
# No default is given for -a/--add.
# NOTE(review): argparse presumably stores None when the switch is omitted — confirm.
parser.add_argument('-a', '--add', help='add new line to config data')
# Copies every file listed (one name per line) in the config file `data` into
# a fixed backup directory; reports a missing file via logger and print.
# NOTE(review): `shutil` and `logger` are not defined in the listing shown here.
def read_config(data):
try:
# Hard-coded destination; the -l/--location value is not consulted here.
dest = '/home/admin/Documents/backup/'
# Read in data from config.dat
# `data` is rebound from the file name to the open file object; the file is
# not closed in this function.
data = open(data)
# Iterate through the list of files, split on '\n'
filelist = data.read().split('\n')
# Copy each listed file, stripping white space and skipping empty lines
for file in filelist:
if file:
shutil.copy(file.strip(), dest)
# FileNotFoundError is caught for everything in the try body, i.e. for the
# open() call as well as for each shutil.copy() call.
except FileNotFoundError:
logger.error("Config file not found")
print ("Config File not found")
I'm not quite sure why: the section below works, but read_config is also called and ends up in its except branch. Where am I going wrong?
# Appends `add` plus a newline to config.dat. The file name is fixed here and
# does not follow the -c/--configfile option.
def addtofile(add):
f = open('config.dat', "a")
f.write(add + '\n')
f.close()
args = vars(parser.parse_args())
# Both calls below run on every invocation; neither one is conditional on
# which switch was given on the command line.
read = read_config(args['configfile'])
add = addtofile(args['add'])
# Minimal flag example: -c is a boolean switch (False unless given).
parser = argparse.ArgumentParser()
parser.add_argument('-c', action='store_true')
a = parser.parse_args()
# Python delimits blocks by indentation, not by braces.
if a.c:
    print("get custom config")
else:
    print("using default config file")
I think your solution will be:
# Minimal flag example: -c is a boolean switch (False unless given).
parser = argparse.ArgumentParser()
parser.add_argument('-c', action='store_true')
a = parser.parse_args()
# Python delimits blocks by indentation, not by braces.
if a.c:
    print("get custom config")
else:
    print("using default config file")