I used the "pdf2txt.py" program which came as part of the pdfminer package in GitHub to try convert pdf file to text.As per the instruction , I ran the program by typing "python pdf2txt.py somefile.pdf" in the Mac OS terminal.The output was correctly generated and printed in the terminal itself. Now my question is this, how do I direct this output to a text file.I only know the bare basics of python and I am not able to figure out which line in the program actually prints the output and what needs to be changed to direct the same into a .txt file?
import argparse
import logging
import six
import sys
import pdfminer.settings
pdfminer.settings.STRICT = False
import pdfminer.high_level
import pdfminer.layout
from pdfminer.image import ImageWriter
def extract_text(files=[], outfile='-',
_py2_no_more_posargs=None, # Bloody Python2 needs a shim
no_laparams=False, all_texts=None, detect_vertical=None, # LAParams
word_margin=None, char_margin=None, line_margin=None, boxes_flow=None, # LAParams
output_type='text', codec='utf-8', strip_control=False,
maxpages=0, page_numbers=None, password="", scale=1.0, rotation=0,
layoutmode='normal', output_dir=None, debug=False,
disable_caching=False, **other):
if _py2_no_more_posargs is not None:
raise ValueError("Too many positional arguments passed.")
if not files:
raise ValueError("Must provide files to work upon!")
# If any LAParams group arguments were passed, create an LAParams object and
# populate with given args. Otherwise, set it to None.
if not no_laparams:
laparams = pdfminer.layout.LAParams()
for param in ("all_texts", "detect_vertical", "word_margin", "char_margin", "line_margin", "boxes_flow"):
paramv = locals().get(param, None)
if paramv is not None:
setattr(laparams, param, paramv)
else:
laparams = None
imagewriter = None
if output_dir:
imagewriter = ImageWriter(output_dir)
if output_type == "text" and outfile != "-":
for override, alttype in ( (".htm", "html"),
(".html", "html"),
(".xml", "xml"),
(".tag", "tag") ):
if outfile.endswith(override):
output_type = alttype
if outfile == "-":
outfp = sys.stdout
if outfp.encoding is not None:
codec = 'utf-8'
else:
outfp = open(outfile, "wb")
for fname in files:
with open(fname, "rb") as fp:
pdfminer.high_level.extract_text_to_fp(fp, **locals())
return outfp
def maketheparser():
parser = argparse.ArgumentParser(description=__doc__, add_help=True)
parser.add_argument("files", type=str, default=None, nargs="+", help="File to process.")
parser.add_argument("-d", "--debug", default=False, action="store_true", help="Debug output.")
parser.add_argument("-p", "--pagenos", type=str, help="Comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.")
parser.add_argument("--page-numbers", type=int, default=None, nargs="+", help="Alternative to --pagenos with space-separated numbers; supercedes --pagenos where it is used.")
parser.add_argument("-m", "--maxpages", type=int, default=0, help="Maximum pages to parse")
parser.add_argument("-P", "--password", type=str, default="", help="Decryption password for PDF")
parser.add_argument("-o", "--outfile", type=str, default="-", help="Output file (default \"-\" is stdout)")
parser.add_argument("-t", "--output_type", type=str, default="text", help="Output type: text|html|xml|tag (default is text)")
parser.add_argument("-c", "--codec", type=str, default="utf-8", help="Text encoding")
parser.add_argument("-s", "--scale", type=float, default=1.0, help="Scale")
parser.add_argument("-A", "--all-texts", default=None, action="store_true", help="LAParams all texts")
parser.add_argument("-V", "--detect-vertical", default=None, action="store_true", help="LAParams detect vertical")
parser.add_argument("-W", "--word-margin", type=float, default=None, help="LAParams word margin")
parser.add_argument("-M", "--char-margin", type=float, default=None, help="LAParams char margin")
parser.add_argument("-L", "--line-margin", type=float, default=None, help="LAParams line margin")
parser.add_argument("-F", "--boxes-flow", type=float, default=None, help="LAParams boxes flow")
parser.add_argument("-Y", "--layoutmode", default="normal", type=str, help="HTML Layout Mode")
parser.add_argument("-n", "--no-laparams", default=False, action="store_true", help="Pass None as LAParams")
parser.add_argument("-R", "--rotation", default=0, type=int, help="Rotation")
parser.add_argument("-O", "--output-dir", default=None, help="Output directory for images")
parser.add_argument("-C", "--disable-caching", default=False, action="store_true", help="Disable caching")
parser.add_argument("-S", "--strip-control", default=False, action="store_true", help="Strip control in XML mode")
return parser
# main
def main(args=None):
P = maketheparser()
A = P.parse_args(args=args)
if A.page_numbers:
A.page_numbers = set([x-1 for x in A.page_numbers])
if A.pagenos:
A.page_numbers = set([int(x)-1 for x in A.pagenos.split(",")])
imagewriter = None
if A.output_dir:
imagewriter = ImageWriter(A.output_dir)
if six.PY2 and sys.stdin.encoding:
A.password = A.password.decode(sys.stdin.encoding)
if A.output_type == "text" and A.outfile != "-":
for override, alttype in ( (".htm", "html"),
(".html", "html"),
(".xml", "xml" ),
(".tag", "tag" ) ):
if A.outfile.endswith(override):
A.output_type = alttype
if A.outfile == "-":
outfp = sys.stdout
if outfp.encoding is not None:
# Why ignore outfp.encoding? :-/ stupid cathal?
A.codec = 'utf-8'
else:
outfp = open(A.outfile, "wb")
## Test Code
outfp = extract_text(**vars(A))
outfp.close()
return 0
if __name__ == '__main__': sys.exit(main())
Try
python pdf2txt.py somefile.pd > pdf_text.txt
Thank you everyone for the inputs.
I added a line
sys.stdout=open("somefile.txt","w")
right before the line
outfp=sys.stdout.
worked like a charm.
Related
im making a type of "aux switch" and I decided that i was going to use python module sounddevice to do that and I use it BUT i cant seem to get it to use the sound device that i want it too. every time that i try to change it for some reason it dose not work
import argparse
import sounddevice as sd
import numpy # Make sure NumPy is loaded before it is used in the callback
assert numpy # avoid "imported but unused" message (W0611)
print(sd.query_devices())
sd.check_input_settings(device= "VoiceMeeter Aux Output (VB-Audio VoiceMeeter AUX VAIO), Windows DirectSound")
sd.check_output_settings(device= "CABLE Input (VB-Audio Virtual Cable), Windows DirectSound ")
def int_or_str(text):
"""Helper function for argument parsing."""
try:
return int(text)
except ValueError:
return text
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
'-l', '--list-devices', action='store_true',
help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
print(sd.query_devices())
parser.exit(0)
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[parser])
parser.add_argument(
'-i', '--input-device', type=int_or_str,
help='')
parser.add_argument(
'-o', '--output-device', type=int_or_str,
help='output device (numeric ID or substring)')
parser.add_argument(
'-c', '--channels', type=int, default=2,
help='number of channels')
parser.add_argument('--dtype', help='audio data type')
parser.add_argument('--samplerate', type=float, help='sampling rate')
parser.add_argument('--blocksize', type=int, help='block size')
parser.add_argument('--latency', type=float, help='latency in seconds')
args = parser.parse_args(remaining)
def callback(indata, outdata, frames, time, status):
if status:
print(status)
outdata[:] = indata
try:
with sd.Stream(device=(args.input_device, args.output_device),
samplerate=args.samplerate, blocksize=args.blocksize,
dtype=args.dtype, latency=args.latency,
channels=args.channels, callback=callback):
print('#' * 80)
print('press Return to quit')
print('#' * 80)
input()
except KeyboardInterrupt:
parser.exit('')
except Exception as e:
parser.exit(type(e).__name__ + ': ' + str(e))
import argparse
import sounddevice as sd
import numpy # Make sure NumPy is loaded before it is used in the callback
assert numpy # avoid "imported but unused" message (W0611)
print(sd.query_devices())
inpdev = "VoiceMeeter Aux Output (VB-Audio VoiceMeeter AUX VAIO), Windows DirectSound"
outdev = "CABLE Input (VB-Audio Virtual Cable), Windows DirectSound "
def int_or_str(text):
"""Helper function for argument parsing."""
try:
return int(text)
except ValueError:
return text
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
'-l', '--list-devices', action='store_true',
help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
print(sd.query_devices())
parser.exit(0)
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[parser])
parser.add_argument(
'-i', '--input-device', type=int_or_str,
help='')
parser.add_argument(
'-o', '--output-device', type=int_or_str,
help='output device (numeric ID or substring)')
parser.add_argument(
'-c', '--channels', type=int, default=2,
help='number of channels')
parser.add_argument('--dtype', help='audio data type')
parser.add_argument('--samplerate', type=float, help='sampling rate')
parser.add_argument('--blocksize', type=int, help='block size')
parser.add_argument('--latency', type=float, help='latency in seconds')
args = parser.parse_args(remaining)
def callback(indata, outdata, frames, time, status):
if status:
print(status)
outdata[:] = indata
try:
with sd.Stream(device=(inpdev, outdev),
samplerate=args.samplerate, blocksize=args.blocksize,
dtype=args.dtype, latency=args.latency,
channels=args.channels, callback=callback):
print('#' * 80)
print('press Return to quit')
print('#' * 80)
input()
except KeyboardInterrupt:
parser.exit('')
except Exception as e:
parser.exit(type(e).__name__ + ': ' + str(e))
I'm trying to execute the argparser and the error is:
usage: ipykernel_launcher.py [-h] [-v VIDEO] [-i IMAGE] [-c CAMERA]
[-o OUTPUT]
ipykernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-6bea2aa0-717c-4cb5-8793-534ee75b255f.json
An exception has occurred, use %tb to see the full traceback.
SystemExit: 2*
My argparser code :
def argsParser():
arg_parse = argparse.ArgumentParser()
arg_parse.add_argument("-v", "--video", default=None, help="path to Video File ")
arg_parse.add_argument("-i", "--image", default=None, help="path to Image File ")
arg_parse.add_argument("-c", "--camera", default=False, help="Set True if you want to use the camera.")
arg_parse.add_argument("-o", "--output", type=str, help="path to optional output video file")
args = vars(arg_parse.parse_args())
arg_parse.add_argument('-f')
return args
if __name__ == "__main__":
HOGCV = cv2.HOGDescriptor()
HOGCV.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
args = argsParser()
humanDetector(args)`
Could you help me what should I do? (btw I'm using google collaborator)
def argsParser():
arg_parse = argparse.ArgumentParser()
arg_parse.add_argument("-v", "--video", default=None, help="path to Video File ")
arg_parse.add_argument("-i", "--image", default=None, help="path to Image File ")
arg_parse.add_argument("-c", "--camera", default=True, help="Set true if you want to use the camera.")
arg_parse.add_argument("-o", "--output", type=str, help="path to optional output video file")
args = vars(arg_parse.parse_args())
return args
if __name__ == "__main__":
HOGCV = cv2.HOGDescriptor()
HOGCV.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
args = argsParser()
humanDetector(args)
This should give you the answer you are looking for, if anything i think it was a formating error
You were so close. You just need to put "arg_parse.add_argument('-f')" line before "args = vars(arg_parse.parse_args())" ( This is the reason you are getting 'SystemExit: 2' error ):
Edit your argsParser like this:
def argsParser():
arg_parse = argparse.ArgumentParser()
arg_parse.add_argument("-v", "--video", default='your_video_path', help="path to Video File ")
arg_parse.add_argument("-i", "--image", default='your_image_path', help="path to Image File ")
arg_parse.add_argument("-c", "--camera", default='True/False', help="Set True if you want to use the camera.")
arg_parse.add_argument("-o", "--output", type=str, default='your_output_path', help="path to optional output video file")
arg_parse.add_argument("-f", required=False)
args = vars(arg_parse.parse_args())
return args
Now yoy can call:
if __name__ == "__main__":
HOGCV = cv2.HOGDescriptor()
HOGCV.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
args = argsParser()
humanDetector(args)
I have been writing a code to make an app that can stream mic input to speaker output it works perfectly on PyCharm or any IDE but as soon as a successful installation happens and you do first start up it crashes, IDK why no solution so far has worked this was my first python app. Can anyone help
I used google colab to run builddozer but its not working. Can anyone tell me what is wrong here?
import argparse
import kivy
from kivymd.app import MDApp
from kivy.uix.gridlayout import GridLayout
from kivy.uix.label import Label
from kivy.uix.button import Button
import sounddevice as sd
import numpy # Make sure NumPy is loaded before it is used in the callback
assert numpy # avoid "imported but unused" message (W0611)
def int_or_str(text):
"""Helper function for argument parsing."""
try:
return int(text)
except ValueError:
return text
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
'-l', '--list-devices', action='store_true',
help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
print(sd.query_devices())
parser.exit(0)
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[parser])
parser.add_argument(
'-i', '--input-device', type=int_or_str,
help='input device (numeric ID or substring)')
parser.add_argument(
'-o', '--output-device', type=int_or_str,
help='output device (numeric ID or substring)')
parser.add_argument(
'-c', '--channels', type=int, default=2,
help='number of channels')
parser.add_argument('--dtype', help='audio data type')
parser.add_argument('--samplerate', type=float, help='sampling rate')
parser.add_argument('--blocksize', type=int, help='block size')
parser.add_argument('--latency', type=float, help='latency in seconds')
args = parser.parse_args(remaining)
class THAGrid(GridLayout):
def __init__(self,**kwargs):
super(THAGrid, self).__init__()
self.cols = 2
self.add_widget(Label(text="Press To Start"))
self.press = Button(text = "Press to Hear")
self.press.bind(on_press=self.hear)
self.add_widget(self.press)
def hear(self, instance, ):
def callback(indata, outdata, frames, time, status):
if status:
print(status)
outdata[:] = indata
try:
with sd.Stream(device=(args.input_device, args.output_device),
samplerate=args.samplerate, blocksize=args.blocksize,
dtype=args.dtype, latency=args.latency,
channels=args.channels, callback=callback):
print('#' * 80)
print('press Return to quit')
print('#' * 80)
input()
except KeyboardInterrupt:
parser.exit('')
except Exception as e:
parser.exit(type(e).__name__ + ': ' + str(e))
class TheHealingTech(MDApp):
def build(self):
return THAGrid()
if __name__ == "__main__":
TheHealingTech().run()
Try using kivy 2.0.0 and kivymd==0.104.2. Install them in plugins trough settings in pycharm. And your buildozer.spec should be like this:
requirements = python3,kivy==2.0.0,kivymd==0.104.2,pillow,sdl2_ttf==2.0.15,argparse,numpy
my python file would read two files from command line using argparse.
I want to use 'try...except...' to handle incorrect user input, like non-exist file and incorrect command:
for example, I use '-f' to open file, then '-g' is incorrect command.
my codes are:
> parser = argparse.ArgumentParser(description = "Handle with input
files")
parser.add_argument('-m', dest = "model", type=argparse.FileType('r'))
parser.add_argument('-t', dest = "test", type=argparse.FileType('r'))
parser.add_argument('-d', dest = "permute",type=argparse.FileType('r'))
args = parser.parse_args()
# Parse file names from command line
model_file = open(args.model.name)
test_file = open(args.test.name)
decoy_file = open(args.permute.name)
I try to implement 'try...except' in my code but failed. Here is what I did:
> parser = argparse.ArgumentParser(description = "Handle with input
files")
parser.add_argument('-m', dest = "model", type=argparse.FileType('r'))
parser.add_argument('-t', dest = "test", type=argparse.FileType('r'))
parser.add_argument('-d', dest = "permute",type=argparse.FileType('r'))
args = parser.parse_args()
# Parse file names from command line
try:
model_file = open(args.model.name)
test_file = open(args.test.name)
decoy_file = open(args.permute.name)
except IOError:
print('Wrong file name')
I would suggest adding something like this to make sure all arguments are there. Also argparse.FileType('r') will already open the file (meaning you can do something like args.model.readlines()).
if not all([args.model, args.test, args.permute]):
print("All Arguments are required")
exit(1)
Firstly the error happend at args = parser.parse_args() when it parse your command param.
As we can see from argparse's origin code:
...
def parse_args(self, args=None, namespace=None):
args, argv = self.parse_known_args(args, namespace)
if argv:
msg = _('unrecognized arguments: %s')
self.error(msg % ' '.join(argv))
return args
...
def error(self, message):
"""error(message: string)
Prints a usage message incorporating the message to stderr and
exits.
If you override this in a subclass, it should not return -- it
should either exit or raise an exception.
"""
self.print_usage(_sys.stderr)
args = {'prog': self.prog, 'message': message}
self.exit(2, _('%(prog)s: error: %(message)s\n') % args)
# exit here ,that's why can not catch the exception
So when you input incorrectly, like what you mentioned above, like non-exist file or incorrect command , it will call error function in the end, and exit the thread, that's why you cannot catch a exception by try .. except synax.
If you want to catch the exception, you can inherit the main class ArgumentParser, and rewrite the method error. you can take my codes as reference:
import argparse
import sys as _sys
from argparse import ArgumentParser
class ArgumentParserSub(ArgumentParser):
def error(self, message):
self.print_usage(_sys.stderr)
args = {'prog': self.prog, 'message': message}
raise Exception(('error: %(message)s\n') % args)
parser = ArgumentParserSub(description = "Handle with input files")
parser.add_argument('-m', dest = "model", type=argparse.FileType('r'))
parser.add_argument('-t', dest = "test", type=argparse.FileType('r'))
parser.add_argument('-d', dest = "permute",type=argparse.FileType('r'))
try:
args = parser.parse_args()
# Parse file names from command line
model_file = open(args.model.name)
test_file = open(args.test.name)
decoy_file = open(args.permute.name)
except Exception as e:
print(e)
print('Wrong file name')
If you don't want the parser to catch and exit bad files, open the files yourself. Just ask the parser for names, not open files.
parser = argparse.ArgumentParser(description = "Handle with input files")
parser.add_argument('-m', dest = "model") # no type
parser.add_argument('-t', dest = "test")
parser.add_argument('-d', dest = "permute")
args = parser.parse_args()
# print(args) # good idea when debugging
# Open file names from command line
try:
model_file = open(args.model)
test_file = open(args.test)
decoy_file = open(args.permute)
except IOError:
print('Wrong file name')
So, I came across the getopt module to parse command line args, although I can't make any sense of the docs. For whatever reason, I cannot figure out why this isn't seeing my --domain example.com argument..
$ ./httpdsetup.py -a -u zack --domain example.com
[('-a', ''), ('-u', '')]
I printed out what gets dumped into opts to see what it saw. The code below is just about an exact duplicate from the documentation site.
def main(argv):
import getopt
try:
opts, args = getopt.getopt(argv, "h:al:ud:v", ["user=", "apache", "lighttpd", "dir=", "domain=", "vhost="])
except getopt.GetoptError:
print_usage()
sys.exit(2)
username = ''
directory = ''
domain = ''
httpd = 'apache'
print(opts)
for opt, arg in opts:
if opt == '-h':
print_usage()
sys.exit()
elif opt in ('-u', '--username'):
username = arg
elif opt in ('-d', '--dir'):
directory = arg
elif opt in ('-v', '--domain', '--vhost'):
domain = arg
elif opt in ('-a', '--apache'):
httpd = 'apache'
elif opt in ('-l', '--lighttpd'):
httpd = 'lighttpd'
else:
print_usage()
sys.exit()
if httpd == 'apache':
create_apache_vhost(domain, directory, username)
elif httpd == 'lighttpd':
create_lighty_vhost(domain, directory, username)
if __name__ == '__main__':
main(sys.argv[1:])
I prefer argparse. Python documention here.
It's in Python >=2.7 and >=3.2, but not in Python 3.0 and 3.1. If it's missing in your install, just copy the single file from here to where your script is, or into your Python install.
Here's something close to your example with argparse:
#!/usr/bin/env python3
import sys
def create_apache_vhost(*args, **kwargs):
pass
def create_lighty_vhost(*args, **kwargs):
pass
def main(argv):
import argparse
parser = argparse.ArgumentParser(description="Some server",
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('--username', type=str)
parser.add_argument('-u', dest='username', type=str)
parser.add_argument('--apache', dest='httpd', action='store_const', const='apache')
parser.add_argument('-a', dest='httpd', action='store_const', const='apache')
parser.add_argument('--lighthttpd', dest='httpd', action='store_const', const='lighthttpd')
parser.add_argument('-l', dest='httpd', action='store_const', const='lighthttpd')
parser.add_argument('--domain', type=str)
parser.add_argument('--vhost', type=str)
parser.add_argument('-v', dest='domain', type=str)
parser.add_argument('--dir', dest='directory', type=str)
parser.add_argument('-d', dest='directory', type=str)
defaults = {
'httpd': 'apache',
}
parser.set_defaults(**defaults)
args = parser.parse_args(args=argv)
print(args)
if args.httpd == 'apache':
create_apache_vhost(args.domain, args.directory, args.username)
elif args.httpd == 'lighttpd':
create_lighty_vhost(args.domain, args.directory, args.username)
if __name__ == '__main__':
main(sys.argv[1:])