Adding a language to markdown code blocks in bulk - Python

My problem is that I need to add a language to every single code block in my markdown files.
I have hundreds of files in nested directories.
The files have this form:
```language
a
```
Normal text
```
b
```
Normal text
```
c
```
Normal text
```language
d
```
and the output for each of these should be:
```ios
a
```
Normal text
```ios
b
```
Normal text
```ios
c
```
Normal text
```ios
d
```
(In this case I needed the ios language from a custom lexer I made.)
I'm using Debian 11 and trying with sed, and I found that this regex
(```).*(\n.*)((\n.*)*?)\n```
could help find the blocks, but I can't figure out how to use it.
I can use Python for more complex regexes and behaviour.

My Solution
WARNING!! If you have unpaired triple backticks, this code will produce unwanted results! Always back up your files first!
Use bash find to list all files with absolute paths (for some reason I don't like relative paths, and my laziness told me not to write a recursive Python search :D)
and -exec a Python script with 2 arguments (the filename, and a string appended to that filename; the original content is kept under the appended name, and the new content is written under the original filename).
The regex I came up with inside the Python script to "add" (I actually replace the whole block...) the "ios" text for each code block is:
(```).*(\n.*)((\n.*)*?)\n```
replaced with
\1ios\2\3\n```
I really couldn't translate this for sed.
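Before wiring it into the script below, a quick standalone check of the pattern on the sample from the question (a sketch):

import re

regex = r"(```).*(\n.*)((\n.*)*?)\n```"
subst = r"\1ios\2\3\n```"

sample = "```language\na\n```\nNormal text\n```\nb\n```\n"
print(re.sub(regex, subst, sample, 0, re.MULTILINE))
# ```ios
# a
# ```
# Normal text
# ```ios
# b
# ```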
import re
import sys, getopt
from shutil import move

def main(argv):
    inputfile = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hi:a:", ["ifile=", "afile="])
    except getopt.GetoptError:
        print('pyre.py -i <inputfile> -a <append_string>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('pyre.py -i <inputfile> -a <append_string>')
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-a", "--afile"):
            outputfile = inputfile + arg
    magic(inputfile, outputfile)

def magic(inputfile, outputfile):
    regex = r"(```).*(\n.*)((\n.*)*?)\n```"
    subst = r"\1ios\2\3\n```"
    # keep the original content under the appended name, then rewrite the original path
    move(inputfile, outputfile)
    open(inputfile, 'w', encoding="utf-8").write(
        re.sub(regex, subst, open(outputfile, 'r', encoding="utf-8").read(), 0, re.MULTILINE))
    # print(f"{inputfile} DONE")

if __name__ == "__main__":
    main(sys.argv[1:])
and the actual find command:
find ~+ -name '*.md' -exec python pyre.py -i \{\} -a .new.md \;
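If you'd rather skip find and the shell glue entirely, a recursive pure-Python version is also possible. A minimal sketch, assuming the same regex and the same "keep the original under a .new.md name" behaviour; Path.rglob does the directory walk:

import re
from pathlib import Path

regex = r"(```).*(\n.*)((\n.*)*?)\n```"
subst = r"\1ios\2\3\n```"

for path in list(Path.cwd().rglob("*.md")):   # snapshot the list before writing new files
    if path.name.endswith(".new.md"):          # skip backups from a previous run
        continue
    text = path.read_text(encoding="utf-8")
    path.with_name(path.name + ".new.md").write_text(text, encoding="utf-8")  # backup copy
    path.write_text(re.sub(regex, subst, text, 0, re.MULTILINE), encoding="utf-8")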
Hope this will help someone with the same issue.

Related

Using subprocess.run python module with different languages

I'm building a tester for programs in different languages, but I'm not able to get a C program working. Currently the command is called like this:
codeResult = subprocess.run(self.createRunCommand(currLanguage, file),
                            input = codeToTest,
                            shell = True,
                            timeout = TIMEOUT,
                            capture_output=True)
and createRunCommand() returns:
def createRunCommand(self, language, file):
    if language == '.py':
        command = f'python {file}'
    elif language == '.c':
        if not os.path.exists(f'C:/<myPath>/{file}.out'):
            command = f'gcc -std=c11 {file} -o C:/<myPath>/{file}.out \
                        ./C:/<myPath>/{file}.out'
        else:
            command = f'./C:/<myPath>/{file}.out'
    elif language == '.java':
        command = f''
    elif language == '.cpp':
        command = f''
    return command
The input and the test itself are fine, as it runs correctly with a Python program, but I cannot figure out how to set up C (and probably other compile-first languages).
You'll need multiple command invocations for (e.g.) C/C++, so have your createRunCommand return multiple commands.
I also changed things up here to:
automatically figure out the language from the extension of the filename;
use a list of arguments instead of a string, since it's safer;
use sys.executable for the current Python interpreter, and shutil.which("gcc") to find gcc.
import os
import shlex
import shutil
import subprocess
import sys

def get_commands(file):
    """
    Get commands to (compile and) execute `file`, as a list of subprocess arguments.
    """
    ext = os.path.splitext(file)[1].lower()
    if ext == ".py":
        return [(sys.executable, file)]
    if ext in (".c", ".cpp"):
        exe_file = f"{file}.exe"
        return [
            (shutil.which("gcc"), "-std=c11", file, "-o", exe_file),
            (exe_file,),
        ]
    raise ValueError(f"Unsupported file type: {ext}")

filename = "foo.py"
for command in get_commands(filename):
    print(f"Running: {shlex.join(command)}")
    code_result = subprocess.run(command, capture_output=True)
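If you still want to feed in the test input and enforce a timeout as in your original subprocess.run call, one option is to apply them only to the final (run) step. A minimal sketch reusing get_commands from above; codeToTest, TIMEOUT, and the "solution.c" filename are placeholders from the question, not tested code:

commands = get_commands("solution.c")           # hypothetical test file
for i, command in enumerate(commands):
    is_last = i == len(commands) - 1            # the run step is the last command
    code_result = subprocess.run(
        command,
        input=codeToTest if is_last else None,  # stdin only for the run step
        timeout=TIMEOUT if is_last else None,   # time limit only for the run step
        capture_output=True,
        text=True,                              # assumes codeToTest is a str; drop if it's bytes
        check=True,                             # raise if compilation fails
    )
print(code_result.stdout)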

How to print the first N lines of a file in python with N as argument

How would I go about getting the first N lines of a text file in Python, with N given as an argument?
usage:
python file.py datafile -N 10
My code
import sys
from itertools import islice

args = sys.argv
print(args)
if args[1] == '-h':
    print("-N for printing the number of lines: python file.py datafile -N 10")
if args[-2] == '-N':
    datafile = args[1]
    number = int(args[-1])
    with open(datafile) as myfile:
        head = list(islice(myfile, number))
    head = [item.strip() for item in head]
    print(head)
    print('\n'.join(head))
I wrote the program; can you let me know if there is a better way to do this?
Assuming that the print_head logic you've implemented need not be altered, here's the script I think you're looking for:
import sys
from itertools import islice

def print_head(file, n):
    if not file or not n:
        return
    with open(file) as myfile:
        head = [item.strip() for item in islice(myfile, n)]
        print(head)

def parse_args():
    result = {'script': sys.argv[0]}
    args = iter(sys.argv)
    for arg in args:
        if arg == '-F':
            result['filename'] = next(args)
        if arg == '-N':
            result['num_lines'] = int(next(args))
    return result

if __name__ == '__main__':
    script_args = parse_args()
    print_head(script_args.get('filename', ''), script_args.get('num_lines', 0))
Running the script:
python file.py -F datafile -N 10
Note: the best way to implement this would be to use the argparse library.
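For reference, an argparse version of the same -F/-N interface might look roughly like this (a sketch; the flag names are kept from the call above, the dest names are my own):

import argparse
from itertools import islice

parser = argparse.ArgumentParser(description='Print the first N lines of a file.')
parser.add_argument('-F', dest='filename', required=True, help='file to read')
parser.add_argument('-N', dest='num_lines', type=int, default=10, help='number of lines to print')
args = parser.parse_args()

with open(args.filename) as myfile:
    print([line.strip() for line in islice(myfile, args.num_lines)])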
You can access the arguments passed to the script through sys:
sys.argv
The list of command line arguments passed to a Python script. argv[0] is the script name (it is operating system dependent whether this is a full pathname or not). If the command was executed using the -c command line option to the interpreter, argv[0] is set to the string '-c'. If no script name was passed to the Python interpreter, argv[0] is the empty string.
So in code it would look like this:
import sys
print("All of argv")
print(sys.argv)
print("Last element every time")
print(sys.argv[-1])
Reading the documentation you'll see that the first values stored in sys.argv vary according to how the user calls the script. If you run the code I pasted with different kinds of calls, you can see for yourself what values are stored.
For a basic first approach: access n through sys.argv[-1], which returns the last element every time. You still have to do a try and beg for forgiveness to make sure the argument passed is a number. For that you would have:
import sys

try:
    n = int(sys.argv[-1])
except ValueError:
    print(f"Please pass a valid number as argument, not {sys.argv[-1]}")
That's pretty much it. Obviously it's quite basic; you can improve this even more by having users pass values with flags, like --skip-lines 10, which would be your n and could appear anywhere in the call. I'd create a function in charge of translating sys.argv into a key/value dictionary for easy access within the script.
Arguments are available via the sys module.
Example 1: ./file.py datafile 10
#!/usr/bin/env python3
import sys

filename = sys.argv[1]
N = int(sys.argv[2])

with open(filename) as myfile:
    head = myfile.readlines()[0:N]
print(head)
Example 2: ./file.py datafile --N 10
If you want to pass multiple optional arguments you should have a look at the argparse package.
#!/usr/bin/env python3
import argparse

parser = argparse.ArgumentParser(description='Read head of file.')
parser.add_argument('file', help='Textfile to read')
parser.add_argument('--N', type=int, default=10, help='Number of lines to read')
args = parser.parse_args()

with open(args.file) as myfile:
    head = myfile.readlines()[0:args.N]
print(head)

getopt not quite working, what am I doing wrong?

I am not sure why the code below does not work - I get the error
NameError: name 'group1' is not defined.
The code worked fine before I tried to use getopt. I am trying to parse the command line input so that e.g. if I put
python -q file1 file2 -r file3 file4
then file1 and file2 become the input to my first loop as 'group1'.
import sys
import csv
import vcf
import getopt

# set up the args
try:
    opts, args = getopt.getopt(sys.argv[1:], 'q:r:h', ['query', 'reference', 'help'])
except getopt.GetoptError as err:
    print str(err)
    sys.exit(2)

for opt, arg in opts:
    if opt in ('-h', '--help'):
        print "Usage python -q [query files] -r [reference files]"
        print "-h this help message"
    elif opt in ('-q', '--query'):
        group1 = arg
    elif opt in ('-r', '--reference'):
        group2 = arg
    else:
        print "check your args"

# extract core snps from query file, saving these to the set universal_snps
snps = []
outfile = sys.argv[1]
for variants in group1:
    vcf_reader = vcf.Reader(open(variants))
The problem is that group1 = arg never runs, so when execution later gets to for variants in group1:, the variable is not defined.
This is because you are calling the script incorrectly for how you defined your options. When you have the line:
opts, args = getopt.getopt(sys.argv[1:], 'q:r:h', ['query', 'reference', 'help'])
there is a requirement that the flagged arguments (i.e. -q file1 and -r file3) be specified before any other arguments. Therefore, if you were to call the script as:
python <scriptName> -q file1 -r file3 file2 file4
you would have the intended behaviour. This is because all the parameters without an associated flag appear at the end of the call (and are retrievable through the args return value).
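You can see the split between options and leftover arguments directly (a small demo with the same option spec; the comments show what getopt returns):

import getopt

# Option parsing stops at the first non-option argument.
argv = ['-q', 'file1', '-r', 'file3', 'file2', 'file4']
opts, args = getopt.getopt(argv, 'q:r:h', ['query', 'reference', 'help'])
print(opts)  # [('-q', 'file1'), ('-r', 'file3')]
print(args)  # ['file2', 'file4']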

Parsing cmd args like typical filter programs

I spent a few hours reading tutorials about argparse and managed to learn to use normal parameters. The official documentation is not very readable to me. I'm new to Python. I'm trying to write a program that can be invoked in the following ways:
cat inFile | program [options] > outFile -- If no inFile or outFile is specified, read from stdin and write to stdout.
program [options] inFile outFile
program [options] inFile > outFile -- If only one file is specified, it is the input and output goes to stdout.
cat inFile | program [options] - outFile -- If '-' is given in place of inFile, read from stdin.
program [options] /path/to/folder outFile -- Process all files from /path/to/folder and its subdirectories.
I want it to behave like a regular CLI program under GNU/Linux.
It would also be nice if the program could be invoked as:
program [options] inFile0 inFile1 ... inFileN outFile -- the first path/file is always interpreted as input, the last one always as output, and any additional ones as inputs.
I could probably write dirty code that would accomplish this, but this is going to be used, so someone will end up maintaining it (and they will know where I live...).
Any help/suggestions are much appreciated.
Combining the answers and some more knowledge from the Internet, I've managed to write this (it does not accept multiple inputs, but that is enough):
import sys, argparse, os.path, glob

def inputFile(path):
    if path == "-":
        return [sys.stdin]
    elif os.path.exists(path):
        if os.path.isfile(path):
            return [path]
        else:
            return [y for x in os.walk(path) for y in glob.glob(os.path.join(x[0], '*.dat'))]
    else:
        exit(2)

def main(argv):
    cmdArgsParser = argparse.ArgumentParser()
    cmdArgsParser.add_argument('inFile', nargs='?', default='-', type=inputFile)
    cmdArgsParser.add_argument('outFile', nargs='?', default='-', type=argparse.FileType('w'))
    cmdArgs = cmdArgsParser.parse_args()
    print cmdArgs.inFile
    print cmdArgs.outFile

if __name__ == "__main__":
    main(sys.argv[1:])
Thank you!
You need positional arguments (names not starting with a dash) that are optional (nargs='?') and have a default (default='-'). Additionally, argparse.FileType is a convenience factory that returns sys.stdin or sys.stdout when - is passed (depending on the mode).
All together:
#!/usr/bin/env python
import argparse

# default argument is sys.argv[0]
parser = argparse.ArgumentParser('foo')
parser.add_argument('in_file', nargs='?', default='-', type=argparse.FileType('r'))
parser.add_argument('out_file', nargs='?', default='-', type=argparse.FileType('w'))

def main():
    # default argument is sys.argv[1:]
    args = parser.parse_args(['bar', 'baz'])
    print(args)
    args = parser.parse_args(['bar', '-'])
    print(args)
    args = parser.parse_args(['bar'])
    print(args)
    args = parser.parse_args(['-', 'baz'])
    print(args)
    args = parser.parse_args(['-', '-'])
    print(args)
    args = parser.parse_args(['-'])
    print(args)
    args = parser.parse_args([])
    print(args)

if __name__ == '__main__':
    main()
I'll give you a starter script to play with. It uses optionals rather than positionals, and only one input file. But it should give a taste of what you can do.
import argparse

parser = argparse.ArgumentParser()
inarg = parser.add_argument('-i', '--infile', type=argparse.FileType('r'), default='-')
outarg = parser.add_argument('-o', '--outfile', type=argparse.FileType('w'), default='-')
args = parser.parse_args()
print(args)

cnt = 0
for line in args.infile:
    print(cnt, line)
    args.outfile.write(line)
    cnt += 1
When called without arguments, it just echoes your input (after ^D). I'm a little bothered that it doesn't exit until I issue another ^D.
FileType is convenient, but it has a major fault - it opens the files, but you have to close them yourself, or let Python do so when exiting. There's also the complication that you don't want to close stdin/stdout.
The best argparse questions include a basic script and specific questions on how to correct or improve it. Your specs are reasonably clear, but it would be nice if you gave us more to work with.
To handle the subdirectories option, I would skip the FileType bit. Use argparse to get two lists of strings (or a list and a name), and then do the necessary chdir and/or glob to find and iterate over files. Don't expect argparse to do the actual work; use it to parse the command-line strings. Here is a sketch of such a script, leaving most details for you to fill in.
import argparse
import os
import sys  # for stdin/stdout
....

def open_output(outfile):
    # function to open a file for writing
    # should handle '-'
    # return a file object

def glob_dir(adir):
    # function to glob a dir
    # return a list of files ready to open

def open_forread(afilename):
    # function to open a file for reading
    # be sensitive to '-'

def walkdirs(alist):
    outlist = []
    for name in alist:
        if <name is a file>:
            outlist.append(name)
        elif <name is a dir>:
            glist = glob_dir(name)
            outlist.extend(glist)
        else:
            <error>
    return outlist

def cat(infile, outfile):
    <do your thing here>

def main(args):
    # handle args options
    filelist = walkdirs(args.inlist)
    fout = open_output(args.outfile)
    for name in filelist:
        fin = open_forread(name)
        cat(fin, fout)
        if <fin not stdin>: fin.close()
    if <fout not stdout>: fout.close()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('inlist', nargs='*')
    parser.add_argument('outfile')
    # add options
    args = parser.parse_args()
    main(args)
The parser here requires you to give it an outfile name, even if it is '-'. I could define it with nargs='?' to make it optional, but that does not play nicely with the inlist '*'.
Consider
myprog one two three
Is that
namespace(inlist=['one','two','three'], outfile=default)
or
namespace(inlist=['one','two'], outfile='three')
With both a * and ? positional, the identity of the last string is ambiguous - is it the last entry for inlist, or the optional entry for outfile? argparse chooses the former, and never assigns the value to outfile.
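A quick way to see that behaviour for yourself (a sketch that mirrors the inlist/outfile positionals above, giving outfile nargs='?' and a '-' default):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('inlist', nargs='*')
parser.add_argument('outfile', nargs='?', default='-')
print(parser.parse_args(['one', 'two', 'three']))
# Namespace(inlist=['one', 'two', 'three'], outfile='-')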
With --infile, --outfile definitions, the allocation of these strings is clear.
In one sense this problem is too complex for argparse - there's nothing in it to handle things like directories. In another sense it is too simple: you could just as easily split sys.argv[1:] between inlist and outfile without the help of argparse, as in the sketch below.
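For concreteness, that "just split sys.argv" alternative could be as small as this (a sketch, with no validation):

import sys

# Last argument is the output file, everything before it is the input list.
args = sys.argv[1:]
inlist, outfile = args[:-1], (args[-1] if args else '-')
print(inlist, outfile)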

Python command line arg with getopt does not work

I modified the sample code given here:
sample code for getopt
as follows, but it does not work. I am not sure what I am missing. I added a "-j" option to the existing code. Eventually, I want to add as many command options as required to meet my needs.
When I give the input below, it does not print the argument values.
./pyopts.py -i dfdf -j qwqwqw -o ddfdf
Input file is "
J file is "
Output file is "
Can you please let me know what's wrong here?
#!/usr/bin/python
import sys, getopt

def usage():
    print 'test.py -i <inputfile> -j <jfile> -o <outputfile>'

def main(argv):
    inputfile = ''
    jfile = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hij:o:", ["ifile=", "jfile=", "ofile="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-j", "--jfile"):
            jfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
    print 'Input file is "', inputfile
    print 'J file is "', jfile
    print 'Output file is "', outputfile

if __name__ == "__main__":
    main(sys.argv[1:])
Your error is omitting a colon following the i option. As stated by the link you supplied:
options that require an argument should be followed by a colon (:).
Therefore, the corrected version of your program should contain the following:
try:
    opts, args = getopt.getopt(argv, "hi:j:o:", ["ifile=", "jfile=", "ofile="])
except getopt.GetoptError:
    usage()
    sys.exit(2)
Executing it with the specified arguments produces the expected output:
~/tmp/so$ ./pyopts.py -i dfdf -j qwqwqw -o ddfdf
Input file is " dfdf
J file is " qwqwqw
Output file is " ddfdf
However, as a comment to your question specifies, you should use argparse rather than getopt:
Note: The getopt module is a parser for command line options whose API is designed to be familiar to users of the C getopt() function. Users who are unfamiliar with the C getopt() function or who would like to write less code and get better help and error messages should consider using the argparse module instead.
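For reference, a rough argparse equivalent of the -i/-j/-o interface above might look like this (a sketch, written for Python 3):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-i', '--ifile', default='', help='input file')
parser.add_argument('-j', '--jfile', default='', help='j file')
parser.add_argument('-o', '--ofile', default='', help='output file')
args = parser.parse_args()

# argparse generates -h/--help and error messages for you.
print('Input file is "', args.ifile)
print('J file is "', args.jfile)
print('Output file is "', args.ofile)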
