I am having some issues passing an argument in a python script to take a specific file like a csv, txt, or xml
I am reviewing python and would like some feedback on why I don't see any output after running the following command: ./my_script some3455.csv
#!/usr/bin/python
import sys
import csv
import xml.etree.ElementTree as ET
FILE = str(sys.argv[1])
# Choose a reader for FILE: run_csv(), run_txt(), or run_xml() as fallback.
# NOTE(review): as posted, `==` compares FILE with the literal text '*.csv' /
# '*.txt' (no wildcard matching happens), and the if/elif/else lines have no
# trailing ':' and no indentation.
def run_files():
if FILE == '*.csv'
run_csv()
elif FILE == '*.txt'
run_txt()
else
run_xml()
def run_csv():
    """Print every row of FILE, parsed as pipe-delimited CSV.

    Registers a csv dialect named 'dialect' that uses '|' as the delimiter,
    then prints each parsed row (a list of strings).
    """
    # register_dialect() returns None, so there is nothing worth keeping.
    csv.register_dialect('dialect', delimiter='|')
    with open(FILE, 'r') as file:
        for row in csv.reader(file, dialect='dialect'):
            print(row)
def run_txt():
    """Read FILE in one go and print its text."""
    with open(FILE, 'r') as handle:
        print(handle.read())
def run_xml():
    """Print the Car and Color text of each 'Attributes' element in FILE.

    Raises AttributeError if an 'Attributes' element has no 'Car' or
    'Color' child, because find() returns None in that case.
    """
    root = ET.parse(FILE).getroot()
    # findall('Attributes') only matches direct children of the root element.
    for child in root.findall('Attributes'):
        car = child.find('Car').text
        color = child.find('Color').text
        print(car, color)
I have tried passing the file name without going through FILE, but that works for only one file type; the other file types are not identified.
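You need to use fnmatch, not ==, to compare a string with a glob pattern (== only matches a file literally named '*.csv'). Also note that the posted script only defines run_files() and never calls it, so add a run_files() call at the end of the script to get any output: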
import fnmatch
def run_files():
    """Send FILE to the reader that matches its extension.

    '*.csv' goes to run_csv(), '*.txt' to run_txt(), and anything else is
    treated as XML.
    """
    if fnmatch.fnmatch(FILE, '*.csv'):
        run_csv()
        return
    if fnmatch.fnmatch(FILE, '*.txt'):
        run_txt()
        return
    run_xml()
Related
I created this code to find all Excel files in a folder and write a CSV file for every sheet in every file. The script works fine, but sometimes the last Excel file converted is still locked by Python on the file system. Can anyone help me understand what's happening?
import sys
from os import listdir
from os.path import isfile, join
import pandas as pd
import csv
import re
def removeEspecialCharacters(obj):
    """Strip unwanted control characters from a string value.

    Removes the characters \\x90 and \\x8F as well as carriage returns and
    line feeds when `obj` is a str. Any non-str value is returned unchanged.
    """
    if not isinstance(obj, str):
        return obj
    # The previous pattern '[(\x90|\x8F)]' was a character class, so it also
    # deleted literal '(', '|' and ')'. Only the intended characters are listed.
    return re.sub('[\x90\x8f\r\n]', '', obj)
# Convert each sheet of each Excel workbook in myFolder into its own CSV file.
myFolder = r'C:\Users\myuser\Downloads\ConvertFilesToCsv'
# Keep plain files only; sub-directories are filtered out by isfile().
myFiles = [f for f in listdir(myFolder) if isfile(join(myFolder, f))]
for x in range(len(myFiles)):
# Only .xls / .xlsx / .xlsb names (compared in lower case) are processed.
if (myFiles[x].lower().endswith('.xls') or myFiles[x].lower().endswith('.xlsx') or myFiles[x].lower().endswith('.xlsb')):
print('Converting file: '+myFiles[x]);
# .xlsb workbooks are opened with the pyxlsb engine, the others with the pandas default.
if (myFiles[x].lower().endswith('.xlsb')):
file = pd.ExcelFile(myFolder+'\\'+myFiles[x], engine='pyxlsb')
else:
file = pd.ExcelFile(myFolder+'\\'+myFiles[x])
for mySheetName in file.sheet_names:
df = pd.read_excel(file, sheet_name=mySheetName)
# Clean every cell with removeEspecialCharacters.
df = df.applymap(removeEspecialCharacters)
# Output name: <workbook name without extension>_<sheet name>.csv, in the same folder.
csvFileName = myFolder+'\\'+myFiles[x].replace('.xlsx','').replace('.xlsb','').replace('.xls','')+'_'+mySheetName+'.csv'
df.to_csv(csvFileName,encoding='utf-8-sig',index=False,sep=",",quoting=csv.QUOTE_NONNUMERIC,quotechar="\"",escapechar="\"",decimal=".",date_format='%Y-%m-%d')#,quotechar='\'', escapechar='\\')
# NOTE(review): close() is only reached when everything above succeeded, and
# the lost indentation hides which loop level it belongs to.
file.close()
file = ''
Note: this is really a comment; it is posted as an answer only so that the code can be formatted.
Your code looks fine to me. I would advise you to use context management, similar to the doc, like this:
for filename in myFiles:
    # Compare in lower case so that e.g. 'REPORT.XLSX' is converted too, and
    # use endswith() so a dot-less file that is just named 'xls' is skipped.
    lowered = filename.lower()
    if not lowered.endswith(('.xls', '.xlsx', '.xlsb')):
        continue

    # .xlsb workbooks need the pyxlsb engine; the rest use the pandas default.
    kwargs = {'engine': 'pyxlsb'} if lowered.endswith('.xlsb') else {}

    with pd.ExcelFile(myFolder + '\\' + filename, **kwargs) as file:
        # do other stuff with file
        ...

    # No file.close() here: leaving the with-block closes the workbook, even
    # when an exception is raised while it is being processed.
import glob
import os
import csv
import zipfile
from io import StringIO
# Print every row of the CSV member stored inside the downloaded zip archive.
for name in glob.glob('C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip'):
# Archive file name without directory and without extension.
base = os.path.basename(name)
filename = os.path.splitext(base)[0]
datadirectory = 'C:/Users/RAMESH SANTHA/Downloads/'
dataFile = filename
# Rebuild "<name>.zip", its full path, and the member name "<name>.csv".
archive = '.'.join([dataFile, 'zip'])
fullpath = ''.join([datadirectory, archive])
csv_file = '.'.join([dataFile, 'csv']) #all fixed
filehandle = open(fullpath, 'rb')
zfile = zipfile.ZipFile(filehandle)
# NOTE(review): with `from io import StringIO` in effect, the name StringIO is
# already the class, so the attribute lookup StringIO.StringIO fails.
data = StringIO.StringIO(zfile.read(csv_file))
reader = csv.reader(data)
for row in reader:
print (row)
I tried following code to read data from zip folder which contains csv file and print rows but got error:
data = StringIO.StringIO(zfile.read(csv_file))
AttributeError: type object '_io.StringIO' has no attribute 'StringIO'
There is no StringIO.StringIO() in Python 3; the class is io.StringIO():
import io
data = io.StringIO(...)
With your import (from io import StringIO) it is even shorter, without the io. prefix:
from io import StringIO
data = StringIO(...)
BTW: I think you overcomplicated code using glob and join(). And you can use filename directly with ZipFile without open()
import os
import csv
import zipfile
import io
zip_fullname = 'C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip'

zip_file = os.path.basename(zip_fullname)
# Swap only the extension; str.replace() would also rewrite a '.zip' that
# happens to appear earlier in the file name.
csv_file = os.path.splitext(zip_file)[0] + '.csv'

print(zip_file)  # download-NIFTY 50-01012020.zip
print(csv_file)  # download-NIFTY 50-01012020.csv

# The with-blocks close the archive and the member even if reading fails.
with zipfile.ZipFile(zip_fullname) as zfile:
    with zfile.open(csv_file) as raw:
        # The member is read as bytes; wrap it so csv.reader receives text.
        # newline='' leaves line-ending handling to the csv module.
        data = io.TextIOWrapper(raw, encoding='utf-8', newline='')
        for row in csv.reader(data):
            print(row)
But with pandas it should be even simpler
import pandas as pd
df = pd.read_csv('C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip')
print(df)
Looking at the script, you are getting the error while opening the CSV file from the zip file. Below is Python 3 code that works for me with a zip file containing a few CSV files. The directory to extract to should exist before you run the script.
import zipfile
path_to_zip_file='/tmp/test1.zip' # Assuming this file exist , This path is from mac, but should work for windows as well'
directory_to_extract_to='/tmp/extract/' # Assuming this directory already exist
import csv,os
import codecs
import glob
# Unpack the whole archive into the target directory.
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(directory_to_extract_to)

# os.path.join builds the pattern correctly whether or not the directory ends
# with a slash. glob already returns paths that include the directory, so they
# are opened as they are instead of being joined onto the directory again.
for path in glob.glob(os.path.join(directory_to_extract_to, '*.csv')):
    # Text mode with an explicit encoding replaces the manual
    # codecs.iterdecode() wrapper; newline='' is what the csv module expects.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        # Each row is a dict keyed by the CSV header line.
        # Use csv.reader(f) instead to get every row as a plain list.
        for row in csv.DictReader(f):
            print(row)
Here is my code,
import os, os.path
import collections
import sys
import re
# Look for the command-line element in the input file and, when present,
# write it to axcfgpasww-modified.dat.
DIR_DAT = "dat"
DIR_OUTPUT = "output"
filenames = []
data = []
# Create the output folder when it does not exist yet.
if not os.path.exists(DIR_OUTPUT):
os.makedirs(DIR_OUTPUT)
input_file = 'axcfgpasww-from-server.dat'
# First argument: text to look for; second argument: replacement value.
element = sys.argv[1]
output_value = sys.argv[2]
with open(input_file) as infile, open('axcfgpasww-modified.dat', "w") as outfile:
# The input file is opened a second time here and searched as one string.
if element in open(input_file).read():
# The pattern is applied to `element` itself, not to the file content, so
# what gets written is the element text without leading whitespace.
regex = re.findall("\s*([\S\s]+)", element)
outfile.write(regex[0])
print(regex[0])
else:
print('No match found')
The input_file :
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=2590552
I execute my script this way : modify_file.py NUM_COMM:nNN0.7 Hello world !
So if NUM_COMM:nNN0.7 exists in the file, it writes "NUM_COMM:nNN0.7" in a new axcfgpasww-modified.dat file.
But what I want to do, is execute my command as written above. And the result is the input file, with only the new value.
So my output file would be :
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=Hello world !
Can anyone help me on this ?
Thanks !
I have made some refactoring to your original code, and made it produce the output you seek,
import os, os.path
import collections
import sys
import re
DIR_DAT = "dat"
DIR_OUTPUT = "output"
input_file = 'axcfgpasww-from-server.dat'


def replace_value(lines, element, output_value):
    """Return the lines with the value of key `element` replaced.

    Every line is expected to look like ``KEY=VALUE``. A line matches when
    the text before its first '=' equals `element` (ignoring surrounding
    whitespace); everything after that '=' is then replaced by
    `output_value`, and the line's trailing newline is kept. Lines without
    '=' and lines with a different key are copied unchanged.

    Parameters:
        lines: iterable of text lines (for example an open file).
        element: the key whose value is to be replaced.
        output_value: the new value.

    Returns:
        A ``(new_lines, found)`` tuple; `found` is True when at least one
        line was changed.
    """
    updated = []
    found = False
    for line in lines:
        # partition() never raises on lines without '=' and keeps values
        # that themselves contain '=' in one piece.
        key, sep, _old_value = line.partition("=")
        if sep and key.strip() == element:
            # The old value includes the line ending, so put it back.
            line_end = "\n" if line.endswith("\n") else ""
            updated.append(key + sep + output_value + line_end)
            found = True
        else:
            updated.append(line)
    return updated, found


def main():
    """Replace one value in `input_file` using the command-line arguments.

    Usage: modify_file.py ELEMENT NEW VALUE WORDS...
    """
    if len(sys.argv) < 3:
        sys.exit('usage: modify_file.py ELEMENT NEW_VALUE...')
    element = sys.argv[1]
    # An unquoted value such as `Hello world !` arrives as several arguments.
    output_value = ' '.join(sys.argv[2:])

    # in case the output folder doesn't exist
    if not os.path.exists(DIR_OUTPUT):
        os.makedirs(DIR_OUTPUT)

    with open(input_file) as infile:
        data, found = replace_value(infile, element, output_value)

    if not found:
        print('No match found')

    with open(input_file, 'w') as outfile:
        outfile.writelines(data)


if __name__ == "__main__":
    main()
output:
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=Hello World!
Hope this helps
I am new to Python and am starting some online courses. I am trying to convert some data from a paragraph format to CSV format (shown below.) I am able to import a text file containing the paragraph format and export that to CSV but each line in the paragraph format comes in as a single line when imported into a spreadsheet.
import csv
import glob
import os
# Python 2 script: copy every *.txt file of the input folder to a .csv file
# of the same base name in the output folder.
directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
txt_files = os.path.join(directory, '*.txt')
for txt_file in glob.glob(txt_files):
with open(txt_file, "rb") as input_file:
# Each input line is split on '=' by the csv reader.
in_txt = csv.reader(input_file, delimiter='=')
# Same base name as the input file, with a .csv extension.
filename = os.path.splitext(os.path.basename(txt_file))[0] + '.csv'
with open(os.path.join(output, filename), 'wb') as output_file:
out_csv = csv.writer(output_file)
# Rows are written one-to-one, so each input line stays a separate CSV row.
out_csv.writerows(in_txt)
I do not know how to parse the data to separate the labels and spaces from the numeric values and combine each paragraph section into a single line with quotes and commas for the CSV file. Any help would be greatly appreciated!
Paragraph format:
12-03-06 15:19:36
FLOW: 1.17365 g/m
POS: +9273x1Gal
12-03-06 15:19:37
FLOW: 1.17849 g/m
POS: +9283x1Gal
12-03-06 15:19:38
FLOW: 1.19849 g/m
POS: +9293x1Gal
(repeats)
Desired CSV output (note, I had to add a single quote before the + to allow proper import as text into a spreadsheet, otherwise it comes in as a 0)
"12-03-06 15:19:36","FLOW:","1.17365","g/m","POS:","'+","9273","x1","Gal"
"12-03-06 15:19:37","FLOW:","1.17849","g/m","POS:","'+","9283","x1","Gal"
"12-03-06 15:19:38","FLOW:","1.19849","g/m","POS:","'+","9293","x1","Gal"
I suggest using a collections.deque to work on three lines at a time, and re.match to parse out the items you want:
# -*- coding: utf-8 -*-
from collections import deque
import csv
from functools import partial
import glob
import os
import re
import sys
# Select the prompt function and the way CSV output files are opened,
# depending on the major version of the running interpreter.
if sys.version_info[0] >= 3:
    # Python 3.x
    inp = input
    open_csv_write = partial(open, mode="w", newline="")
else:
    # Python 2.x
    inp = raw_input
    open_csv_write = partial(open, mode="wb")
# Raw string so that \d and \. reach the regex engine untouched; in a plain
# string literal they are invalid escape sequences.
POS_REG = re.compile(r"(POS:) ([+-])(\d+(?:\.\d+)?)(x\d+)(\w+)", re.I)


def change_ext(fn, new_ext):
    r"""
    Given `fn` as "path\filename.old_ext",
    return "path\filename" + new_ext
    """
    # The docstring is raw: otherwise "\f" would turn into a form feed.
    return os.path.splitext(fn)[0] + new_ext


def get_pos(line, reg=POS_REG):
    """
    Given a string like "POS: +92.73x1Gal",
    return ['POS:', '+', '92.73', 'x1', 'Gal']

    Return [] when `line` does not start with such a POS record.
    """
    match = reg.match(line)
    return list(match.groups()) if match else []
def process(inf, outcsv):
    """Write one CSV row per timestamp / FLOW / POS triple found in `inf`.

    A sliding window of three right-stripped lines moves over the input.
    Whenever the middle line starts with 'FLOW:' and the newest line parses
    as a POS record, the row written is: first line, the whitespace-split
    FLOW line, then the POS fields.
    """
    # Start the window with the first two lines ('' when the input is shorter).
    window = deque((next(inf, '').rstrip(), next(inf, '').rstrip()), maxlen=3)

    for raw in inf:
        window.append(raw.rstrip())
        stamp, flow, _newest = window
        if not flow.startswith('FLOW:'):
            continue
        pos_fields = get_pos(raw)
        if pos_fields:
            outcsv.writerow([stamp] + flow.split() + pos_fields)
def main():
    """Prompt for two directories and convert every *.txt file to CSV.

    Each '<name>.txt' in the input directory is processed into
    '<name>.csv' in the output directory.
    """
    in_dir = inp("Input directory: ")
    out_dir = inp("Output directory: ")

    for src_path in glob.glob(os.path.join(in_dir, '*.txt')):
        # Same base name as the source, placed in the output directory.
        csv_name = change_ext(os.path.basename(src_path), ".csv")
        dst_path = os.path.join(out_dir, csv_name)
        with open(src_path) as inf:
            with open_csv_write(dst_path) as outf:
                process(inf, csv.writer(outf))
if __name__ == "__main__":
main()
This script reads and rewrites all the individual HTML files in a directory. It iterates over the matches of the search terms, highlights them and writes the output. The issue is that, after highlighting the last instance of a search term, the script drops all the remaining content after that last match from the output of each file. Any help here is appreciated.
import os
import sys
import re
# Python 2 script: wrap every match of the word list in a coloured <strong>
# span and write the result back to the same file.
source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
filepath = os.path.join(source+'\\'+f)
infile = open(filepath, 'r+')
source_content = infile.read()
# The parentheses do not create a tuple; color is simply the string 'red'.
color = ('red')
regex = re.compile(r"(\b in \b)|(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
# i is the end offset of the previous match; output collects the new text.
i = 0; output = ""
for m in regex.finditer(source_content):
# Append the text between the previous match and this one, then the match
# itself wrapped in the highlighting markup.
output += "".join([source_content[i:m.start()],
"<strong><span style='color:%s'>" % color[0:],
source_content[m.start():m.end()],
"</span></strong>"])
i = m.end()
# NOTE(review): source_content[i:] (the text after the last match) is never
# added to output before it is written.
outfile = open(filepath, 'w')
outfile.seek(0, 2)
outfile.write(output)
print "\nProcess Completed!\n"
infile.close()
outfile.close()
raw_input()
After your for loop is over, you need to include whatever is left after the last match:
...
for m in regex.finditer(source_content):
    ...
    i = m.end()
# After the loop, i is the end of the last match: append the rest of the file.
output += source_content[i:]  # Here's the end of your file
outfile = open(filepath, 'w')
...