Skip the code if the file already exists in the output directory (outputfile) — Python

Check the output directory (outputfile) before running the code, to see whether the file is already there.
import os

# Replace every occurrence of `texttofind` with `texttoreplace` in each file
# under inputfiles/, writing the converted copy to outputfile/ with the
# same file name. The outputfile/ directory must already exist.
texttofind = 'abcd'
texttoreplace = 'wxyz'

sourcepath = os.listdir('inputfiles/')
for filename in sourcepath:
    inputpath = 'inputfiles/' + filename
    print('conversion is ongoing for:' + inputpath)
    # Avoid reusing one name for both the path and the file handle
    # (the original bound `inputfile` to both).
    with open(inputpath, 'r') as infile:
        filedata = infile.read()
    # Count matches before replacing so the summary is accurate.
    freq = filedata.count(texttofind)
    destinationpath = 'outputfile/' + filename
    filedata = filedata.replace(texttofind, texttoreplace)
    with open(destinationpath, 'w') as outfile:
        outfile.write(filedata)
    # Bug fix: the original printed the literal text
    # 'total %d Record replaced %freq' — interpolate the count instead.
    print('total %d records replaced' % freq)

Something like this?
import os

# Replace `texttofind` with `texttoreplace` in every file under inputfiles/,
# skipping any file whose converted copy already exists in outputfile/
# (so a re-run does not redo finished work).
texttofind = 'abcd'
texttoreplace = 'wxyz'

sourcepath = os.listdir('inputfiles/')
for filename in sourcepath:
    destinationpath = 'outputfile/' + filename
    # Skip files already converted on a previous run.
    if not os.path.isfile(destinationpath):
        inputpath = 'inputfiles/' + filename
        print('conversion is ongoing for:' + inputpath)
        with open(inputpath, 'r') as infile:
            filedata = infile.read()
        freq = filedata.count(texttofind)
        filedata = filedata.replace(texttofind, texttoreplace)
        with open(destinationpath, 'w') as outfile:
            outfile.write(filedata)
        # Bug fix: the original printed the literal text
        # 'total %d Record replaced %freq' — interpolate the count instead.
        print('total %d records replaced' % freq)

Related

How can I execute 1TB log file python script in command line

I have a log file with a capacity of 1 TB. I am uncertain how to run this Python script from the command line. I use the sys library, but my CSV data is still not added.
Below is my python code.
import re
import sys
from csv import writer
import datetime

# Default paths; the input log can be overridden on the command line.
log_file = '/Users/kiya/Desktop/mysql/ipscan/ip.txt'
output_file = '/Users/kiya/Desktop/mysql/ipscan/output.csv'

try:
    # Bug fix: the original parsed argv[1] into `ip_file` but then always
    # opened the hard-coded `log_file` — the command-line argument was
    # silently ignored. Use the argument as the file to scan.
    log_file = sys.argv[1]
except IndexError:
    print("usage: python3 {} [ip file]".format(sys.argv[0]))
    sys.exit()

name_to_check = 'MBX_AUTHENTICATION_FAILED'

# Open the output once (appending) instead of reopening it per matching line.
with open(log_file, encoding="utf-8") as infile, open(output_file, 'a') as outfile:
    for line in infile:
        if name_to_check not in line:
            continue
        username = re.search(r'(?<=userName=)(.*)(?=,)', line)
        ip = re.search(
            r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}'
            r'([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])', line)
        # Skip lines where either pattern fails instead of crashing
        # with AttributeError on .group() of None.
        if username is None or ip is None:
            continue
        outfile.write('{username},{ip}\n'.format(username=username.group(),
                                                 ip=ip.group()))
Try this — it works well; if the problem persists, check your search regex:
import re
import datetime
from sys import argv

# NOTE(review): `name_to_check` and `output_file` must be defined earlier
# in the script (as in the question's code) — this fragment relies on them.
log_file = ""
if len(argv) > 1:
    # Bug fix: argv always contains the script name at index 0, so the
    # original `len(argv) > 0` test was always true and the error branch
    # was unreachable.
    log_file = argv[1]
else:
    quit("No log_file specified, exiting script.")

with open(log_file, encoding="utf-8") as infile:
    for line in infile:
        if name_to_check in line:
            username = re.search(r'(?<=userName=)(.*)(?=,)', line)
            username = username.group()
            date = re.search(r'(?P<date>\d{8})\s+(?P<time>\d{9})\+(?P<zone>\d{4})', line)
            date = datetime.datetime.strptime(date.group('date'), "%Y%m%d").strftime("%Y-%m-%d")
            print(date)
            time = re.search(r'(?P<date>\d{8})\s+(?P<time>\d{9})\+(?P<zone>\d{4})', line)
            time = datetime.datetime.strptime(time.group('time'), "%H%M%S%f").strftime("%H:%M:%S")
            print(time)
            ip = re.search(r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])', line)
            # Bug fix: write the matched text, not the repr of the Match
            # object (the original omitted .group() for ip only).
            ip = ip.group()
            with open(output_file, "ab", buffering=0) as outfile:
                outfile.write(("{},{},{},{}\n".format(username, date, time, ip)).encode())

Running script on every text file in folder

I'm looking to run my script on all text files in a directory, but I'm having a lot of trouble.
Here is the code I have so far:
# Parse one instrument text file into `data` (ID -> measurements) while
# writing a one-line-per-record summary to the output file.
# Line markers: '!' = date, '?' = sample flag, '#' = record ID,
# blank line = end of record; everything else is "<key> (<unit>) <value>".
data = {}
date = ID = values = None
infile = "z140327b.txt"
outfile = "oz140327b.txt"
sample = 1

with open(infile) as datafile, open(outfile, 'w') as f2:
    for line in datafile:
        if line.lstrip().startswith('!'):
            date = line[1:].strip()
        elif line.lstrip().startswith('?'):
            sample = 2
        elif line.lstrip().startswith('#'):
            ID = line[1:].strip()
            data[ID] = {}
            data[ID]['date'] = date
            tedtime = ID[0:2] + ":" + ID[2:]
            str_1 = str(data[ID])
            f2.write(tedtime + ' ' + date + ',' + str(sample))
        elif line.strip():
            if not ID:
                continue
            try:
                words = line.split()
                value = float(words[-1])  # last word is the reading
                unit = words[-2].lstrip('(').rstrip(')')
                item = {'value': value, 'unit': unit}
                key = ' '.join(words[:-2])
                data[ID][key] = item
            except ValueError:
                print("Could not parse this line:")
                print(line)
                continue
        else:  # 'empty' line marks the end of a record
            # Guard: two consecutive blank lines would otherwise index
            # data[None] here.
            if ID is not None:
                ca_str = str(data[ID]['Contact Angle']['value'])
                f2.write(',' + ca_str + '\n')
            ID = None
    # Bug fix: the original unconditionally did `ID = None` and then read
    # data[ID] — a guaranteed KeyError. Flush the final record only if the
    # file did not already end with a blank line.
    if ID is not None:
        ca_str2 = str(data[ID]['Contact Angle']['value'])
        f2.write(',' + ca_str2 + '\n')
At the minute, I'm manually adding the filename (infile) and the output filename (outfile). I would like the output file name to be the same as the input file, preceded by an 'o', as shown in the example code.
You can use glob to get all the files in the directory:
from glob import glob

# Process every .txt file in the current directory, writing each result
# to a new file named with an 'o' prefix (z.txt -> oz.txt).
files = glob('*.txt')
for filename in files:
    with open(filename, 'r') as f, open('o' + filename, 'w') as f1:
        # Read from f, transform as needed, write to f1.
        # (The original left a `....` placeholder here, which is a
        # syntax error; a plain copy is shown as the minimal stand-in.)
        f1.write(f.read())
Simply iterate over each filename, do what you want to it, and write it to a new file.
Make sure your script is run from the directory you are in, or you need to pass the path to glob.
import glob
import os.path


def text_files(target_dir):
    """Yield (infile, outfile) tuples for all *.txt files in target_dir.

    The output name is the input's base name prefixed with 'o', placed
    in the same directory (z.txt -> oz.txt).
    """
    # Renamed from `text_files` — the original shadowed the function's
    # own name with this local variable.
    pattern = os.path.join(target_dir, '*.txt')
    for fname in glob.glob(pattern):
        outfile = 'o' + os.path.basename(fname)
        yield fname, os.path.join(target_dir, outfile)


# Search for text files in /tmp.
# Bug fix: `print inf, outf` is Python-2 syntax and a SyntaxError in
# Python 3, which the surrounding snippets use.
for inf, outf in text_files("/tmp"):
    print(inf, outf)

How do i fix IOError: [Errno 2] No such file or directory error?

I have received an IOError: [Errno 2] No such file or directory error, and I can't seem to figure out where the problem is. I have checked the directory, and the file exists.
Also, I am trying to run this program over many text files, so if you could check whether there are any other problems, that would be great!
import glob
import os
import shutil
import fileinput
import arcpy
from arcpy import env
env.workspace = "h:/Python scripts/Tests2"
list_of_files = glob.glob("h:/Python scripts/Tests2/*.txt")
root_path = 'h:/Python scripts/Tests2/'
for file_name in list_of_files:
input = open(file_name, 'r')
output = open(file_name.replace('.txt', 't2.csv'), "w")
for line in input:
str = line.strip(" dd/mm/yyyy hh:mm:ss kA\t")
str = str.replace("date", "ddmmyyyy,hhmmss")
str = str.replace(" lat. long. amp.", " lat,long,ka")
str = str.replace("id ", "id,")
str = str.replace(" ", ",")
str = str.replace(" ", ",")
output.write(str)
input.close()
output.close()
root_path2 = 'h:/Python scripts/Tests2/'
infile = arcpy.ListFiles("*t2.csv")
coordinatesys = 'H:\Python scripts\modeltests\NAD 1983.prj'
#infile = glob.glob("h:/Python scripts/Tests2/*scv.txt")
for file_name2 in infile:
print infile
print file_name2
out_name = file_name2.replace('t2.csv', 'p2.shp')
arcpy.CreateFeatureclass_management(root_path2, out_name, "Point", "", "DISABLED", "DISABLED", coordinatesys)
arcpy.AddField_management(out_name, "ddmmyyyy", "TEXT")
arcpy.AddField_management(out_name, "hhmmss", "TEXT")
arcpy.AddField_management(out_name, "lat", "LONG")
arcpy.AddField_management(out_name, "long", "LONG")
arcpy.AddField_management(out_name, "ka", "LONG")
print out_name
CsvTrack = open(file_name2, "r")
headerLine = CsvTrack.readline()
valueList = headerLine.strip().split(",")
print valueList
daysValueIndex = valueList.index("ddmmyyyy")
timeValueIndex = valueList.index("hhmmss")
latValueIndex = valueList.index("lat")
lonValueIndex = valueList.index("long")
kaValueIndex = valueList.index("ka")
cursor = arcpy.InsertCursor(out_name)
for Rows in CsvTrack.readlines():
segmentedPoint = Rows.split(",")
daysValue = segmentedPoint[daysValueIndex]
timeValue = segmentedPoint[timeValueIndex]
latValue = segmentedPoint[latValueIndex]
lonValue = segmentedPoint[lonValueIndex]
kaValue = segmentedPoint[kaValueIndex]
vertex = arcpy.CreateObject("Point")
vertex.X = lonValue
vertex.Y = latValue
feature = cursor.newRow()
feature.days = daysValue
feature.time = timeValue
feature.shape = vertex
feature.ka = kaValue
cursor.insertRow(feature)
del cursor`
Here is the error specifically:
Traceback (most recent call last):
File "C:\Python27\Lib\site-packages\pythonwin\pywin\framework\scriptutils.py", line 325, in RunScript
exec codeObject in __main__.__dict__
File "H:\Python scripts\Script1.py", line 45, in <module>
CsvTrack = open(file_name2, "r")
IOError: [Errno 2] No such file or directory: u'20060705t2.csv'
You need to prepend the path to each file when you create the file:
input = open(os.path.join(env.workspace, filename), 'r')

printing data from a json file to csv file

When I'm printing data from my JSON file to a CSV file, it is not getting printed in separate columns.
here is my code
import json
import urllib
import csv


def main(infile='tweet-stream.json', outfile='newdata3.csv'):
    """Extract each tweet's user language and status count into a CSV file.

    infile: path to a file containing one JSON tweet object per line.
    outfile: destination CSV with two columns, (language, status_count).

    The defaults preserve the original hard-coded paths, so calling
    main() with no arguments behaves as before.
    """
    # Bug fixes: `print >>Outputfile` is Python-2 syntax (a SyntaxError
    # under Python 3), and the concatenated string put everything in one
    # CSV column. csv.writer emits proper separate columns.
    with open(infile, 'r') as f1, open(outfile, 'w', newline='') as out:
        rows = csv.writer(out)
        for line in f1:
            d = json.loads(line)
            rows.writerow([d["user"]["lang"], d["user"]["statuses_count"]])


if __name__ == "__main__":
    main()
import json
import csv

# Bug fixes: the original was missing the opening quote in
# open(tweet-stream.json', ...) — a syntax error — and wrote to `file1`
# without ever opening it. Load the JSON document and write one
# (lang, statuses_count) row per entry.
with open('tweet-stream.json', 'r') as src:
    f1 = json.load(src)
with open('newdata3.csv', 'w', newline='') as file1:
    fileWriter = csv.writer(file1, delimiter=",", quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
    for x in f1:
        temp = [x["user"]["lang"], x["user"]["statuses_count"]]
        fileWriter.writerow(temp)

python parse file

I have a file with username and emails, in this format :
pete,pbellyer#gmail.com
I want to keep only the email, so I thought about using a regex like this:
import re
import sys

mailfile = sys.argv[1]

# Bug fixes: re.match anchors at the start of the string, so ',(.+)' can
# never match a line like 'pete,addr' (m is None and m.group() raises
# AttributeError) — use re.search. Also, the original never stored the
# results anywhere; write each captured address to an output file.
with open(mailfile, "r") as infile, open(mailfile + ".out", "w") as outfile:
    for mail_line in infile:
        m = re.search(r',(.+)', mail_line)
        if m:
            # group(1) is the text after the comma — the address itself.
            outfile.write(m.group(1).strip() + '\n')
But then I don't know how to store the result in a file.
I always get the last email address in the new file.
What would be the best way to store the results in a file ?
Thanks!
import sys

# Copy the second comma-separated field of each input line (the e-mail
# address) to the output file, one address per line; lines without a
# second field are skipped. Paths come from the command line.
infile, outfile = sys.argv[1], sys.argv[2]
with open(infile) as inf, open(outfile, "w") as outf:
    for record in inf:
        fields = record.split(',')
        if len(fields) > 1:
            outf.write(fields[1].strip() + '\n')
You could use the csv module (since your data looks comma-separated, at least in your example):
import sys
import csv

# Write the second column of the comma-separated file named on the
# command line to mail_addresses.txt, one value per line.
# NOTE(review): the 'rb' mode is the Python-2 csv convention; under
# Python 3 this would need mode 'r' with newline='' — confirm the
# target interpreter.
with open('mail_addresses.txt', 'w') as outfile:
    reader = csv.reader(open(sys.argv[1], 'rb'))
    for record in reader:
        outfile.write("%s\n" % record[1])
Try something like this:
import sys
Mailfile = sys.argv[1]
Outfile = sys.argv[2]
try:
in_file = open(Mailfile, 'r')
out_file = open(Outfile, 'a')
for mail in in_file.readlines():
address = mail.split(',')[1].strip()
out_file.write(address+',') #if you want to use commas to seperate the files, else use something like \n to write a new line.
finally:
in_file.close()
out_file.close()

Categories