Calling a python script within another one - python

I have seen a couple of questions related to my issue but haven't been able to get an answer.
In my program I have a .txt file that needs to be converted to PDF.
I came across this script that does exactly that: https://code.activestate.com/recipes/189858-python-text-to-pdf-converter/
I have imported it into my program, but I am not sure how to call it and pass my .txt file so that it converts it to PDF.
The converter script is named txttopdf.py; I have imported it with import txttopdf, and it is in the same directory.
The last part of my program tries to convert the .txt to .pdf, but it gives me a SyntaxError.
Below is my program
import sqlite3
import platform
import sys
import os
import re
import time
import smtplib
import mimetypes
import txttopdf
from datetime import datetime
from email.mime.multipart import MIMEMultipart
from email import encoders
from email.message import Message
from email.mime.text import MIMEText

ipstr = "unknown"
errorstr = "unknown"
gtstr = "unknown"
print "reading the file"
linuxpath = raw_input("Enter the path")
txt_file = open(linuxpath,"r")
countlines = 0
if os.stat("lastline.txt").st_size == 0:
    for line in open(linuxpath):
        pattern = re.compile('(([2][5][0-5]\.)|([2][0-4][0-9]\.)|([0-1]?[0-9]?[0-9]\.)){3}'+'(([2][5][0-5])|([2][0-4][0-9])|([0-1]?[0-9]?[0-9]))|[\d.]+|\:\:\d|[\w\.]+')
        #([\d.]+)[\s-]+\s+"([A-Z]+)\s+(.+?)"\s+([\s\d]+)')\[([\d\/A-Za-z: -]+)\]
        iprgex = pattern.search(line)
        countlines = countlines + 1
        if iprgex:
            ips = iprgex.start()
            ipe = iprgex.end()
            ipstr = line[ips:ipe]
        pattern = re.compile('[\d]+\/[A-Za-z]+\/[\d]+')
        #('\[([\d\/A-Za-z: -]+)\]')
        datergex = pattern.search(line)
        if datergex:
            dates = datergex.start()
            datee = datergex.end()
            datestr = line[dates:datee]
            monthstr = datestr[3:6]
            if monthstr == "Jan":
                date_chnge = datestr.replace("Jan","01")
            elif monthstr == "Feb":
                date_chnge = datestr.replace("Feb","02")
            elif monthstr == "Mar":
                date_chnge = datestr.replace("Mar","03")
            elif monthstr == "Apr":
                date_chnge = datestr.replace("Apr","04")
            elif monthstr == "May":
                date_chnge = datestr.replace("May","05")
            elif monthstr == "Jun":
                date_chnge = datestr.replace("Jun","06")
            elif monthstr == "Jul":
                date_chnge = datestr.replace("Jul","07")
            elif monthstr == "Aug":
                date_chnge = datestr.replace("Aug","08")
            elif monthstr == "Sep":
                date_chnge = datestr.replace("Sep","09")
            elif monthstr == "Oct":
                date_chnge = datestr.replace("Oct","10")
            elif monthstr == "Nov":
                date_chnge = datestr.replace("Nov","11")
            elif monthstr == "Dec":
                date_chnge = datestr.replace("Dec","12")
            dt_day = date_chnge[0:2]
            dt_month = date_chnge[3:5]
            dt_year = date_chnge[6:]
            new_date = dt_year + '-' + dt_month + '-' + dt_day
        pattern = re.compile('\:[\d]+\:[\d]+\:[\d]+')
        timergex = pattern.search(line)
        if timergex:
            times = timergex.start()
            timee = timergex.end()
            timestr = line[times:timee]
            extract_time = timestr[1:]
            datestring = new_date + ' ' + extract_time
            dt = datetime.strptime(datestring, '%Y-%m-%d %H:%M:%S')
        pattern = re.compile('"([A-Z]+)\s+(.+?)"|"\-"')
        getrgex = pattern.search(line)
        if getrgex:
            gts = getrgex.start()
            gte = getrgex.end()
            gtstr = line[gts:gte]
        pattern = re.compile('200|401|403|404|412|500|302')
        errorrgex = pattern.search(line)
        if errorrgex:
            errors = errorrgex.start()
            errore = errorrgex.end()
            errorstr = line[errors:errore]
        file = open('parse1.txt','a')
        file.write(ipstr + datestr + timestr + gtstr + errorstr + "\n")
        #Analysing the get request
        print countlines
        with open('ALLINONE.txt','r') as f:
            for cheatsheetline in f:
                indexvalue = gtstr.strip().find(cheatsheetline.strip())
                if indexvalue > 0:
                    file = open('CAUTION.txt','a')
                    file.write(ipstr + datestr + timestr + gtstr + errorstr + "\n")
                    #break
        file.close()
    lastlinefile = open('lastline.txt','w+')
    lastlinefile.write(line)
#this part should convert the txt file CAUTION.txt to PDF
#txttopdf.main()
txttopdf CAUTION.txt

The easiest way to do this is via subprocess.Popen:
Example:
import sys
from subprocess import Popen, PIPE, STDOUT

PYTEXT2PDF = "/path/to/pytext2pdf"

def convert(filename):
    print("Converting {} to PDF".format(filename))
    p = Popen(
        [sys.executable, PYTEXT2PDF, filename],
        stdout=PIPE, stderr=STDOUT
    )
    stdout, _ = p.communicate()
    print(stdout)

convert("filename.txt")
By the looks of it, pyText2Pdf will convert the text file to PDF and give the output file the same basename as the input file, with a .pdf extension.
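Alternatively, since you already have import txttopdf, you can call the recipe in-process — a minimal sketch, assuming the recipe exposes a main() that reads its arguments from sys.argv (check the recipe source to confirm that entry point):

import sys
import txttopdf

# Fake a command line for the recipe, then call its (assumed) entry point.
sys.argv = ["txttopdf.py", "CAUTION.txt"]
txttopdf.main()  # verify the recipe actually defines main() parsing sys.argv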

Related

FileNotFoundError: [Errno 2] No such file or directory: 'o'

I'm getting this error message when using yield.
When I remove the yield results and yield timeout lines, the code works fine without the error message.
I don't know what directory or file 'o' is, since I'm not using it anywhere in the code.
Here is my full code:
import gradio as gr
import ipaddress
import requests
from requests.auth import HTTPBasicAuth
import os
import string
from datetime import date, datetime

####SETTING UP DATE AND TIME WITH ISRAELI FORMAT###
current_date = date.today()
current_month = current_date.strftime('%B')
current_year = current_date.strftime('%Y')
date_reformat = current_date.strftime('%d/%m/%y')
current_day = current_date.strftime('%d')

###SWITCH###
def switch_ver(ip):
    with open('switches_successful_results.txt','w') as switches_successful, open('switches_failed_results.txt', 'w') as switches_failed:
        ip_addr = ip.split()
        for i in ip_addr:
            ip_addr = list(ipaddress.ip_network(i))
            try:
                basic = HTTPBasicAuth('some','password')
                login = requests.post('http://'+i+':80/rest/v7/login-sessions', auth=basic)
                cookie = login.cookies
                get_ver = requests.get('http://'+i+':80/rest/v7/system/status', cookies=cookie)
                get_ver = get_ver.json()
                get_ver = get_ver['firmware_version']
                with open('switches_successful_results.txt', 'a+') as sw:
                    results = 'Switch version for {} is: {} \n'.format(i, get_ver)
                    sw.write(results)
                yield results
            except requests.exceptions.ConnectTimeout:
                timeout = 'Could not connect to switch: '+i+' REQUEST TIMED OUT\n'
                with open('switches_failed_results.txt', 'a+') as sw:
                    sw.write(timeout)
                yield timeout
    with open('switches_successful_results.txt','r') as switches_successful, open('switches_failed_results.txt', 'r') as switches_failed:
        summary = switches_failed.read() + switches_successful.read()
        return (summary), ['switches_successful_results.txt', 'switches_failed_results.txt']

###IPBlocker###
def block_ip(ip):
    duplicate_ips = []
    blocked_ips = []
    invalid_ips = []
    with open('fortigate_ips.txt','r+') as f, open('fortigate_urls.txt', 'r+') as u:
        fortigate_ips = f.read()
        fortigate_urls = u.read()
        ip_addr = ip.split()
        for i in ip_addr:
            try:
                list(ipaddress.ip_network(i))
                if i in fortigate_ips:
                    duplicate_ips.append(i)
                elif ipaddress.ip_address(i).is_private:
                    invalid_ips.append(i)
                else:
                    blocked_ips.append(i)
                    f.write(i + '\n')
            except ValueError:
                if i in fortigate_ips or i in fortigate_urls:
                    duplicate_ips.append(i)
                elif i[0] in string.ascii_letters or i[0] == '*':
                    blocked_ips.append(i)
                    u.write(i + '\n')
                else:
                    invalid_ips.append(i)
    current_time = datetime.now()
    current_time = current_time.strftime('%H:%M:%S')
    if os.path.exists(current_year) == False:
        os.makedirs(current_year + '\\' + current_month + '\\' + current_day)
        os.chdir(current_year + '\\' + current_month + '\\' + current_day)
        with open('Blocked_IPs.txt', 'a+') as Blocked_IPs:
            to_file = ('###############{}###############\n'.format(current_time)+'\n'.join(blocked_ips))+'\n'
            Blocked_IPs.write(to_file)
        os.chdir('D:\\programs\\Python310\\Projects\\net_sec')
    elif os.path.exists(current_year) == True and os.path.exists(current_year + '\\' + current_month) == False:
        os.chdir(current_year)
        os.makedirs(current_month + '\\' + current_day)
        os.chdir(current_month + '\\' + current_day)
        with open('Blocked_IPs.txt', 'a+') as Blocked_IPs:
            to_file = ('###############{}###############\n'.format(current_time)+'\n'.join(blocked_ips))+'\n'
            Blocked_IPs.write(to_file)
        os.chdir('D:\\programs\\Python310\\Projects\\net_sec')
    elif os.path.exists(current_year) == True and os.path.exists(current_year + '\\' + current_month) == True and os.path.exists(current_year + '\\' + current_month + '\\' + current_day) == False:
        os.chdir(current_year + '\\' + current_month)
        os.mkdir(current_day)
        os.chdir(current_day)
        with open('Blocked_IPs.txt', 'a+') as Blocked_IPs:
            to_file = ('###############{}###############\n'.format(current_time)+'\n'.join(blocked_ips))+'\n'
            Blocked_IPs.write(to_file)
        os.chdir('D:\\programs\\Python310\\Projects\\net_sec')
    else:
        os.chdir(current_year + '\\' + current_month + '\\' + current_day)
        with open('Blocked_IPs.txt', 'a+') as Blocked_IPs:
            to_file = ('###############{}###############\n'.format(current_time)+'\n'.join(blocked_ips))+'\n'
            Blocked_IPs.write(to_file)
        os.chdir('D:\\programs\\Python310\\Projects\\net_sec')
    blocked_ips_result = 'Following IP\s or URLs were Blocked!: \n'+'\n'.join(blocked_ips) +'\n'
    duplicate_ips_result = 'Skipped!...Found duplicates IP\s for: \n'+'\n'.join(duplicate_ips) +'\n'
    invalid_ips_result = 'Skipped!..Invalid IP\s for \n'+'\n'.join(invalid_ips) +'\n'
    with open('fortigate_ips.txt', 'r') as f, open('fortigate_urls.txt', 'r') as u:
        current_commit_stats = len(blocked_ips)
        ips_stats = len(f.readlines())
        urls_stats = len(u.readlines())
        total_stats = ips_stats + urls_stats
    if bool(duplicate_ips) == True and bool(blocked_ips) == False:
        print(1)
        return duplicate_ips_result, current_commit_stats, ips_stats, urls_stats, total_stats
    elif bool(duplicate_ips) == True and bool(blocked_ips) == True and bool(invalid_ips) == True:
        print(2)
        return invalid_ips_result + duplicate_ips_result + blocked_ips_result, current_commit_stats, ips_stats, urls_stats, total_stats
    elif bool(invalid_ips) == True and bool(blocked_ips) == True:
        print(3)
        return invalid_ips_result + blocked_ips_result, current_commit_stats, ips_stats, urls_stats, total_stats
    elif bool(invalid_ips) == True and bool(blocked_ips) == True:
        print(4)
        return invalid_ips_result + blocked_ips_result, current_commit_stats, ips_stats, urls_stats, total_stats
    else:
        print(5)
        return (blocked_ips_result), current_commit_stats, ips_stats, urls_stats, total_stats

###GRADIO GUI###
#f = open('fortigate_ips.txt', 'r')
#fortigate = (f.read().split())
#f.close()
with gr.Blocks(title = 'Switcher') as switches_ver:
    gr.Markdown('Welcome to IPBlocker')
    with gr.Tab(label = 'IPBlocker'):
        with gr.Row():
            with gr.Column():
                ips_to_block = gr.Textbox(label = "IPs", lines = 10, placeholder=('Please fill Ips to block'))
                block_btn = gr.Button('Block')
                #ip_lookup = gr.Dropdown(fortigate)
            with gr.Column():
                output_textbox = gr.Textbox(label = "Results", lines=10)
                with gr.Row():
                    current_commit_stats = gr.Textbox(label = 'Current IP\s or URLs added to block:')
                    forti_ips_stats = gr.Textbox(label = 'Total blocked IP\s on Fortigate: ')
                    forti_urls_stats = gr.Textbox(label = 'Total URLs blocked on Fortigate')
                    forti_total_stats = gr.Textbox(label = 'Total blocked IP\s and URLs on Fortigate')
        block_btn.click(fn=block_ip, inputs = ips_to_block, outputs = [output_textbox, current_commit_stats, forti_ips_stats, forti_urls_stats, forti_total_stats])
    with gr.Tab(label = 'Switcher'):
        with gr.Row():
            with gr.Column():
                switch_box = gr.Textbox(label = 'Switches', lines = 10, placeholder='Please fill switches IPs...')
                show_ver = gr.Button('Show current switches version')
                upgrade_ver = gr.Button('Upgrade selected switches')
            with gr.Column():
                output_textbox = gr.Textbox(label='Results', lines = 10)
                output_file = gr.File(['switches_successful_results.txt', 'switches_failed_results.txt'])
        show_ver.click(fn=switch_ver, inputs = switch_box, outputs = [output_textbox, output_file])
        upgrade_ver.click(fn=block_ip, inputs = ips_to_block, outputs=[output_textbox, output_file])
switches_ver.queue(concurrency_count=20, max_size=20).launch()
full error traceback:
Traceback (most recent call last):
  File "D:\programs\Python310\lib\site-packages\gradio\routes.py", line 273, in run_predict
    output = await app.blocks.process_api(
  File "D:\programs\Python310\lib\site-packages\gradio\blocks.py", line 757, in process_api
    predictions = self.postprocess_data(fn_index, result["prediction"], state)
  File "D:\programs\Python310\lib\site-packages\gradio\blocks.py", line 721, in postprocess_data
    block.postprocess(prediction_value)
  File "D:\programs\Python310\lib\site-packages\gradio\components.py", line 2147, in postprocess
    "name": processing_utils.create_tmp_copy_of_file(
  File "D:\programs\Python310\lib\site-packages\gradio\processing_utils.py", line 323, in create_tmp_copy_of_file
    shutil.copy2(file_path, file_obj.name)
  File "D:\programs\Python310\lib\shutil.py", line 434, in copy2
    copyfile(src, dst, follow_symlinks=follow_symlinks)
  File "D:\programs\Python310\lib\shutil.py", line 254, in copyfile
    with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: 'o'
The 'o' comes from the timeout text "Could not connect...".
From what I understand about gradio, the result, for both yield and return, is mapped onto outputs, which here are output_textbox and output_file.
As the yield result is the string timeout (and similarly for the results yield case), the string is unpacked across the two outputs:
output_textbox = timeout[0] = 'C'
output_file = timeout[1] = 'o'
If you want to remove the errors, you should change the yield result to be compatible with the outputs.
For example:
yield timeout, ['switches_successful_results.txt', 'switches_failed_results.txt']
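In context, that means every yield in switch_ver should produce one value per output component. A runnable sketch of that shape (the real switch requests are replaced by a placeholder):

import gradio as gr

RESULT_FILES = ['switches_successful_results.txt', 'switches_failed_results.txt']

def switch_ver(ip):
    # Placeholder standing in for the real switch requests.
    for i in ip.split():
        yield 'Checked switch: {}\n'.format(i), RESULT_FILES  # (Textbox value, File value)

with gr.Blocks() as demo:
    switch_box = gr.Textbox(label='Switches')
    output_textbox = gr.Textbox(label='Results')
    output_file = gr.File(RESULT_FILES)  # the listed files must exist on disk
    show_ver = gr.Button('Show current switches version')
    show_ver.click(fn=switch_ver, inputs=switch_box, outputs=[output_textbox, output_file])
demo.queue().launch()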
If you are using yield, you can iterate over the result only once; a generator does not keep its data in memory the whole time. Check this out: https://stackoverflow.com/a/231855/17318894
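A quick illustration of that point:

def gen():
    yield 1
    yield 2

g = gen()
print(list(g))  # [1, 2]
print(list(g))  # [] -- the generator is exhausted after one pass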

How to read .evtx file using python?

Does anyone know how to read the event log files in C:\Windows\System32\winevt\Logs with the .evtx extension?
I have already tried to open one in Notepad and read it with Python, but Notepad says access is denied...
Does anyone know how to do it? Thanks in advance..
This is how you would read the file "Forwarded Events" from the event viewer. You need admin access, so I would run it as admin; if you don't, it will prompt you for elevation.
import win32evtlog
import xml.etree.ElementTree as ET
import ctypes
import sys

def is_admin():
    try:
        return ctypes.windll.shell32.IsUserAnAdmin()
    except:
        return False

if is_admin():
    # open event file
    query_handle = win32evtlog.EvtQuery(
        'C:\Windows\System32\winevt\Logs\ForwardedEvents.evtx',
        win32evtlog.EvtQueryFilePath)
    read_count = 0
    a = 1
    while a == 1:
        a += 1
        # read 1 record(s)
        events = win32evtlog.EvtNext(query_handle, 1)
        read_count += len(events)
        # if there is no record, break the loop
        if len(events) == 0:
            break
        for event in events:
            xml_content = win32evtlog.EvtRender(event, win32evtlog.EvtRenderEventXml)
            # parse xml content
            xml = ET.fromstring(xml_content)
            # xml namespace: the root element has an xmlns definition, so we have to use the namespace
            ns = '{http://schemas.microsoft.com/win/2004/08/events/event}'
            substatus = xml[1][9].text
            event_id = xml.find(f'.//{ns}EventID').text
            computer = xml.find(f'.//{ns}Computer').text
            channel = xml.find(f'.//{ns}Channel').text
            execution = xml.find(f'.//{ns}Execution')
            process_id = execution.get('ProcessID')
            thread_id = execution.get('ThreadID')
            time_created = xml.find(f'.//{ns}TimeCreated').get('SystemTime')
            #data_name = xml.findall('.//EventData')
            #substatus = data_name.get('Data')
            #print(substatus)
            event_data = f'Time: {time_created}, Computer: {computer}, Substatus: {substatus}, Event Id: {event_id}, Channel: {channel}, Process Id: {process_id}, Thread Id: {thread_id}'
            print(event_data)
            user_data = xml.find(f'.//{ns}UserData')
            # user_data may contain arbitrary data
else:
    ctypes.windll.shell32.ShellExecuteW(None, "runas", sys.executable, " ".join(sys.argv), None, 1)
input()
.evtx is the extension for Windows Eventlog files. It contains data in a special binary format designed by Microsoft, so you cannot simply open it in a text editor.
There are open source tools to read .evtx, and the NXLog EE can also read .evtx files. (Disclaimer: I'm affiliated with the latter.)
I modified the accepted answer a bit as follows, so that it becomes reusable:
import xml.etree.ElementTree as Et
import win32evtlog
from collections import namedtuple

class EventLogParser:
    def __init__(self, exported_log_file):
        self.exported_log_file = exported_log_file

    def get_all_events(self):
        windows_events = []
        query_handle = win32evtlog.EvtQuery(str(self.exported_log_file),
                                            win32evtlog.EvtQueryFilePath | win32evtlog.EvtQueryReverseDirection)
        while True:
            raw_event_collection = win32evtlog.EvtNext(query_handle, 1)
            if len(raw_event_collection) == 0:
                break
            for raw_event in raw_event_collection:
                windows_events.append(self.parse_raw_event(raw_event))
        return windows_events

    def parse_raw_event(self, raw_event):
        xml_content = win32evtlog.EvtRender(raw_event, win32evtlog.EvtRenderEventXml)
        root = Et.fromstring(xml_content)
        ns = "{" + root.tag.split('}')[0].strip('{') + "}"
        system = root.find(f'{ns}System')
        event_id = system.find(f'{ns}EventID').text
        level = system.find(f'{ns}Level').text
        time_created = system.find(f'{ns}TimeCreated').get('SystemTime')
        computer = system.find(f'{ns}Computer').text
        WindowsEvent = namedtuple('WindowsEvent',
                                  'event_id, level, time_created, computer')
        return WindowsEvent(event_id, level, time_created, computer)
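A usage sketch for the class above (the file path is just an example):

if __name__ == '__main__':
    parser = EventLogParser(r'C:\Windows\System32\winevt\Logs\ForwardedEvents.evtx')
    for event in parser.get_all_events():
        print(event.time_created, event.computer, event.event_id, event.level)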
I use the "python-evtx" library, you can install it using this command:
pip install python-evtx
In my case, I'm not interested in reading records with the "Information" level.
import os
import codecs
from datetime import datetime  # needed for datetime.strptime below
from lxml import etree
import Evtx.Evtx as evtx

# parseXMLtoString and createFolder are helper functions defined elsewhere
# in my script; tempFilePath is explained in the EDIT below.
def evtxFile(absolutePath, filenameWithExt, ext, _fromDate, _toDate):
    print("Reading: " + filenameWithExt)
    outText = ""
    channel = ""
    # read the windows event viewer log and convert its contents to XML
    with codecs.open(tempFilePath, "a+", "utf-8", "ignore") as tempFile:
        with evtx.Evtx(absolutePath) as log:
            for record in log.records():
                xmlLine = record.xml()
                xmlLine = xmlLine.replace(" xmlns=\"http://schemas.microsoft.com/win/2004/08/events/event\"", "")
                xmlParse = etree.XML(xmlLine)
                level = parseXMLtoString(xmlParse, ".//Level/text()")
                if not level == "0" and not level == "4":
                    providerName = parseXMLtoString(xmlParse, ".//Provider/@Name")
                    qualifiers = parseXMLtoString(xmlParse, ".//EventID/@Qualifiers")
                    timestamp = parseXMLtoString(xmlParse, ".//TimeCreated/@SystemTime")
                    eventID = parseXMLtoString(xmlParse, ".//EventID/text()")
                    task = parseXMLtoString(xmlParse, ".//Task/text()")
                    keywords = parseXMLtoString(xmlParse, ".//Keywords/text()")
                    eventRecordID = parseXMLtoString(xmlParse, ".//EventRecordID/text()")
                    channel = parseXMLtoString(xmlParse, ".//Channel/text()")
                    computer = parseXMLtoString(xmlParse, ".//Computer/text()")
                    message = parseXMLtoString(xmlParse, ".//Data/text()")
                    if level == "1":
                        level = "Critical"
                    elif level == "2":
                        level = "Error"
                    elif level == "3":
                        level = "Warning"
                    date = timestamp[0:10]
                    time = timestamp[11:19]
                    time = time.replace(".", "")
                    _date = datetime.strptime(date, "%Y-%m-%d").date()
                    if _fromDate <= _date <= _toDate:
                        message = message.replace("<string>", "")
                        message = message.replace("</string>", "")
                        message = message.replace("\r\n", " ")
                        message = message.replace("\n\r", " ")
                        message = message.replace("\n", " ")
                        message = message.replace("\r", " ")
                        outText = date + " " + time + "|" + level + "|" + message.strip() + "|" + task + "|" + computer + "|" + providerName + "|" + qualifiers + "|" + eventID + "|" + eventRecordID + "|" + keywords + "\n"
                        tempFile.writelines(outText)
    with codecs.open(tempFilePath, "r", "utf-8", "ignore") as tempFile2:
        myLinesFromDateRange = tempFile2.readlines()
    # delete the temporary file that was created
    os.remove(tempFilePath)
    if len(myLinesFromDateRange) > 0:
        createFolder("\\filtered_data_files\\")
        outFilename = "windows_" + channel.lower() + "_event_viewer_logs" + ext
        myLinesFromDateRange.sort()
        # remove duplicate records from the list
        myFinalLinesFromDateRange = list(set(myLinesFromDateRange))
        myFinalLinesFromDateRange.sort()
        with codecs.open(os.getcwd() + "\\filtered_data_files\\" + outFilename, "a+", "utf-8", "ignore") as linesFromDateRange:
            linesFromDateRange.seek(0)
            if len(linesFromDateRange.read(100)) > 0:
                linesFromDateRange.writelines("\n")
            linesFromDateRange.writelines(myFinalLinesFromDateRange)
        del myLinesFromDateRange[:]
        del myFinalLinesFromDateRange[:]
    else:
        print("No data was found within the specified date range.")
    print("Closing: " + filenameWithExt)
I hope it helps you or someone else in the future.
EDIT:
The "tempFilePath" can be anything you want, for example:
tempFilePath = os.getcwd() + "\\tempFile.txt"
I collected some information first before calling the "evtxFile" function:
The "From" and the "To" dates are in the following format: YYYY-MM-DD
Converted the dates to "date" data type:
_fromDate = datetime.strptime(fromDate, "%Y-%m-%d").date()
_toDate = datetime.strptime(toDate, "%Y-%m-%d").date()
Divided the directory where the .evtx files are located into different parts:
def splitDirectory(root, file):
    absolutePathOfFile = os.path.join(root, file)
    filePathWithoutFilename = os.path.split(absolutePathOfFile)[0]
    filenameWithExt = os.path.split(absolutePathOfFile)[1]
    filenameWithoutExt = os.path.splitext(filenameWithExt)[0]
    extension = os.path.splitext(filenameWithExt)[1]
    return absolutePathOfFile, filePathWithoutFilename, filenameWithExt, filenameWithoutExt, extension

for root, subFolders, files in os.walk(directoryPath):
    for f in files:
        absolutePathOfFile, filePathWithoutFilename, filenameWithExt, filenameWithoutExt, extension = splitDirectory(root, f)
        if extension == ".evtx":
            evtxFile(absolutePathOfFile, filenameWithExt, ".txt", _fromDate, _toDate)

How to retrieve column values by column name in Python with cx_Oracle

I'm writing a script that connects to an Oracle database and gets the results into a log file. I want to get an output like this:
FEC_INCLUSION = 2005-08-31 11:43:48,DEBITO_PENDIENTE = None,CAN_CUOTAS = 1.75e-05,COD_CUENTA = 67084,INT_TOTAL = None,CAN_CUOTAS_ANTERIOR = None,COD_INVERSION = 1,FEC_MODIFICACION = 10/04/2012 09:45:22,SAL_TOT_ANTERIOR = None,CUOTA_COMISION = None,FEC_ULT_CALCULO = None,MODIFICADO_POR = CTAPELA,SAL_TOTAL = 0.15,COD_TIPSALDO = 1,MONTO_COMISION = None,COD_EMPRESA = 1,SAL_INFORMATIVO = None,COD_OBJETIVO = 5,SAL_RESERVA = None,INCLUIDO_POR = PVOROPE,APORTE_PROM = 0.0,COSTO_PROM = None,CREDITO_PENDIENTE = None,SAL_PROM = 0.0,
FEC_INCLUSION = 2005-08-31 11:43:49,DEBITO_PENDIENTE = None,CAN_CUOTAS = 0.0,COD_CUENTA = 67086,INT_TOTAL = None,CAN_CUOTAS_ANTERIOR = None,COD_INVERSION = 9,FEC_MODIFICACION = 25/02/2011 04:38:52,SAL_TOT_ANTERIOR = None,CUOTA_COMISION = None,FEC_ULT_CALCULO = None,MODIFICADO_POR = OPEJAMO,SAL_TOTAL = 0.0,COD_TIPSALDO = 1,MONTO_COMISION = None,COD_EMPRESA = 1,SAL_INFORMATIVO = None,COD_OBJETIVO = 5,SAL_RESERVA = None,INCLUIDO_POR = PVOROPE,APORTE_PROM = 0.0,COSTO_PROM = None,CREDITO_PENDIENTE = None,SAL_PROM = 0.0,
I created a dictionary with the query results:
def DictFactory(description, data):
    column_names = [col[0] for col in description]
    results = []
    for row in data:
        results.append(dict(zip(column_names, row)))
    return results
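A side note: cx_Oracle can also do this mapping per row through the cursor's rowfactory attribute, which avoids materializing every row as a dict up front — a sketch:

cursor.execute(sql)
columns = [col[0] for col in cursor.description]
cursor.rowfactory = lambda *args: dict(zip(columns, args))
for row in cursor:  # each row now arrives as a dict keyed by column name
    print(row)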
Then I created this function, which finally saves the results to my log:
def WriteLog(log_file, header, data):
    file_exist = os.path.isfile(log_file)
    log = open(log_file, 'a')
    if not file_exist:
        print "File does not exist, writing new log file"
        open(log_file, 'w').close()
    mydata = DictFactory(header, data)
    checkpoint_name = ReadCheckpointName()
    string = ''
    for m in mydata:
        for k, v in m.items():
            string = string + k + ' = ' + str(v) + ','
            if k == checkpoint_name:
                #print "KEY FOUND"
                cur_checkpoint = v
                cur_checkpoint = str(cur_checkpoint)
        #print string
        string = string + '\n'
        print cur_checkpoint
        log.write(string + '\n')
    WriteCheckpoint(cur_checkpoint, checkpoint_file)
    log.close()
This is the main function:
def GetInfo():
    mypool = PoolToDB()
    con = mypool.acquire()
    cursor = con.cursor()
    GetLastCheckpoint()
    sql = ReadQuery()
    #print sql
    cursor.execute(sql)
    data = cursor.fetchall()
    WriteLog(log_file, cursor.description, data)
    #WriteCsvLog(log_file, cursor.description, data)
    cursor.close()
But I realized that this works if I use a query that fetches a few records; however, if I try to fetch many records, my script never ends.
This is my output when I executed a query with 5000 records. As you can see, it takes too long.
[jballesteros@SplunkPorvenir FO_TIPSALDOS_X_CUENTA]$ python db_execution.py
Starting connection: 5636
GetLastCheckpoint function took 0.073 ms
GetLastCheckpoint function took 0.025 ms
ReadQuery function took 0.084 ms
File does not exist, writing new log file
DictFactory function took 23.050 ms
ReadCheckpointName function took 0.079 ms
WriteCheckpoint function took 0.204 ms
WriteLog function took 45112.133 ms
GetInfo function took 46193.033 ms
I'm pretty sure you know a much better way to do what I am trying to do.
This is the complete code:
#!/usr/bin/env python
# encoding: utf-8
import re
import sys
try:
    import cx_Oracle
except:
    print "Error: Oracle module required to run this plugin."
    sys.exit(0)
import datetime
import re
import commands
import os
from optparse import OptionParser
import csv
import time

#################################
####   Database Variables    ####
#################################
Config = {
    "host" : "",
    "user" : "",
    "password" : "",
    "instance" : "",
    "port" : "",
}
Query = {
    "sql" : "",
    "checkpoint_datetype" : "",
    "checkpoint_name" : "",
}
dir = '/home/jballesteros/PENS2000/FO_TIPSALDOS_X_CUENTA/'
connection_dir = '/home/jballesteros/PENS2000/Connection'
checkpoint_file = dir + 'checkpoint.conf'
log_file = '/var/log/Pens2000/FO_TIPSALDOS_X_CUENTA.csv'
internal_log = '/var/log/Pens2000/internal.log'
query = dir + 'query'
sys.path.append(os.path.abspath(connection_dir))
from db_connect_pool import *

def Timing(f):
    def wrap(*args):
        time1 = time.time()
        ret = f(*args)
        time2 = time.time()
        print "%s function took %0.3f ms" % (f.func_name, (time2 - time1) * 1000.0)
        return ret
    return wrap

@Timing
def InternalLogWriter(message):
    now = datetime.datetime.now()
    log = open(internal_log, 'a')
    log.write("%s ==> %s" % (now.strftime("%Y-%m-%d %H:%M:%S"), message))
    log.close()
    return

@Timing
def GetLastCheckpoint():
    global cur_checkpoint
    conf = open(checkpoint_file, 'r')
    cur_checkpoint = conf.readline()
    cur_checkpoint = cur_checkpoint.rstrip('\n')
    cur_checkpoint = cur_checkpoint.rstrip('\r')
    conf.close()

@Timing
def ReadQuery():
    global cur_checkpoint
    GetLastCheckpoint()
    qr = open(query, 'r')
    line = qr.readline()
    line = line.rstrip('\n')
    line = line.rstrip('\r')
    Query["sql"], Query["checkpoint_datetype"], Query["checkpoint_name"] = line.split(";")
    sql = Query["sql"]
    checkpoint_datetype = Query["checkpoint_datetype"]
    checkpoint_name = Query["checkpoint_name"]
    if (checkpoint_datetype == "DATETIME"):
        sql = sql + " AND " + checkpoint_name + " >= " + "TO_DATE('%s','YYYY-MM-DD HH24:MI:SS') ORDER BY %s" % (cur_checkpoint, checkpoint_name)
    if (checkpoint_datetype == "NUMBER"):
        sql = sql + " AND " + checkpoint_name + " > " + "%s ORDER BY %s" % (cur_checkpoint, checkpoint_name)
    qr.close()
    return str(sql)

@Timing
def ReadCheckpointName():
    qr = open(query, 'r')
    line = qr.readline()
    line = line.rstrip('\n')
    line = line.rstrip('\r')
    Query["sql"], Query["checkpoint_datetype"], Query["checkpoint_name"] = line.split(";")
    checkpoint_name = Query["checkpoint_name"]
    return str(checkpoint_name)

@Timing
def LocateCheckPoint(description):
    checkpoint_name = ReadCheckpointName()
    startcounter = 0
    finalcounter = 0
    flag = 0
    for d in description:
        prog = re.compile(checkpoint_name)
        result = prog.match(d[0])
        startcounter = startcounter + 1
        if result:
            finalcounter = startcounter - 1
            counterstr = str(finalcounter)
            print "Checkpoint found in the array position number: " + counterstr
            flag = 1
    if (flag == 0):
        print "Checkpoint was not found"
    return finalcounter

@Timing
def DictFactory(description, data):
    column_names = [col[0] for col in description]
    results = []
    for row in data:
        results.append(dict(zip(column_names, row)))
    return results

@Timing
def WriteCsvLog(log_file, header, data):
    checkpoint_index = LocateCheckPoint(header)
    file_exists = os.path.isfile(log_file)
    with open(log_file, 'ab') as csv_file:
        headers = [i[0] for i in header]
        csv_writer = csv.writer(csv_file, delimiter='|')
        if not file_exists:
            print "File does not exist, writing new CSV file"
            csv_writer.writerow(headers)  # Writing headers once
        for d in data:
            csv_writer.writerow(d)
            cur_checkpoint = d[checkpoint_index]
            cur_checkpoint = str(cur_checkpoint)
        WriteCheckpoint(cur_checkpoint, checkpoint_file)
    csv_file.close()

@Timing
def WriteLog(log_file, header, data):
    file_exist = os.path.isfile(log_file)
    log = open(log_file, 'a')
    if not file_exist:
        print "File does not exist, writing new log file"
        open(log_file, 'w').close()
    mydata = DictFactory(header, data)
    checkpoint_name = ReadCheckpointName()
    string = ''
    for m in mydata:
        for k, v in m.items():
            string = string + k + ' = ' + str(v) + ','
            if k == checkpoint_name:
                #print "KEY FOUND"
                cur_checkpoint = v
                cur_checkpoint = str(cur_checkpoint)
        #print string
        string = string + '\n'
        print cur_checkpoint
        log.write(string + '\n')
    WriteCheckpoint(cur_checkpoint, checkpoint_file)
    log.close()

@Timing
def WriteCheckpoint(cur_checkpoint, conf_file):
    conf = open(conf_file, 'w')
    conf.write(cur_checkpoint)
    conf.close()

@Timing
def GetInfo():
    mypool = PoolToDB()
    con = mypool.acquire()
    cursor = con.cursor()
    GetLastCheckpoint()
    sql = ReadQuery()
    #print sql
    cursor.execute(sql)
    #data = cursor.fetchall()
    #WriteLog(log_file, cursor.description, data)
    #WriteCsvLog(log_file, cursor.description, data)
    cursor.close()

def __main__():
    parser = OptionParser()
    parser.add_option("-c", "--change-password", dest="pass_to_change", help="Change the password for database connection", metavar="1")
    (options, args) = parser.parse_args()
    if (options.pass_to_change):
        UpdatePassword()
    else:
        GetInfo()

__main__()
This is a query sample:
SELECT COD_EMPRESA, COD_TIPSALDO, COD_INVERSION, COD_CUENTA, COD_OBJETIVO, CAN_CUOTAS, SAL_TOTAL, INT_TOTAL, SAL_RESERVA, APORTE_PROM, SAL_PROM, COSTO_PROM, SAL_TOT_ANTERIOR, FEC_ULT_CALCULO, INCLUIDO_POR, FEC_INCLUSION, MODIFICADO_POR, TO_CHAR(FEC_MODIFICACION,'DD/MM/YYYY HH24:MI:SS') AS FEC_MODIFICACION, CUOTA_COMISION, MONTO_COMISION, SAL_INFORMATIVO, CREDITO_PENDIENTE, DEBITO_PENDIENTE, CAN_CUOTAS_ANTERIOR FROM FO.FO_TIPSALDOS_X_CUENTA WHERE ROWNUM <=100000 AND FEC_INCLUSION >= TO_DATE('2005-08-31 11:43:49','YYYY-MM-DD HH24:MI:SS') ORDER BY FEC_INCLUSION
PS: I've really searched Google and this forum for my question, but I haven't found anything similar.
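One thing that stands out in WriteLog above: string is initialized only once, before the loop, so every record re-writes all of the previous ones, and the work (and log size) grows quadratically with the row count. A minimal sketch of the inner loop with a per-row buffer, reusing the names from the code above:

for m in mydata:
    parts = []  # fresh buffer for each record
    for k, v in m.items():
        parts.append(k + ' = ' + str(v))
        if k == checkpoint_name:
            cur_checkpoint = str(v)
    log.write(','.join(parts) + ',\n')
WriteCheckpoint(cur_checkpoint, checkpoint_file)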

Python - Variable being printed over string

I am using Python 2.7 and I have a problem that I haven't encountered before: when I print a certain string and then a variable on the same line, the variable is printed over the string. E.g. the script is coded like so: print 'IP Rating = ', ipRating, and the output in the command prompt is 'IP20ating = '. I have no idea why this is happening, but I have the same code for various variables and strings in the same script and they all come out as expected. I have tried renaming the variable and changing the string, but there is still no difference. Has anybody encountered this error before, or have any ideas why this might be happening? I can post the code if requested.
Many thanks :)
EDIT
Here is the code. I know I may have repeated myself a few times and there are unnecessary libraries in there, but the way I work is by importing all the libraries I might need and then removing unnecessary code at the end.
from bs4 import BeautifulSoup as Soup
from bs4 import BeautifulSoup
from urllib import urlopen
import webbrowser
import httplib
import urllib2
import urllib
import string
import mylib
import xlrd
import glob
import xlwt
import bs4
import sys
import os
import re

print '\nStarting Web Search'
found = False
while found == False:
    excelFile = "F:\\len\\web sheets completed\\csv formatted\\imported\\re-imported\\Import Corrections\\saxby web spreadsheet.xls"
    try:
        inFi = xlrd.open_workbook(excelFile)
        found = True
    except IOError:
        print 'File not found.'
inFi = xlrd.open_workbook(excelFile)
inWS = inFi.sheet_by_index(0)
headers = mylib.getHeader(inWS)
supplyHead = mylib.findHeader('Supplier Part Ref', headers)
saxbeginurl = "http://www.saxbylighting.com/index.php?pg=search&ser="
badLink = "index.php?pg=search&ser=10180&next=0"
resLink = "http://www.saxbylighting.com/images/ProductImages/Zoomed/"
overCount = 0
for t in range(524, 534):
    projection = 0
    ipRating = 0
    diameter = 0
    width = 0
    weight = 0
    length = 0
    height = 0
    i = 0
    w = 0
    l = 0
    h = 0
    d = 0
    p = 0
    x = 0
    iP = 0
    wei = 0
    imgStock = str(inWS.cell(t, supplyHead).value.encode('latin-1'))
    overCount = overCount + 1
    print '\n', imgStock
    if imgStock == '3TRAWI':
        url = 'http://www.saxbylighting.com/index.php?pg=details&prod=53'
    elif imgStock == '10313':
        url = 'http://www.saxbylighting.com/index.php?pg=details&prod=204'
    else:
        url = saxbeginurl + imgStock
    html_page = urllib2.urlopen(url)
    soup = BeautifulSoup(html_page)
    img_tags = soup.find_all("img")
    the_image_tag = soup.find("img", src='/images/dhl_logo.png')
    try:
        for dataSheet in soup.find('div', {'class':'panes'}):
            #print dataSheet, ' -- ', str(i)
            i = i + 1
            if i == 4:
                reqData = str(dataSheet).split('<img', 1)[0]
                first_Data = reqData.replace('<br/>', '\n')
                second_Data = first_Data.replace('<b>', '')
                third_Data = second_Data.replace('</b>', '')
                fourth_Data = third_Data.replace(':', ': ')
                dataList = fourth_Data.split('\n')
                #print dataList
                for information in dataList:
                    if 'Weight' in dataList[wei]:
                        pre_Weight = dataList[wei]
                        sec_weight = str(pre_Weight).replace('Weight :', '')
                        weight = sec_weight.replace(' ', '')
                    wei += 1
                    if 'IP' in dataList[iP]:
                        ipRating = str(dataList[iP])
                    iP += 1
                for product_Dimensions in dataList:
                    if 'Product dimensions :' in dataList[x]:
                        #print dataList[x]
                        dimensionList = str(dataList[x]).replace('mm', 'mm:')
                        #print dimensionList
                        prelim_Dimensions = dimensionList.replace('Product dimensions :', '')
                        first_Dimensions = prelim_Dimensions.replace('cm', '0mm')
                        sec_Dimensions = first_Dimensions.replace('  ', ' ')
                        third_Dimensions = sec_Dimensions.strip()
                        dimenList = third_Dimensions.split('mm:')
                        #print dimenList
                        for project in dimenList:
                            if 'Proj' in dimenList[p]:
                                pre_pro = str(dimenList[p]).replace('Proj', '')
                                sec_pro = pre_pro.replace(':', '')
                                thro_pro = sec_pro.replace(' ', '')
                                projection = thro_pro
                            elif p == len(dimenList):
                                print 'Projection not found'
                            p += 1
                        for diamet in dimenList:
                            if 'dia' in dimenList[d]:
                                pre_dia = str(dimenList[d]).replace('dia', '')
                                sec_dia = pre_dia.replace(':', '')
                                third_dia = sec_dia.replace(' ', '')
                                diameter = third_dia
                            elif d == len(dimenList):
                                print 'Diameter not found'
                            d += 1
                        for heig in dimenList:
                            if 'H:' in dimenList[h]:
                                pre_hei = str(dimenList[h]).replace('H', '')
                                sec_hei = pre_hei.replace(':', '')
                                third_hei = sec_hei.replace(' ', '')
                                height = third_hei
                            elif h == len(dimenList):
                                print 'Height not found'
                            h += 1
                        for lent in dimenList:
                            if 'L:' in dimenList[l]:
                                pre_leng = str(dimenList[l]).replace('L', '')
                                sec_leng = pre_leng.replace(':', '')
                                third_leng = sec_leng.replace(' ', '')
                                length = third_leng
                            elif l == len(dimenList):
                                print 'Length not found'
                            l += 1
                        for wid in dimenList:
                            if 'W:' in dimenList[w]:
                                pre_wid = str(dimenList[w]).replace('W', '')
                                sec_wid = pre_wid.replace(':', '')
                                third_wid = sec_wid.replace(' ', '')
                                width = third_wid
                            elif w == len(dimenList):
                                print 'Width not found'
                            w += 1
                    x += 1
        print 'IP Rating = ', ipRating
        print 'Weight = ', weight
        print 'Projection = ', projection, 'mm'
        print 'Diameter = ', diameter, 'mm'
        print 'Length = ', length, 'mm'
        print 'Height = ', height, 'mm'
        print 'Width = ', width, 'mm'
    except TypeError:
        print 'Type Error... skipping this product and carrying on.'
Here is an example output
IP44ating =
Weight = .51KGS
Projection = 35 mm
Diameter = 0 mm
Length = 0 mm
Height = 90 mm
Width = 120 mm
I strongly suspect that your data ipRating, which you think is IP20, is actually \rIP20. That is, you have a stray 0x0D (decimal 13) carriage return character at the start of the variable. The carriage return moves the print position to the start of the line, and the variable then overwrites what you printed before.
You can test whether this is the problem by adding the line:
ipRating = ipRating.replace("\r", "")
before your print statement.
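You can reproduce the symptom directly (illustrative only):

print 'IP Rating = ', '\rIP20'  # prints: IP20ating =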
This is the proper way to do what you're doing.
print('IP Rating = %s' % ipRating)
or
print('IP Rating = %d' % ipRating)
That is just one example covering the print statements you have at the end of your code.
If you're putting a string variable in print, use %s; otherwise use %d. If you have any more questions, just ask.
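In Python 2.7 you can also use str.format, which avoids choosing a type code:

print 'IP Rating = {}'.format(ipRating)  # works for strings and numbers alike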

Python: how can I change my "mode" options for "open"? R+A maybe?

I would like to change my code so that it gives the same result, but written in a different way. Only the open part interests me, i.e. from the "f = open..." line to the end.
Here is my code:
#Source : http://www.wunderground.com/weather/api/d/docs?d=resources/code-samples
import urllib2
import json
import time
import csv
from datetime import datetime  # set the time

def get_information(url):
    try:
        wunder_url_obj = urllib2.urlopen(url)
    except:
        print 'Could not open URL'
        return None
    else:
        now = datetime.now()
        current_year = now.year
        current_day = now.day
        current_month = now.month
        current_hour = now.hour
        current_minute = now.minute
        current_second = now.second
        json_string = wunder_url_obj.read()
        parsed_json = json.loads(json_string)
        temp_f = parsed_json['current_observation']['temp_f']
        weather = parsed_json['current_observation']['weather']
        date = str(now.month) + "/" + str(now.day) + "/" + str(now.year) + " " + str(now.hour) + ":" + str(now.minute) + ":" + str(now.second)
        now = datetime.now()
        header = "Datetime,current condition,Temperature,\n"
        with open('out.csv', 'a') as f:
            if f.tell() == 0:
                f.write(header)
            f.write(','.join([date, str(temp_f), weather]))
            f.write('\n')
        f.close()

get_information('http://api.wunderground.com/api/8d3b5d3fa03ddb6f/conditions/weather/q/China/Beijing.json')
You can rewrite the open ... close part as follows:

with open('out.csv', 'a') as f:
    f.seek(0, 2)  # os.SEEK_END; on Windows the file position is 0 even in append mode
    if f.tell() == 0:
        f.write(header)
    f.write(','.join([date, str(temp_f), weather]))
    f.write('\n')

file.tell() returns the current file position, so the header is written only when the file is empty.
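For reference, a quick sketch of how the common modes differ on an existing file (the initial read position for 'a+' is platform-dependent, as noted above):

f = open('out.csv', 'r+')  # read/write, position at the start, no truncation
f.close()
f = open('out.csv', 'a')   # write-only, every write goes to the end
f.close()
f = open('out.csv', 'a+')  # read/write, writes always go to the end
f.close()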
