Python - Pandas writing blank files to file [closed]

I have a Python script that writes to several file formats via Pandas: CSV, JSON, HTML and Excel.
However, for some reason the script is writing blank files; when I open the output file it is empty.
Before writing the file I print the dataframe to the screen so I can validate that the data is there. For example, with CSV the output to the screen is this:
CSV data: ,AWS Account,Account Number,Name,Instance ID,AMI ID,Volumes,Private IP,Public IP,Private DNS,Availability Zone,VPC ID,Type,Key Pair Name,State,Launch Date
0,project-client-lab,123456789101,bastion001,i-xxxxxxxxxxxxxxxxxxxx,ami-xxxxxxxxxxxxxxxxxxx,vol-xxxxxxxxxxxxxxx,10.238.2.166,3.214.15.175,ip-10-238-2-166.ec2.internal,us-east-1a,vpc-xxxxxxxxxxxxxxxxx,t3.small,project-client-int01,running,March 10 2020
1,project-client-lab,123456789101,logicmonitor001,i-xxxxxxxxxxxxxxxxxxxx,ami-xxxxxxxxxxxxxxxxxxx,vol-0xxxxxxxxxxxxxx,10.238.2.52,,ip-10-238-2-52.ec2.internal,us-east-1a,vpc-xxxxxxxxxxxxxxxxx,m5.large,project-client-int01,running,September 02 2019
2,project-client-lab,123456789101,project-cassandra001,i-xxxxxxxxxxxxxxxxxxxx,ami-xxxxxxxxxxxxxxxxxxx,"vol-xxxxxxxxxxxxxxxxxx, vol-xxxxxxxxxxxxxxxxx",10.238.2.221,,ip-10-238-2-221.ec2.internal,us-east-1a,vpc-xxxxxxxxxxxxxxxxx,m5.large,project-client-int01,running,January 14 2020
3,project-client-lab,123456789101,project-cassandra003,i-xxxxxxxxxxxxxxxxxxxx,ami-xxxxxxxxxxxxxxxxxxx,"vol-xxxxxxxxxxxxxxxxxx, vol-xxxxxxxxxxxxxxxxx",10.238.2.207,,ip-10-238-2-207.ec2.internal,us-east-1a,vpc-xxxxxxxxxxxxxxxxx,m5.large,project-client-int01,running,January 14 2020
4,project-client-lab,123456789101,project-cassandra003,i-xxxxxxxxxxxxxxxxxxxx,ami-xxxxxxxxxxxxxxxxxxx,"vol-xxxxxxxxxxxxxxxxxx, vol-xxxxxxxxxxxxxxxxx",10.238.2.203,,ip-10-238-2-203.ec2.internal,us-east-1a,vpc-xxxxxxxxxxxxxxxxx,c5.xlarge,project-client-int01,running,January 22 2020
5,project-client-lab,123456789101,project-cassandra001,i-xxxxxxxxxxxxxxxxxxxx,ami-xxxxxxxxxxxxxxxxxxx,"vol-xxxxxxxxxxxxxxxxxx, vol-xxxxxxxxxxxxxxxxx",10.238.2.209,,ip-10-238-2-209.ec2.internal,us-east-1a,vpc-xxxxxxxxxxxxxxxxx,c5.xlarge,project-client-int01,running,January 22 2020
6,project-client-lab,123456789101,haproxy001,i-xxxxxxxxxxxxxxxxx,ami-xxxxxxxxxxxxxxxxxxx,vol-xxxxxxxxxxxxxxxxxx,10.238.2.169,54.242.118.165,ip-10-238-2-169.ec2.internal,us-east-1a,vpc-xxxxxxxxxxxxxxxxx,m5.large,project-client-int01,running,February 20 2020
7,project-client-lab,123456789101,logicmonitor002,i-xxxxxxxxxxxxxxx,ami-xxxxxxxxxxxxxxxxxxx,vol-0c48ff6ebb031008a,10.238.2.69,,ip-10-238-2-69.ec2.internal,us-east-1b,vpc-xxxxxxxxxxxxxxxxx,m5.large,project-client-int01,running,September 13 2019
These are the functions that write to file:
def mongo_export_to_file(interactive, aws_account, aws_account_number, instance_col=None, date=None):
    create_directories()
    if date == None:
        format = "%m-%d-%Y"
        today = datetime.today()
        today = today.strftime(format)
        date = today
    else:
        format = "%m-%d-%Y"
        date = datetime.strptime(date, "%m%d%Y")
        date = date.strftime(format)
    if not instance_col:
        _, _, instance_col = set_db()
    # make an API call to the MongoDB server
    if interactive == 0:
        mongo_docs = instance_col.find({})
    else:
        mongo_docs = instance_col.find({"Account Number": aws_account_number})
    # Convert the mongo docs to a DataFrame
    docs = pandas.DataFrame(mongo_docs)
    # Discard the Mongo ID for the documents
    docs.pop("_id")
    if __name__ == "__main__":
        print("Choose a file format")
        print("1. CSV")
        print("2. JSON")
        print("3. HTML")
        print("4. Excel")
        choice = input("Enter a number 1-4: ")
        choice = int(choice)
    else:
        choice = 1
    if choice == 1:
        if __name__ == "__main__":
            # export MongoDB documents to CSV
            csv_export = docs.to_csv(sep=",")  # CSV delimited by commas
            print("\nCSV data:", csv_export)
        # Set the CSV output directory
        output_dir = os.path.join("..", "..", "output_files", "aws_instance_list", "csv", "")
        if interactive == 1:
            output_file = os.path.join(output_dir, "aws-instance-list-" + aws_account + "-" + date + ".csv")
        else:
            output_file = os.path.join(output_dir, "aws-instance-master-list-" + date + ".csv")
        # export MongoDB documents to a CSV file, leaving out the row "labels" (row numbers)
        docs.to_csv(output_file, ",", index=False)  # CSV delimited by commas
    elif choice == 2:
        if __name__ == "__main__":
            json_export = docs.to_json()  # return JSON data
            print("\nJSON data:", json_export)
        # Set the JSON output directory
        output_dir = os.path.join("..", "..", "output_files", "aws_instance_list", "json", "")
        if interactive == 1:
            output_file = os.path.join(output_dir, "aws-instance-list-" + aws_account + "-" + date + ".json")
        else:
            output_file = os.path.join(output_dir, "aws-instance-master-list-" + date + ".json")
        # export MongoDB documents to a JSON file
        docs.to_json(output_file)
    elif choice == 3:
        html_str = io.StringIO()
        # export as HTML
        docs.to_html(
            buf=html_str,
            classes="table table-striped"
        )
        if __name__ == "__main__":
            # print out the HTML table
            print(html_str.getvalue())
        # Set the HTML output directory
        output_dir = os.path.join("..", "..", "output_files", "aws_instance_list", "html", "")
        if interactive == 1:
            output_file = os.path.join(output_dir, "aws-instance-list-" + aws_account + "-" + date + ".html")
        else:
            output_file = os.path.join(output_dir, "aws-instance-master-list-" + date + ".html")
        # save the MongoDB documents as an HTML table
        docs.to_html(output_file)
    elif choice == 4:
        # Set the Excel output directory
        output_dir = os.path.join("..", "..", "output_files", "aws_instance_list", "excel", "")
        time.sleep(5)
        if interactive == 1:
            output_file = os.path.join(output_dir, "aws-instance-list-" + aws_account + "-" + date + ".xlsx")
        else:
            output_file = os.path.join(output_dir, "aws-instance-master-list-" + date + ".xlsx")
        # export MongoDB documents to an Excel file, leaving out the row "labels" (row numbers)
        writer = ExcelWriter(output_file)
        docs.to_excel(writer, "EC2 List", index=False)
        writer.save()
        writer.close()
    if __name__ == "__main__":
        exit = input("Exit program (y/n): ")
        if exit.lower() == "y" or exit.lower() == "yes":
            exit_program()
        else:
            main()
def print_reports(interactive, aws_account, aws_account_number):
    set_db(instance_col=None)
    inputDate = input("Enter the date in format 'dd/mm/yyyy': ")
    day, month, year = inputDate.split('/')
    isValidDate = True
    try:
        datetime(int(year), int(month), int(day))
    except ValueError:
        isValidDate = False
        print_reports(interactive, aws_account, aws_account_number)
    if isValidDate:
        print(f"Input date is valid: {inputDate}")
        format = "%m%d%Y"
        inputDate = datetime.strptime(inputDate, "%m/%d/%Y")
        inputDate = inputDate.strftime(format)
    else:
        print(f"Input date is not valid: {inputDate}")
        print_reports(interactive, aws_account, aws_account_number)
    myclient = connect_db()
    mydb = myclient["aws_inventories"]
    instance_col = "ec2_list_" + inputDate
    instance_col = mydb[instance_col]
    mongo_export_to_file(interactive, aws_account, aws_account_number, instance_col, date=inputDate)
That is all of the code in this script.
Why is this happening, and how do I correct it?

You can view the file in Excel by:
Opening Excel
Going to the "Data" tab
In the "Get & Transform Data" section, click "From Text/CSV"

Related

Writing to Console and File in Python Script

I am looking for some help on a project where I need to output the responses to the console as well as write them to a file, and I am having trouble figuring that part out. I have been able to write the responses to a file successfully, but not to both at the same time. Can someone help with that portion? The only lines that need to be written to the file are the ones I currently write to a file.
from datetime import datetime
import requests
import pytemperature

def main():
    api_start = 'https://api.openweathermap.org/data/2.5/weather?q='
    api_key = '&appid=91b8698c2ed6c192aabde7c9e75d23cb'
    now = datetime.now()
    filename = input("\nEnter the output filename: ")
    myfile = None
    try:
        myfile = open(filename, "w")
    except:
        print("Unable to open file " + filename +
              "\nData will not be saved to a file")
    choice = "y"
    print("ISQA 3900 Open Weather API", file=myfile)
    print(now.strftime("%A, %B %d, %Y"), file=myfile)
    while choice.lower() == "y":
        # input city and country code
        city = input("Enter city: ")
        print("Use ISO letter country code like: https://countrycode.org/")
        country = input("Enter country code: ")
        # app configures url to generate json data
        url = api_start + city + ',' + country + api_key
        json_data = requests.get(url).json()
        try:
            # getting weather data from json
            weather_description = json_data['weather'][0]['description']
            # printing weather information
            print("\nThe Weather Report for " + city + " in " + country + " is:", file=myfile)
            print("\tCurrent conditions: ", weather_description, file=myfile)
            # getting temperature data from json
            current_temp_kelvin = json_data['main']['temp']
            current_temp_fahrenheit = pytemperature.k2f(current_temp_kelvin)
            # printing temperature information
            print("\tCurrent temperature in Fahrenheit:", current_temp_fahrenheit, file=myfile)
            # getting pressure data from json
            current_pressure = json_data['main']['pressure']
            # printing pressure information
            print("\tCurrent pressure in HPA:", current_pressure, file=myfile)
            # getting humidity data from json
            current_humidity = json_data['main']['humidity']
            # printing humidity information
            print("\tCurrent humidity:", "%s%%" % current_humidity, file=myfile)
            # getting expected low temp data from json
            expected_low_temp = json_data['main']['temp_min']
            expected_low_temp = pytemperature.k2f(expected_low_temp)
            # printing expected low temp information
            print("\tExpected low temperature in Fahrenheit:", expected_low_temp, file=myfile)
            # getting expected high temp data from json
            expected_high_temp = json_data['main']['temp_max']
            expected_high_temp = pytemperature.k2f(expected_high_temp)
            # printing expected high temp information
            print("\tExpected high temperature in Fahrenheit:", expected_high_temp, file=myfile)
            choice = input("Continue (y/n)?: ")
            print()
        except:
            print("Unable to access ", city, " in ", country)
            print("Verify city name and country code")
    if myfile:
        myfile.close()
    print('Thank you - Goodbye')

if __name__ == "__main__":
    main()
Honestly, I am kind of at a loss on this one; for some reason it is just kicking my butt.
For printing a single object:
def mprint(text, file):
    print(text)
    print(text, file=file)
A more general one for printing several objects:
def mprint(*args):
    print(*args[:-1])
    print(*args[:-1], file=args[-1])
Usage: mprint(obj1, obj2, ... , myfile)
A completely general print function replacement would look something like:
def myprint(*args, file=None, **kwargs):
    print(*args, **kwargs)  # print to screen
    if file is not None:
        print(*args, file=file, **kwargs)  # print to file as well
This also lets you pass keyword arguments such as end=... through to both calls.
filename = input("\nEnter the output filename: ")
myfile = None
try:
    myfile = open(filename, "w")
except:
    print("Unable to open file " + filename +
          "\nData will not be saved to a file")
choice = "y"
myprint("ISQA 3900 Open Weather API", file=myfile)
myprint(now.strftime("%A, %B %d, %Y"), file=myfile)
If myfile couldn't be opened and is therefore None, the myprint function will only print to the screen.
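For instance, keyword arguments such as end= are forwarded to both print calls (a small illustrative example, assuming the myprint definition above):

myprint("Current humidity:", "45%", file=myfile)   # goes to the screen and, if it was opened, the file
myprint("processing", end="... ", file=myfile)     # end= is passed through to both calls
myprint("done")                                    # no file given -> screen only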

How to export dict list to csv cleanly?

I have a script I'm writing to make it easy to pull data from my fantasy football league and export it in a format that can be worked with in Excel.
The script I have attached only contains the parts relevant to this question, as the larger script I have written has a lot of moving parts that don't apply here.
I'm essentially pulling this players.get_all_players() data from the Sleeper platform using the Sleeper-API-Wrapper (Github link here).
My script will take player data and put it into a .csv like this, with the player ID in the top row and all the info in a single cell below the ID. Screenshot of this below.
Excel .csv screenshot
How can I export this so that the data is nicely formatted into separate rows? I have a different spreadsheet that I'd like to be able to pull this data into automatically.
Alternatively, if I'm doing this in a really roundabout way, please let me know! This is the JSON response from the platform: JSON Response
# 9 All players - players.get_all_players()
warning = 1
while warning == 1:
    print("%s%s\n\n\nWARNING:%s" % (fg(15), bg(9), attr(0)))
    print("%s%sthe 'all players' option is intensive and may freeze your PC for several minutes.%s" % (fg(15), bg(0), attr(1)))
    warning = input("continue anyway? (y/n)\n")
    if warning == "n":
        pe_loop = 0
        action = 0
    elif warning == "y":
        name = "all players"; file = name
        output = players.get_all_players()
        break
    else:
        print("Not a valid option, try again.")
        warning = 1
overwrite = 0
name_change = 0
while action == 0:
    try:
        action = int(input("%s%s\n1 - print\n2 - export to Excel\n3 - back to tasks\n4 - end\n--------------------\n%s" % (fg(14), bg(0), attr(1))))
    except ValueError:
        print("Not a valid option, try again.")
## Print
if action == 1 and week != 18:
    print(output)
    break
elif action == 1 and week == 18:
    week = 0
    while week < 18:
        week += 1
        if task == 3:
            output = league.get_matchups(week)
        elif task == 4:
            output = league.get_transactions(week)
        print(output)
## Export
elif action == 2:
    path = os.path.join(parent_dir, file)
    name_change = input("\nDo you want to name the file? (y/n)\n")
    if name_change == "y":
        name = input("\nEnter file name now:\n")
    if name_change == "n":
        file_path = path + "\\" + name + '_' + str(year) + ".xlsx"
        if os.path.isfile(file_path) == True:
            overwrite = input("\nFile name... '" + name + "' already exists! Would you like to overwrite this file? (y/n)\n")
            if overwrite == "n":
                count = 0
                while os.path.isfile(file_path) == True:
                    count += 1
                    new_name = name + "_" + str(count)
                    file_path = path + "\\" + new_name + ".xlsx"
                else:
                    name = new_name
                    print("\nThe new file was automatically named: " + new_name + "_wk" + str(week) + "\nand placed in: " + path)
    if os.path.isdir(path) == False and overwrite == 0:
        os.mkdir(path)
        print("\nCreating new file path... " + file + "\n")
    elif os.path.isdir(path) == True and overwrite == 0:
        print("\nFile path... '" + file + "' exists!\n")
    toCSV = output
    # 9 All Players CSV file
    with open(parent_dir + file + "\\" + name + ".csv", 'w', encoding='utf8', newline='') as output_file:
        fc = csv.DictWriter(output_file, output.keys())
        fc.writeheader()
        fc.writerow(toCSV)
It turns out that sleeper_wrapper exposes a method players.get_players_df that gives you a pandas DataFrame containing all players.
Write that to a csv file using to_csv as suggested in the comments.
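A minimal sketch of that approach, assuming the version of sleeper_wrapper you have installed actually exposes get_players_df (the snippet further down works around versions that do not):

from sleeper_wrapper import Players

players = Players()
all_players_df = players.get_players_df()  # pandas DataFrame, one row per player
all_players_df.to_csv("all_players.csv")   # hypothetical output filename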
Strip down your code to receive better answers faster :)
This is the code that your question needs:
from sleeper_wrapper import Players
import csv

players = Players()
toCSV = players.get_all_players()
with open(parent_dir + file + "\\" + name + ".csv", 'w', encoding='utf8', newline='') as output_file:
    fc = csv.DictWriter(output_file, toCSV.keys())
    fc.writeheader()
    fc.writerow(toCSV)
This is how you write the csv using pandas:
import pandas as pd
from sleeper_wrapper import Players
players = Players()
all_players = players.get_all_players()
# stolen from https://github.com/NotTheCrocHunter/sleeper-api-wrapper/blob/91d8cf1b64cf55884b4c4746d53ccd1259d11c1f/sleeper_wrapper/players.py#L41
# because that method is unavailable in the version of sleeper_wrapper in PyPI
all_players_df = pd.DataFrame.from_dict(all_players, orient="index")
# all_players_df contains some information on teams as well, maybe you want to filter that out...
all_players_df.to_csv("your_output_file.csv")

storing user input into json file python

Is there a way to store these inputs into a dictionary/JSON file so that when I import it into pandas it's easy to analyse? And if this code can be written in a simpler way (for example with a loop), that would also be great.
#student's profile to be saved in file separately
j = open("jessica.txt",'a')
w = open("wendy.txt", 'a')
t = open("tatiana.txt", 'a')
#user input to record the log
name = input("Name:")
date = input('Enter a date in YYYY-MM-DD format:')
hours = input("Hours:")
rate = input("Rate:")
topic = input('Topic:')
if name == 'Jessica':
j.writelines("Date:" + date + '\n')
j.writelines("Hours:" + hours + '\n')
j.writelines("Rate:" + rate + '\n')
elif name == 'Tatiana':
t.writelines("Date:" + date + '\n')
t.writelines("Hours:" + hours + '\n')
t.writelines("Rate:" + rate + '\n')
else:
w.writelines("Date:" + date + '\n')
w.writelines("Hours:" + hours + '\n')
w.writelines("Rate:" + rate + '\n')
Here is an example:
import json

def get_inputs():
    #user input to record the log
    name = input("Name:")
    d = {}
    d['date'] = input('Enter a date in YYYY-MM-DD format:')
    d['hours'] = input("Hours:")
    return (name, d)

out = {}
while True:
    exit = input('Do you want to add another input (y/n)? ')
    if exit.lower() == 'n':
        break
    else:
        name, d = get_inputs()
        out[name] = d

with open('names.json', 'w') as f:
    json.dump(out, f, indent=2)
And then:
import pandas as pd
print(pd.read_json('names.json'))
And you have:
Jessica
date 2014-12-01
hours 12
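If you would rather have one row per person instead of one column per person, pandas can treat the top-level JSON keys as the row index (a small optional tweak):

import pandas as pd

# orient="index" turns each top-level key (the person's name) into a row,
# with 'date' and 'hours' as columns
df = pd.read_json('names.json', orient='index')
print(df)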

How to read .evtx file using python?

Does anyone know how to read an event log file in C:\Windows\System32\winevt\Logs with the .evtx extension?
I have already tried opening it with Notepad and reading it with Python, but Notepad says access is denied...
Does anyone know how to do it? Thanks in advance.
This is how you would read the file "Forwarded Events" from the Event Viewer. You need admin access, so I would run it as admin, but it will prompt you for a password if you don't.
import win32evtlog
import xml.etree.ElementTree as ET
import ctypes
import sys

def is_admin():
    try:
        return ctypes.windll.shell32.IsUserAnAdmin()
    except:
        return False

if is_admin():
    # open event file
    query_handle = win32evtlog.EvtQuery(
        r'C:\Windows\System32\winevt\Logs\ForwardedEvents.evtx',
        win32evtlog.EvtQueryFilePath)
    read_count = 0
    a = 1
    while a == 1:
        a += 1
        # read 1 record(s)
        events = win32evtlog.EvtNext(query_handle, 1)
        read_count += len(events)
        # if there is no record break the loop
        if len(events) == 0:
            break
        for event in events:
            xml_content = win32evtlog.EvtRender(event, win32evtlog.EvtRenderEventXml)
            # parse xml content
            xml = ET.fromstring(xml_content)
            # xml namespace, root element has an xmlns definition, so we have to use the namespace
            ns = '{http://schemas.microsoft.com/win/2004/08/events/event}'
            substatus = xml[1][9].text
            event_id = xml.find(f'.//{ns}EventID').text
            computer = xml.find(f'.//{ns}Computer').text
            channel = xml.find(f'.//{ns}Channel').text
            execution = xml.find(f'.//{ns}Execution')
            process_id = execution.get('ProcessID')
            thread_id = execution.get('ThreadID')
            time_created = xml.find(f'.//{ns}TimeCreated').get('SystemTime')
            #data_name = xml.findall('.//EventData')
            #substatus = data_name.get('Data')
            #print(substatus)
            event_data = f'Time: {time_created}, Computer: {computer}, Substatus: {substatus}, Event Id: {event_id}, Channel: {channel}, Process Id: {process_id}, Thread Id: {thread_id}'
            print(event_data)
            user_data = xml.find(f'.//{ns}UserData')
            # user_data may contain arbitrary data
else:
    ctypes.windll.shell32.ShellExecuteW(None, "runas", sys.executable, " ".join(sys.argv), None, 1)
input()
.evtx is the extension for Windows event log files. It contains data in a special binary format designed by Microsoft, so you cannot simply open it in a text editor.
There are open-source tools to read .evtx files, and the NXLog EE can also read them. (Disclaimer: I'm affiliated with the latter.)
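For example, with the open-source python-evtx package (also used in the answers below), dumping every record as XML takes only a few lines (a minimal sketch; the file path is a placeholder):

import Evtx.Evtx as evtx

# path to a copied or exported .evtx file (placeholder)
with evtx.Evtx(r"C:\temp\Security.evtx") as log:
    for record in log.records():
        print(record.xml())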
I modified the accepted answer a bit as follows, so it becomes reusable:
import xml.etree.ElementTree as Et
import win32evtlog
from collections import namedtuple

class EventLogParser:
    def __init__(self, exported_log_file):
        self.exported_log_file = exported_log_file

    def get_all_events(self):
        windows_events = []
        query_handle = win32evtlog.EvtQuery(str(self.exported_log_file),
                                            win32evtlog.EvtQueryFilePath | win32evtlog.EvtQueryReverseDirection)
        while True:
            raw_event_collection = win32evtlog.EvtNext(query_handle, 1)
            if len(raw_event_collection) == 0:
                break
            for raw_event in raw_event_collection:
                windows_events.append(self.parse_raw_event(raw_event))
        return windows_events

    def parse_raw_event(self, raw_event):
        xml_content = win32evtlog.EvtRender(raw_event, win32evtlog.EvtRenderEventXml)
        root = Et.fromstring(xml_content)
        ns = "{" + root.tag.split('}')[0].strip('{') + "}"
        system = root.find(f'{ns}System')
        event_id = system.find(f'{ns}EventID').text
        level = system.find(f'{ns}Level').text
        time_created = system.find(f'{ns}TimeCreated').get('SystemTime')
        computer = system.find(f'{ns}Computer').text
        WindowsEvent = namedtuple('WindowsEvent',
                                  'event_id, level, time_created, computer')
        return WindowsEvent(event_id, level, time_created, computer)
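Usage would then look something like this (the path is hypothetical):

parser = EventLogParser(r"C:\exports\ForwardedEvents.evtx")
for event in parser.get_all_events():
    print(event.time_created, event.event_id, event.level, event.computer)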
I use the "python-evtx" library, you can install it using this command:
pip install python-evtx
In my case, I'm not interested in reading records with the "Information" level.
import os
import codecs
from datetime import datetime  # needed for datetime.strptime below
from lxml import etree
import Evtx.Evtx as evtx

def evtxFile(absolutePath, filenameWithExt, ext, _fromDate, _toDate):
    print("Reading: " + filenameWithExt)
    outText = ""
    channel = ""
    #read the windows event viewer log and convert its contents to XML
    with codecs.open(tempFilePath, "a+", "utf-8", "ignore") as tempFile:
        with evtx.Evtx(absolutePath) as log:
            for record in log.records():
                xmlLine = record.xml()
                xmlLine = xmlLine.replace(" xmlns=\"http://schemas.microsoft.com/win/2004/08/events/event\"", "")
                xmlParse = etree.XML(xmlLine)
                level = parseXMLtoString(xmlParse, ".//Level/text()")
                if not level == "0" and not level == "4":
                    providerName = parseXMLtoString(xmlParse, ".//Provider/@Name")
                    qualifiers = parseXMLtoString(xmlParse, ".//EventID/@Qualifiers")
                    timestamp = parseXMLtoString(xmlParse, ".//TimeCreated/@SystemTime")
                    eventID = parseXMLtoString(xmlParse, ".//EventID/text()")
                    task = parseXMLtoString(xmlParse, ".//Task/text()")
                    keywords = parseXMLtoString(xmlParse, ".//Keywords/text()")
                    eventRecordID = parseXMLtoString(xmlParse, ".//EventRecordID/text()")
                    channel = parseXMLtoString(xmlParse, ".//Channel/text()")
                    computer = parseXMLtoString(xmlParse, ".//Computer/text()")
                    message = parseXMLtoString(xmlParse, ".//Data/text()")
                    if level == "1":
                        level = "Critical"
                    elif level == "2":
                        level = "Error"
                    elif level == "3":
                        level = "Warning"
                    date = timestamp[0:10]
                    time = timestamp[11:19]
                    time = time.replace(".", "")
                    _date = datetime.strptime(date, "%Y-%m-%d").date()
                    if _fromDate <= _date <= _toDate:
                        message = message.replace("<string>", "")
                        message = message.replace("</string>", "")
                        message = message.replace("\r\n", " ")
                        message = message.replace("\n\r", " ")
                        message = message.replace("\n", " ")
                        message = message.replace("\r", " ")
                        outText = date + " " + time + "|" + level + "|" + message.strip() + "|" + task + "|" + computer + "|" + providerName + "|" + qualifiers + "|" + eventID + "|" + eventRecordID + "|" + keywords + "\n"
                        tempFile.writelines(outText)
    with codecs.open(tempFilePath, "r", "utf-8", "ignore") as tempFile2:
        myLinesFromDateRange = tempFile2.readlines()
    #delete the temporary file that was created
    os.remove(tempFilePath)
    if len(myLinesFromDateRange) > 0:
        createFolder("\\filtered_data_files\\")
        outFilename = "windows_" + channel.lower() + "_event_viewer_logs" + ext
        myLinesFromDateRange.sort()
        #remove duplicate records from the list
        myFinalLinesFromDateRange = list(set(myLinesFromDateRange))
        myFinalLinesFromDateRange.sort()
        with codecs.open(os.getcwd() + "\\filtered_data_files\\" + outFilename, "a+", "utf-8", "ignore") as linesFromDateRange:
            linesFromDateRange.seek(0)
            if len(linesFromDateRange.read(100)) > 0:
                linesFromDateRange.writelines("\n")
            linesFromDateRange.writelines(myFinalLinesFromDateRange)
        del myLinesFromDateRange[:]
        del myFinalLinesFromDateRange[:]
    else:
        print("No data was found within the specified date range.")
    print("Closing: " + filenameWithExt)
I hope it helps you or someone else in the future.
EDIT:
The "tempFilePath" can be anything you want, for example:
tempFilePath = os.getcwd() + "\\tempFile.txt"
I collected some information first before calling the "evtxFile" function:
The "From" and the "To" dates are in the following format: YYYY-MM-DD
Converted the dates to "date" data type:
_fromDate = datetime.strptime(fromDate, "%Y-%m-%d").date()
_toDate = datetime.strptime(toDate, "%Y-%m-%d").date()
Divided the directory where the .evtx files are located into different parts:
def splitDirectory(root, file):
    absolutePathOfFile = os.path.join(root, file)
    filePathWithoutFilename = os.path.split(absolutePathOfFile)[0]
    filenameWithExt = os.path.split(absolutePathOfFile)[1]
    filenameWithoutExt = os.path.splitext(filenameWithExt)[0]
    extension = os.path.splitext(filenameWithExt)[1]
    return absolutePathOfFile, filePathWithoutFilename, filenameWithExt, filenameWithoutExt, extension

for root, subFolders, files in os.walk(directoryPath):
    for f in files:
        absolutePathOfFile, filePathWithoutFilename, filenameWithExt, filenameWithoutExt, extension = splitDirectory(root, f)
        if extension == ".evtx":
            evtxFile(absolutePathOfFile, filenameWithExt, ".txt", _fromDate, _toDate)

Not writing into an Excel file

I'm reading data from one file named SPD_file, matching it against another file named Custom, and all records that match in both files should be written to a third file.
Something seems to be wrong, though: the code matches the records and prints them to the console, but when I write to the new file, nothing ends up in it other than the header.
workbook = xlrd.open_workbook(SPD_file)
worksheets = workbook.sheet_names()
mapping_records = {}
for worksheet_name in worksheets:
    worksheet = workbook.sheet_by_name(worksheet_name)
    mapping_record = MappingRecord()
    if worksheet_name == "CD":
        for curr_row in range(0, worksheet.nrows):
            mapping_record = worksheet.row(curr_row)
            print worksheet_name
            print mapping_record[0].value
            for curr_row in mapping_record:
                #print "In Loop...."
                spd_record = MappingRecord()
                spd_record.id = "00002269"
                spd_record.erocode = None
                spd_record.scno = None
                mapping_records[mapping_record[8]] = spd_record
print "Read SPD File....."
custom_file_name = "Custom_" + today.strftime('%Y-%m-%d') + ".csv"
custom_file = ops_home + path + "\\" + custom_file_name
custom = open(custom_file, 'rb')
reader = csv.reader(custom, delimiter=',', quotechar='"')
for line in reader:
    if mapping_records.has_key(mapping_record[8]):
        spd_record = mapping_records[mapping_record[8]]
        if line[7] == "ERO Code":
            spd_record.erocode = line[8]
        elif line[7] == "Service Number":
            spd_record.scno = line[8]
#create a new file.
New_file = ops_home + '\\Reports\\SPD_new_' + today.strftime('%d%m%Y') + '.xlsx'
workbook = xlsxwriter.Workbook(New_file)
# Add a bold format to use to highlight cells.
bold = workbook.add_format({'bold': 1})
money = workbook.add_format({'num_format': '#,##0.00'})
worksheetCd = workbook.add_worksheet("CD")
cdHeader = ("Merchant ID", "EroCode", "Service Number")
cd_row = 0
cd_col = 0
for columnHeader in cdHeader:
    worksheetCd.write(cd_row, cd_col, columnHeader, bold)
    cd_col += 1
for ctx in mapping_records:
    spd_record = mapping_records[ctx]
    if spd_record.payment_mode == "CRD":
        cd_row += 1
        cd_col = 0
        cdRow = (spd_record.id, spd_record.erocode, spd_record.scno)
        for columnData in cdRow:
            if cd_col == 5 or cd_col == 19 or cd_col == 20 or cd_col == 21:
                worksheetCd.write_number(cd_row, cd_col, columnData, money)
            else:
                worksheetCd.write(cd_row, cd_col, columnData)
            cd_col += 1
workbook.close()
