I have a program that is basically a library simulation: you can look up books, edit, delete, etc.
In my program I've initialized some default books into a class such as this:
class BookData:
    def __init__(self):
        self.bookTitle = ''
        self.isbn = ''
        self.author = ''
        self.publisher = ''
        self.dateAdded = ''
        self.quantity = 0.0
        self.wholesale = 0.0
        self.retail = 0.0

    def __str__(self):
        return 'Title: {} ISBN: {} Author: {} ' \
               'Publisher: {} Date Added: {} ' \
               'Quantity: {} Wholesale Value: {} ' \
               'Retail Value: {}'.format(self.bookTitle, self.isbn, self.author, self.publisher,
                                         self.dateAdded, self.quantity, self.wholesale, self.retail)
An example of a book I have stored in the program:
book0.bookTitle, book0.isbn, book0.author, book0.publisher, book0.dateAdded, book0.quantity, book0.wholesale, book0.retail = "INTRODUCING PYTHON", "978-1-4493-5936-2", "Bill Lubanovic", "O'Reilly Media, Inc.", "11/24/2014", 25, 39.95, 50.00
Each book then gets appended into a list.
What I want to do is store all the books into a separate file so that it can be updated and edited within that file, but I don't quite get how to properly open the file, read each part (such as title, isbn, author) then in the main program make those into BookData objects and put them into a list.
I've considered a plain .txt document with commas as delimiters. I don't know if something like JSON or XML would make this easier.
Pseudo-code example:
open(file):
    for word in file:
        create book with title, author, isbn, etc. in file
        append to list of books
Python natively supports CSV (comma-separated values) files through the csv module in the standard library (see the Python documentation).
An example would be:
import csv

books = []
with open('file.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        book = BookData()
        book.bookTitle = row[0]
        book.isbn = row[1]
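        # ...assign the remaining fields (author, publisher, dateAdded, quantity, wholesale, retail) in the same way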
However, that being said, it may be more constructive to change your constructor (ha ha) to take in a row, and then assign it directly:
def __init__(self, row):
    self.bookTitle = row[0]
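A fuller version of that row-based constructor might look like this (a sketch: the column order simply follows the earlier example line, and the int/float conversions are assumptions about how you want quantity and the prices stored):

import csv

class BookData:
    def __init__(self, row):
        # unpack one CSV row in the same column order as the example book above
        self.bookTitle = row[0]
        self.isbn = row[1]
        self.author = row[2]
        self.publisher = row[3]
        self.dateAdded = row[4]
        self.quantity = int(row[5])      # CSV fields arrive as strings, so convert
        self.wholesale = float(row[6])
        self.retail = float(row[7])

books = []
with open('file.csv', newline='') as csvfile:
    for row in csv.reader(csvfile):
        books.append(BookData(row))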
I'm trying to figure out how to make the objects I create inside a definition appear on the same workspace as my "name == main" area, so I can call it in other definitions. I don't think I can use a simple "return" statement in my def, because my function is designed to create n-number of objects of a particular class.
def book_to_object(subfolders):
    # converts my dictionary of book titles and volumes into individual book objects
    for key, val in subfolders.items():
        title = replace_spaces_with_underscores(key)
        # check to see if book already exists in the registry
        if title in [book.name for book in Book._registry]:
            print("Book already exists")
        else:
            exec(title + " = Book(subfolders, key, Fpath)")
I'm calling this def from my if __name__ == '__main__': function
print("Done")
#converts my dictionary of book titles and volumes into individual book objects
book_to_object(subfolders)
Originally the code in this def lived in the name == main area of my program, but I wanted to reduce the clutter and make it into a definition to give me more flexibility later.
The whole program below.
import os

# get date modified time of a file to determine which file is the newest
def get_date_modified(file):
    return os.path.getmtime(file)

# replace spaces with underscores in a string
def replace_spaces_with_underscores(string):
    aa = string.replace(" ", "_")
    bb = aa.replace('-', '_')
    cc = bb.replace("__", "_")
    title = cc.replace("__", "_")
    return title

# create a list of volumes that have not been exported
def get_unexported_volumes(self):
    unexported_volumes = []
    for volume in self.volumes:
        if volume not in self.last_exported:
            unexported_volumes.append(volume)
    return unexported_volumes

def book_to_object(subfolders):
    # converts my dictionary of book titles and volumes into individual book objects
    for key, val in subfolders.items():
        title = replace_spaces_with_underscores(key)
        # check to see if book already exists in the registry
        if title in [book.name for book in Book._registry]:
            print("Book already exists")
        else:
            exec(title + " = Book(subfolders, key, Fpath)")

class Book(object):
    # __metaclass__ = IterBook
    _registry = []

    # constructor
    def __init__(self, my_dict, key, Fpath):
        self._registry.append(self)
        self.name = key
        self.volumes = my_dict[key]
        self.Filepath = Fpath + "/" + self.name
        # self.last_exported = self.volumes[0]
        self.last_exported = ""
        self.newest = self.volumes[-1]
        self.last_downloaded = self.volumes[-1]
        self.last_converted = self.volumes[-1]
        self.last_exported_date = get_date_modified(self.Filepath + "/" + self.last_exported)
        self.newest_date = get_date_modified(self.Filepath + "/" + self.newest)
        self.last_downloaded_date = get_date_modified(self.Filepath + "/" + self.last_downloaded)
        self.last_converted_date = get_date_modified(self.Filepath + "/" + self.last_converted)
        self.last_exported_volume = self.last_exported
        self.newest_volume = self.newest
        self.last_downloaded_volume = self.last_downloaded
        self.last_converted_volume = self.last_converted
        self.last_exported_volume_date = self.last_exported_date
        self.newest_volume_date = self.newest_date
        self.last_downloaded_volume_date = self.last_downloaded_date
        self.last_converted_volume_date = self.last_converted_date
        self.last_exported_volume_name = self.last_exported
        self.newest_volume_name = self.newest
        self.unexported_volumes = get_unexported_volumes(self)

if __name__ == '__main__':
    print("Starting")
    # File Paths for Debugging purposes
    Fpath = '~/'
    F_Temp_path = '~/temp'
    # parses directory for book based on folder names
    subfolders = {'Encyclopedia': ['Vol.001', 'Vol.002', 'Vol.003', 'Vol.004', 'Vol.005'], 'Enginnering_Encyclopedia': ['Avionics', 'Civil', 'Electrical', 'Materials', 'Mechanical']}
    print("Done")
    # converts my dictionary of book titles and volumes into individual book objects
    book_to_object(subfolders)
Notes:
In case anyone is wondering why I'm using objects instead of keeping everything in dictionaries, I need the flexibility that objects give.
Variables such as file_paths and subfolders are dynamic and are based on what directory the user gives and what files are in that directory; for the purpose of asking the question they have been hard-coded so someone can copy and paste my code and reproduce the problem in question.
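One way to avoid exec here (a minimal sketch, not part of the original post) would be to have book_to_object build and return a dictionary of Book objects keyed by title, so the objects stay reachable from the __main__ area and from other functions without dynamically named variables:

def book_to_object(subfolders, Fpath):
    # converts the dictionary of book titles and volumes into Book objects
    books = {}
    for key in subfolders:
        title = replace_spaces_with_underscores(key)
        if title in books:
            print("Book already exists")
        else:
            books[title] = Book(subfolders, key, Fpath)
    return books

# in the __main__ block:
# books = book_to_object(subfolders, Fpath)
# books['Encyclopedia'].newest, books['Enginnering_Encyclopedia'].volumes, etc.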
This will be hard to explain but please bear with me. I'm currently trying to complete a question for an online course and I have literally no idea what to do. The scenario of the question places me as a programmer in an office that needs to create a program assigning conference packs to staff members who have attended a conference. The task gave me two text files; one text file titled "confPack.txt" that reads
Basic conference pack
Bonus conference pack
And another titled "employees.txt" that reads:
Williams,Mary,Y
Nguyen,Vinh,,Y
Kingsley,Margret
Kline,Bob,Y,Y
Mitchell,Frank,Y
Lowe,Elizabeth,Y,Y
Basically, I need to assign certain staff members to their appropriate group/"pack" based on how many days they have attended the conference. The "Y"s in the employees.txt file represent the number of days they attended (one Y = one day of attendance).
The course question itself wants me to access the confPack.txt file and read the records into an array, access the employees.txt file and loop through the records (checking for the end of file), and use logical operators to select the appropriate conference attendees. They said it should be displayed like so:
Report date: [dd/mm/yyyy] (I've already displayed the date correctly)
Attendee: [Surname, first name] Pack/s: [1 or 2 days pack], [both days pack]
And here is what my code looks like so far:
import datetime

dTime = datetime.datetime.now()
confFile = open("confPack.txt", "r+")
print("Report Date: " + dTime.strftime("%d/%m/%Y"))
print(confFile.read())

with open("employees.txt", "r") as f:
    data = f.readlines()
    for line in data:
        words = line.split(",")
        print(words)

confFile.close()
Any help is appreciated. And if you're wondering why I can't contact my course teachers for help, believe me when I say that they are never online.
Edit: In regard to @Adirio:
I want the output to look like this:
Report Date: 7/9/2020
Attendee: [Williams, Mary] Pack/s: [Basic Conference Pack]
Attendee: [Nguyen, Vinh] Pack/s: [Basic Conference Pack]
Attendee: [Kingsley, Margret] Pack/s: [N/A]
Attendee: [Kline, Bob] Pack/s: [Bonus Conference Pack]
Attendee: [Mitchell, Frank] Pack/s: [Basic Conference Pack]
Attendee: [Lowe, Elizabeth] Pack/s: [Bonus Conference Pack]
Edit #2: Thanks again @Adirio for the answer. However, I actually needed to access the confPack.txt file, which reads:
Basic Conference Pack
Bonus Conference Pack
and then print out either the Basic or Bonus conference pack for each employee.
from datetime import datetime

class Employee:
    def __init__(self, surname, name, *args):
        self.name = name.strip()
        self.surname = surname.strip()
        self.days = 0
        for arg in args:
            if arg.strip() == 'Y':
                self.days += 1

now = datetime.now()
print("Report Date: " + now.strftime("%d/%m/%Y"))

#Here i've tried making a .readlines variable to print out the specific conference pack
conf = open("confPack.txt")
all_lines = conf.readlines()

with open("employees.txt", "r") as f:
    employees = []
    for line in f.readlines():
        if len(line.strip()) != 0:
            employees.append(Employee(*line.split(",")))

for employee in employees:
    print(f'Attendee: [{employee.surname}, {employee.name}]', end=' ')
    if employee.days == 2:
        print("Pack/s: [" + all_lines[2] + "]")
    elif employee.days == 1:
        print("Pack/s: [" + all_lines[0] + "]")
    else:
        print("Pack/s: [N/A]")
Output:
Report Date: 09/09/2020
Attendee: [Williams, Mary] Pack/s: [Basic conference pack
] #As you can see, it prints on a new line
Attendee: [Nguyen, Vinh] Pack/s: [Basic conference pack
]
Attendee: [Kingsley, Margret] Pack/s: [N/A]
Attendee: [Kline, Bob] Pack/s: [Bonus conference pack]
Attendee: [Mitchell, Frank] Pack/s: [Basic conference pack
]
Attendee: [Lowe, Elizabeth] Pack/s: [Bonus conference pack]
Process finished with exit code 0
First I will clean up your original code a bit, removing the file you are opening and closing for nothing, and using a with statement for the other, as it is a very healthy pattern.
from datetime import datetime

now = datetime.now()
print("Report Date: " + now.strftime("%d/%m/%Y"))

with open("confPack.txt", "r+") as confFile:
    print(confFile.read())

with open("employees.txt", "r") as f:
    for line in f.readlines():
        words = line.split(",")
        print(words)
Now let's get to work. We will create a class that represents each employee:
class Employee:
    def __init__(self, surname, name, *args):
        self.name = name
        self.surname = surname
        self.days = 0
        for arg in args:
            if arg.strip() == 'Y':
                self.days += 1
The __init__ method accepts the arguments as they are read from the file (surname, name, and a sequence of 'Y'). The name and surname are assigned directly while the rest of the arguments are stored in a list called args. We loop through this list adding 1 day to the day counter if it is equal to 'Y'. The .strip() part removes leading and trailing whitespaces so that we can compare to 'Y' safely.
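For example, a quick check of that counting logic, using a hypothetical line shaped like Bob Kline's record:

e = Employee(*"Kline,Bob,Y,Y\n".split(","))
print(e.days)  # 2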
So all together:
from datetime import datetime

class Employee:
    def __init__(self, surname, name, *args):
        self.name = name.strip()
        self.surname = surname.strip()
        self.days = 0
        for arg in args:
            if arg.strip() == 'Y':
                self.days += 1

print("Report Date: " + datetime.now().strftime("%d/%m/%Y"))

with open("confPack.txt", "r+") as f:
    packs = ['N/A']
    for line in f.readlines():
        if len(line.strip()) != 0:
            packs.append(line.strip())

with open("employees.txt", "r") as f:
    employees = []
    for line in f.readlines():
        if len(line.strip()) != 0:
            employees.append(Employee(*line.split(",")))

# Do whatever you need with the employee list
for employee in employees:
    print(f"Attendee: [{employee.surname}, {employee.name}] Pack/s: [{packs[employee.days]}]")
We could also make the part where we open the files shorter by using list comprehensions:
with open("confPack.txt", "r+") as f:
packs = ['N/A'] + [line.strip() for line in f.readlines() if len(line.strip())]
with open("employees.txt", "r") as f:
employees = [Employee(line.split(",")) for line in f.readlines() if len(line.strip())]
I would like to create an approach where I can define the structure of a CSV file (an obvious extension to Excel should follow) where there is a row definition as well as the header. In this approach a simple re-ordering of the definition will move the columns in the output.
My first attempt was to use a namedtuple. It actually handled most of my needs, but I can't create an empty row and then populate it as needed. I tried to use a recordclass but had much the same problem.
My output file might have > 30 columns so it gets very sloppy to have to create a new instance with a bunch of Nones. I also want to be able to add a column to the structure without having to update the __init__, etc.
My idea pseudo-code (using namedtuples for illustration) would be:
class TableRow(namedtuple("TableRow", "id name password hostip")):
    __slots__ = ()

class TableRowHeader:
    def __init__(self):
        header = TableRow()
        header.id = 'ID'
        header.name = "Name"
        header.password = "Password"
        header.hostip = "Host IP"

class OutputTable():
    def __init__(self):
        self.header = TableRowHeader()
        self.rows = list()

    def add(self, new_row):
        # Example assumes new_row is an instance of TableRow
        self.rows.append(new_row)

    def to_csv(self, file_name):
        with open(file_name, 'w') as csv_file:
            # creating a csv writer object
            csv_writer = csv.writer(csv_file)
            # writing the fields
            csv_writer.writerow(self.header)
            for row in sorted(self.rows):
                csv_writer.writerow(row)

outtable = OutputTable()
row = TableRow()
row.id = 1
row.name = 'Matt'
row.hostip = '10.0.0.1'
row.password = 'obvious'
outtable.add(row)
outtable.to_csv('./example.csv')
I like the pattern but can't figure out a clean way to handle this in Python.
Do you want something like that?
import csv
from collections import namedtuple

TableRowShort = namedtuple('TableRow', "id name password hostip")
TableRowFull = namedtuple('TableRowFull', "id name password hostip description source admin_name")

class TableRowOptional:
    def __init__(self, id, name, password=None, hostip=None, description=None, source=None, admin_name=None):
        super().__init__()
        self.id = id
        self.name = name
        self.password = password
        self.hostip = hostip
        self.description = description
        self.source = source
        self.admin_name = admin_name

class OutputTable():
    def __init__(self):
        self.headers = []
        self.rows = list()

    def add(self, row):
        if hasattr(row, '_asdict'):
            value = row._asdict()
        elif hasattr(row, '__dict__'):
            value = row.__dict__
        elif isinstance(row, dict):
            value = row
        else:
            raise ValueError('Not supported row type: {}'.format(type(row)))
        for header in value.keys():
            if header not in self.headers:
                self.headers.append(header)
        self.rows.append(value)

    def to_csv(self, file_name):
        with open(file_name, 'w') as csv_file:
            # creating a csv writer object
            csv_writer = csv.writer(csv_file)
            # writing the fields
            csv_writer.writerow(self.headers)
            for row in self.rows:
                csv_writer.writerow([row.get(header, None) for header in self.headers])

outtable = OutputTable()
outtable.add(TableRowShort(1, 'Matt', 'obvious', '10.0.0.1'))
outtable.add(TableRowFull(2, 'Maria', 'obvious as usual', '10.1.0.1', 'some description', 'localnet', 'super_admin'))
outtable.add(TableRowOptional(3, 'Maria', hostip='10.1.0.1', description='some description', source='localnet'))
outtable.add({
    'id': 1337,
    'name': 'hacker',
    'hostip': '127.0.0.1',
    'extra': "I've hacked you guys lol!",
})
outtable.to_csv('./example.csv')
This solution provides an interface for storing prepared namedtuples, normal objects (using the __dict__ interface), and raw dict objects as rows. It manages the CSV headers automatically based on the structure of the provided rows :)
Looks pretty clear & useful to me. What do you think?
Output CSV
# > cat example.csv
id,name,password,hostip,description,source,admin_name,extra
1,Matt,obvious,10.0.0.1,,,,
2,Maria,obvious as usual,10.1.0.1,some description,localnet,super_admin,
3,Maria,,10.1.0.1,some description,localnet,,
1337,hacker,,127.0.0.1,,,,I've hacked you guys lol!
The initial code can be rewritten as follows using the recordclass library:
import csv
from recordclass import make_dataclass

TableRow = make_dataclass(
    'TableRow',
    "id name password hostip description source admin_name",
    defaults=5*(None,),
    iterable=True)

class OutputTable():
    def __init__(self):
        self.header = TableRow(*TableRow.__fields__)
        self.rows = list()

    def add(self, new_row):
        # Example assumes new_row is an instance of TableRow
        self.rows.append(new_row)

    def to_csv(self, file_name):
        with open(file_name, 'w') as csv_file:
            # creating a csv writer object
            csv_writer = csv.writer(csv_file)
            # writing the fields
            csv_writer.writerow(self.header)
            for row in sorted(self.rows):
                csv_writer.writerow(row)

outtable = OutputTable()
outtable.add(TableRow(1, 'Matt', 'obvious', '10.0.0.1'))
outtable.add(TableRow(2, 'Maria', 'obvious as usual', '10.1.0.1', 'some description', 'localnet', 'super_admin'))
outtable.add(TableRow(3, 'Maria', hostip='10.1.0.1', description='some description', source='localnet'))
outtable.to_csv('./example.csv')
The result will be:
id,name,password,hostip,description,source,admin_name
1,Matt,obvious,10.0.0.1,,,
2,Maria,obvious as usual,10.1.0.1,some description,localnet,super_admin
3,Maria,,10.1.0.1,some description,localnet,
Is a dictionary the correct way to be doing this? Ideally this will be more than 5 levels deep. Sorry, my only language experience is PowerShell, where I would just make an array of objects. I'm not looking for someone to write the code; I just want to know if there is a better way.
Thanks
Cody
My Powershell way:
[$title1,$title2,$title3]
$titleX.comment = "comment here"
$titleX.comment.author = "bob"
$titleX.comment.author.karma = "200"
$titleX.comment.reply = "Hey Bob love your comment."
$titleX.comment.reply.author = "Alex"
$titleX.comment.reply.reply = "I disagree"
Python code (broken):
import praw

d = {}
reddit = praw.Reddit(client_id='XXXX',
                     client_secret='XXXX',
                     user_agent='android:com.example.myredditapp:'
                                'v1.2.3 (by /u/XXX)')

for submission in reddit.subreddit('redditdev').hot(limit=2):
    d[submission.id] = {}
    d[submission.id]['comment'] = {}
    d[submission.id]['title'] = {}
    d[submission.id]['comment']['author'] = {}
    d[submission.id]['title'] = submission.title
    mySubmission = reddit.submission(id=submission.id)
    mySubmission.comments.replace_more(limit=0)
    for comment in mySubmission.comments.list():
        d[submission.id]['comment'] = comment.body
        d[submission.id]['comment']['author'] = comment.author.name
        print(submission.title)
        print(comment.body)
        print(comment.author.name)

print(d)
File "C:/git/tensorflow/Reddit/pull.py", line 23, in <module>
d[submission.id]['comment']['author'] = comment.author.name
TypeError: 'str' object does not support item assignment
{'6xg24v': {'comment': 'Locking this version. Please comment on the [original post](https://www.reddit.com/r/changelog/comments/6xfyfg/an_update_on_the_state_of_the_redditreddit_and/)!', 'title': 'An update on the state of the reddit/reddit and reddit/reddit-mobile repositories'}}
I think your approach using a dictionary is okay, but you might also solve this by using a data structure for your posts: Instead of writing
d[submission.id] = {}
d[submission.id]['comment'] = {}
d[submission.id]['title']= {}
d[submission.id]['comment']['author']={}
d[submission.id]['title'] = submission.title
you could create a class Submission like this:
class Submission(object):
    def __init__(self, id, author, title, content):
        self.id = id
        self.author = author
        self.title = title
        self.content = content
        self.subSubmissions = {}

    def addSubSubmission(self, submission):
        self.subSubmissions[submission.id] = submission

    def getSubSubmission(self, id):
        return self.subSubmissions[id]
Using it, you could change your code to this:
submissions = {}
for sm in reddit.subreddit('redditdev').hot(limit=2):
    submissions[sm.id] = Submission(sm.id, sm.author, sm.title, sm.content)
    # I am not quite sure what these lines are supposed to do, so you might be able to improve these, too
    mySubmission = reddit.submission(id=sm.id)
    mySubmission.comments.replace_more(limit=0)
    for cmt in mySubmission.comments.list():
        submissions[sm.id].addSubSubmission(Submission(cmt.id, cmt.author, None, cmt.body))  # comments have no title
By using this approach you can also extract the code that reads out the comments/subSubmissions into a separate function which can call itself recursively, so that you can read comments nested to any depth.
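For instance, a minimal sketch of that recursive readout (not from the original answer; it assumes praw comments expose .id, .author, .body and .replies, and that .author can be None for deleted comments):

def add_comments(parent, comments):
    # walk one level of comments and recurse into their replies
    for cmt in comments:
        author = cmt.author.name if cmt.author else "[deleted]"
        child = Submission(cmt.id, author, None, cmt.body)  # comments have no title
        parent.addSubSubmission(child)
        add_comments(child, cmt.replies)  # nested replies become sub-subSubmissions

# usage, after mySubmission.comments.replace_more(limit=0):
# add_comments(submissions[sm.id], mySubmission.comments)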
I am trying to extract raw data from a text file and after processing the raw data, I want to export it to another text file. Below is the python code I have written for this process. I am using the "petl" package in python 3 for this purpose. 'locations.txt' is the raw data file.
import glob, os
from petl import *

class ETL():
    def __init__(self, input):
        self.list = input

    def parse_P(self):
        personids = None
        for term in self.list:
            if term.startswith('P'):
                personids = term[1:]
        personid = personids.split(',')
        return personid

    def return_location(self):
        location = None
        for term in self.list:
            if term.startswith('L'):
                location = term[1:]
        return location

    def return_location_id(self, location):
        location = self.return_location()
        locationid = None

    def return_country_id(self):
        countryid = None
        for term in self.list:
            if term.startswith('C'):
                countryid = term[1:]
        return countryid

    def return_region_id(self):
        regionid = None
        for term in self.list:
            if term.startswith('R'):
                regionid = term[1:]
        return regionid

    def return_city_id(self):
        cityid = None
        for term in self.list:
            if term.startswith('I'):
                cityid = term[1:]
        return cityid

print(os.getcwd())
os.chdir("D:\ETL-IntroductionProject")
print(os.getcwd())

final_location = [['L', 'P', 'C', 'R', 'I']]
new_location = fromtext('locations.txt', encoding='Latin-1')

stored_list = []
for identifier in new_location:
    if identifier[0].startswith('L'):
        identifier = identifier[0]
        info_list = identifier.split('_')
        stored_list.append(info_list)

for lst in stored_list:
    tabling = ETL(lst)
    location = tabling.return_location()
    country = tabling.return_country_id()
    city = tabling.return_city_id()
    region = tabling.return_region_id()
    person_list = tabling.parse_P()
    for person in person_list:
        table_new = [location, person, country, region, city]
        final_location.append(table_new)

totext(final_location, 'l1.txt')
However when I use "totext" function of petl, it throws me an "Assertion Error".
AssertionError: template is required
I am unable to understand what the fault is. Can someone please explain the problem I am facing and what I should be doing?
The template parameter to the totext function is not optional: there is no default format for how the rows are written, so you must provide a template. Check the docs for totext for an example: https://petl.readthedocs.io/en/latest/io.html#text-files
The template describes the format of each row that it writes out, using the field headers to refer to the values; you can optionally pass in a prologue to write the header too. A basic template in your case (note the trailing newline, since totext writes the formatted template once per row) would be:
table_new_template = "{L} {P} {C} {R} {I}\n"
totext(final_location, 'l1.txt', template=table_new_template)
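If you also want a header line in the output file, totext accepts a prologue string that is written once before the rows (a small sketch under that assumption; the header text itself is just illustrative):

totext(final_location, 'l1.txt',
       template="{L} {P} {C} {R} {I}\n",
       prologue="L P C R I\n")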