Python dictionary editing entries - python

def replace_acronym(): # function not yet implemented
#FIND
for abbr, text in acronyms.items():
if abbr == acronym_edit.get():
textadd.insert(0,text)
#DELETE
name = acronym_edit.get().upper()
name.upper()
r =dict(acronyms)
del r[name]
with open('acronym_dict.py','w')as outfile:
outfile.write(str(r))
outfile.close() # uneccessary explicit closure since used with...
message ='{0} {1} {2} \n '.format('Removed', name,'with its text from the database.')
display.insert('0.0',message)
#ADD
abbr_in = acronym_edit.get()
text_in = add_expansion.get()
acronyms[abbr_in] = text_in
# write amended dictionary
with open('acronym_dict.py','w')as outfile:
outfile.write(str(acronyms))
outfile.close()
message ='{0} {1}:{2}{3}\n '.format('Modified entry', abbr_in,text_in, 'added')
display.insert('0.0',message)
I am trying to add the functionality of editing my dictionary entries in my tkinter widget. The dictionary is in the format {ACRONYM: text, ACRONYM2: text2...}
What I thought the function would achieve is to find the entry in the dictionary, delete both the acronym and its associated text and then add whatever the acronym and text have been changed to. What happens is for example if I have an entry TEST: test and I want to modify it to TEXT: abc what is returned by the function is TEXT: testabc - appending the changed text although I have (I thought) overwritten the file.
What am I doing wrong?

That's a pretty messy lookin' function. The acronym replacement itself can be done pretty simple:
acronyms = {'SONAR': 'SOund Navigation And Ranging',
'HTML': 'HyperText Markup Language',
'CSS': 'Cascading Style Sheets',
'TEST': 'test',
'SCUBA': 'Self Contained Underwater Breathing Apparatus',
'RADAR': 'RAdio Detection And Ranging',
}
def replace_acronym(a_dict,check_for,replacement_key,replacement_text):
c = a_dict.get(check_for)
if c is not None:
del a_dict[check_for]
a_dict[replacement_key] = replacement_text
return a_dict
new_acronyms = replace_acronym(acronyms,'TEST','TEXT','abc')
That works perfect for me (in Python 3). You could just call this in another function that writes the new_acronyms dict into the file or do whatever else you want with it 'cause it's no longer tied to just being written to the file.

Related

What would be the best way to cycle through an array of elements from HTML in order to use 2 separate tag names in the order they came?

Not really sure how to word this question properly, but I'm basically playing around with python and using Selenium to scrape a website and I'm trying to create a JSON file with the data.
Here's the goal I'm aiming to achieve:
{
"main1" : {
"sub1" : "data",
"sub2" : "data",
"sub3" : "data",
"sub4" : "data"
},
"main2" : {
"sub1" : "data",
"sub2" : "data",
"sub3" : "data",
"sub4" : "data"
}
}
The problem I'm facing at the moment is that the website has no indentation or child elements. It looks like this (but longer and actual copy, of course):
<h3>Main1</h3>
<p>Sub1</p>
<p>Sub2</p>
<p>Sub3</p>
<p>Sub4</p>
<h3>Main2</h3>
Now I want to iterate through the HTML in order to use the <h3> tags as the parent ("Main" in the JSON example) and <p> tags as the children(sub[num]). I'm new to both python and Selenium, so I may have done this wrong, but I've tried using items.find_elements_by_tag_name('el') to separate two, but I don't know how to put them back together in the order that they originally came.
I then tried looping through all the elements and separating the tags using if (item.tag_name == "el"): loops. This works perfectly when I print the results of each loop, but when it comes to putting them together in a JSON file, I have the same issue as the previous method where I cannot seem to get the 2 to join. I've tried a few variations and I either get key errors or only the last item in the loop gets recorded.
Just for reference, here's the code for this step:
items = browser.find_element_by_xpath(
'//*[#id="main-content"]') #Main Content
itemList = items.find_elements_by_xpath(".//*")
statuses = [
"Status1",
"Status2",
"Status3",
"Status4"
]
for item in itemList: #iterate through the HTML
if (item.tag_name == "h3"): #Separate H3 Tags
main = item.text
print("======================================")
print(main)
print("======================================")
if (item.tag_name == 'p'): #Separate P tags
for status in statuses:
if(status in item.text): #Filter P tags to only display info that contains words in the Status array
delimeters = ":", "(", "See"
regexPattern = "|".join(map(re.escape, delimeters))
zoneData = re.split(regexPattern, item.text)
#Split P tags into separate parts
sub1 = zoneData[0]
sub2 = zoneData[1].translate({ord('*'): None})
sub3 = zoneData[2].translate({ord(")"): None})
print(sub1)
print(sub2)
print(sub3)
The final option I've decided to try is to try going through all the HTML again, but using enumerate() and using the element's IDs and including all the tags between the 2 IDs, but I'm not really sure what my plan of action is with this just yet.
In general, the last option seems a bit convoluted and I'm pretty certain there's a simpler way to do this. What would you suggest?
Here's my idea, but I didn't do the data part, you can add it later.
I assume that there's no duplicate in main name, or else you will lose some info.
items = browser.find_element_by_xpath(
'//*[#id="main-content"]') #Main Content
itemList = items.find_elements_by_xpath(".//p|.//h3") # only finds h3 or p
def construct(item_list):
current_main = ''
final_dict: dict = {}
for item in item_list:
if item.tag_name == "h3":
current_main = item.text
final_dict[current_main] = {} # create empty dict inside main. remove if you want to update the main dict
if item.tag_name == "p":
p_name = item.text
final_dict[current_main][p_name] = "data"
return final_dict

How to make a Python program automatically prints what matched after iterating through lists

I have this Python code:
with open('save.data') as fp:
save_data = dict([line.split(' = ') for line in fp.read().splitlines()])
with open('brute.txt') as fp:
brute = fp.read().splitlines()
for username, password in save_data.items():
if username in brute:
break
else:
print("didn't find the username")
Here is a quick explanation; the save.data is a file that contains variables of Batch-file game (such as username, hp etc...) and brute.txt is a file that contains "random" strings (like what seen in wordlists used for brute-force).
save.data:
username1 = PlayerName
password1 = PlayerPass
hp = 100
As i said before, it's a Batch-file game so, no need to quote strings
brute.txt:
username
usrnm
username1
password
password1
health
hp
So, let's assume that the Python file is a "game hacker" that "brute" a Batch-file's game save file in hope of finding matches and when it does find, it retrieves them and display them to the user.
## We did all the previous code
...
>>> print(save_data["username1"])
PlayerName
Success! we retrieved the variables! But I want to make the program capable of displaying the variables it self (because I knew that "username1" was the match, that's why I chose to print it). What I mean is, I want to make the program print the variables that matched. E.g: If instead of "username1" in save.data there was "usrnm", it will surely get recognized after the "bruting" process because it's already in brute.txt. So, how to make the program print what matched? because I don't know if it's "username" or "username1" etc... The program does :p (of course without opening save.data) And of course that doesn't mean the program will search only for the username, it's a game and there should be other variables like gold/coins, hp etc... If you didn't understand something, kindly comment it and I will clear it up, and thanks for your time!
Use a dict such as this:
with open('brute.txt', 'r') as f:
# First get all the brute file stuff
lookup_dic = {word.strip(): None for word in f.readlines()}
with open('save.data', 'r') as f:
# Update that dict with the stuff from the save.data
lines = (line.strip().split(' = ') for line in f.readlines())
for lookup, val in lines:
if lookup in lookup_dic:
print(f"{lookup} matched and its value is {val}")
lookup_dic[lookup] = val
# Now you have a complete lookup table.
print(lookup_dic)
print(lookup_dic['hp'])
Output:
username1 matched and its value is PlayerName
password1 matched and its value is PlayerPass
hp matched and its value is 100
{'username': None, 'usrnm': None, 'username1': 'PlayerName', 'password': None, 'password1': 'PlayerPass','health': None, 'hp': '100'}
100

How to Make Reusable Functions in Python

I am new to programming, and to Python. I have written a simple random quote generator which loads various categories of quotes as lists into a dictionary. It then randomly chooses a list and then a specific quote from the list and outputs it to the screen. It is mostly complete but I am looking for ways to clean up the code to make it more efficient. Right now I have a set of 14 different categories that the user can select from to populate the dictionary. Each one of those category selections calls a function to update the dictionary and the config.ini file to save the user preferences. That results in hundreds of lines of near-identical code, where the only differences are the specific category and files in use. I am looking for a way to rewrite it so that the same function can be reused each time and simply pass in the correct information to make it work. I have posted snippets of the relevant code below. I am using Python 3.6 and TKinter. Thank you for any help you can provide.
Adversity check button to call update_adversity function and add/remove adversity category quotes to/from the dictionary
self.adversity = BooleanVar()
self.adv = Checkbutton(self, text = 'Adversity/Hardship', variable = self.adversity, command = self.update_adversity)
self.adv.grid(row = 1, column = 0, sticky = 'W', padx = 0, pady = 0)
if 'adversity' in quotes:
self.adversity.set(1)
elif 'adversity' not in quotes:
self.adversity.set(0)
add/remove adversity list in dictionary based on checkbutton value
def update_adversity(self):
if self.adversity.get() == True:
config.set('categories', 'adversity', 'True') # updates config file
with open('adversity.py', 'r', encoding = 'UTF8') as f:
new_quotes_added = f.readlines()
quotes['adversity'] = new_quotes_added
try:
del quotes['default']
config.set('categories', 'default', 'False') # updates config file
return quotes
except:
return quotes
elif self.adversity.get() == False:
config.set('categories', 'adversity', 'False') # updates config file
try:
del quotes['adversity']
if quotes == {}:
with open('default.py', 'r', encoding = 'UTF8') as f:
quotes['default'] = f.readlines()
config.set('categories', 'default', 'True') # updates config file
return quotes
else:
return quotes
except:
return quotes

PyQt5: Problems naming a row after using insertRow()

I am trying to create a button that adds a row to a table (QtTableWidget) and uses a dialog box to ask for the name, and I have hit a big problem (seemingly a flaw within PyQt).
By adding a row using the insertRow() function the row header automatically has a value of none, which then means you cannot use the verticalHeaderItem(rowPosition).setText(...) on the row Header as it cannot set the text of an item with value none.
The relevant code is here:
def RenameRow(self, i, name):
self.tab1table.verticalHeaderItem(i).setText(name)
def DatabaseAddRow(self):
text, ok = QInputDialog.getText(self, "Row Entry", 'Please Enter A Row Name:', QLineEdit.Normal, 'e.g. ECN 776')
if ok and text != '':
rowPosition = self.tab1table.rowCount()
self.tab1table.insertRow(rowPosition)
self.RenameRow(rowPosition, text)
Any Ideas how to get around this or maybe methods I do not know about?
So I managed to solve this myself just after asking this after wasting half a day on this problem, such is life. The solution to the problem is to assign an empty item to the header and then rename it, the implementation is here:
def RenameRow(self, i, name, table):
item = QTableWidgetItem()
table.setVerticalHeaderItem(i, item)
item = table.verticalHeaderItem(i)
item.setText(QCoreApplication.translate("MainWindow", name))

Convert pango markup string to GtkTextTag properties

I've got a gtk.TextView that I'd like to add markup-like text to. I know this can be achieved through the use of gtk.TextTag which you can create with similar properties as a pango markup string. I noticed there is no easy way to just say set_markup to a gtk.TextBuffer much like you can with multiple other widgets. Instead you have to create a TextTag, give it properties, and then insert it into the TextBuffer's TagTable specifying the iters that the tag applies to.
I'd ideally like to create a function that can convert a pango markup string into a TextTag to get the same effect. But gtk doesn't appear to have that functionality built-in.
I've noticed that you can use pango.parse_markup() on a marked up string and it will create a pango.AttributeList which contains information regarding the properties set on the string and the indices that they occur at. But there are slight differences in each type of attribute that make it difficult to generalize for every case. Is there a better way to go about this? Or is pango markup just not meant to be converted into gtk.TextTag's?
I finally worked out my own solution to this problem. I created a function that parses the markup string (using pango.parse_markup). Through reading the documentation and python introspection, I was able to work out how to take pango.Attribute and turn convert it into properties that a GtkTextTag can use.
Here's the function:
def parse_markup_string(string):
'''
Parses the string and returns a MarkupProps instance
'''
#The 'value' of an attribute...for some reason the same attribute is called several different things...
attr_values = ('value', 'ink_rect', 'logical_rect', 'desc', 'color')
#Get the AttributeList and text
attr_list, text, accel = pango.parse_markup( string )
attr_iter = attr_list.get_iterator()
#Create the converter
props = MarkupProps()
props.text = text
val = True
while val:
attrs = attr_iter.get_attrs()
for attr in attrs:
name = attr.type
start = attr.start_index
end = attr.end_index
name = pango.AttrType(name).value_nick
value = None
#Figure out which 'value' attribute to use...there's only one per pango.Attribute
for attr_value in attr_values:
if hasattr( attr, attr_value ):
value = getattr( attr, attr_value )
break
#There are some irregularities...'font_desc' of the pango.Attribute
#should be mapped to the 'font' property of a GtkTextTag
if name == 'font_desc':
name = 'font'
props.add( name, value, start, end )
val = attr_iter.next()
return props
This function creates a MarkupProps() object that has the ability to generate GtkTextTags along with the index in the text to apply them to.
Here's the object:
class MarkupProps():
'''
Stores properties that contain indices and appropriate values for that property.
Includes an iterator that generates GtkTextTags with the start and end indices to
apply them to
'''
def __init__(self):
'''
properties = ( {
'properties': {'foreground': 'green', 'background': 'red'}
'start': 0,
'end': 3
},
{
'properties': {'font': 'Lucida Sans 10'},
'start': 1,
'end':2,
},
)
'''
self.properties = []#Sequence containing all the properties, and values, organized by like start and end indices
self.text = ""#The raw text without any markup
def add( self, label, value, start, end ):
'''
Add a property to MarkupProps. If the start and end indices are already in
a property dictionary, then add the property:value entry into
that property, otherwise create a new one
'''
for prop in self.properties:
if prop['start'] == start and prop['end'] == end:
prop['properties'].update({label:value})
else:
new_prop = {
'properties': {label:value},
'start': start,
'end':end,
}
self.properties.append( new_prop )
def __iter__(self):
'''
Creates a GtkTextTag for each dict of properties
Yields (TextTag, start, end)
'''
for prop in self.properties:
tag = gtk.TextTag()
tag.set_properties( **prop['properties'] )
yield (tag, prop['start'], prop['end'])
So with this function and the MarkupProps object, I am able to, given a pango markup string, breakdown the string into it's properties, and text form, and then convert that into GtkTextTags.
Haven't followed GTK+ development, maybe they added something lately, but see these bugs: #59390 and #505478. Since they are not closed, likely nothing is done.

Categories