Related
I need to store input from the system and trigger a code block when a command given via input matches a condition. The commands are produced randomly by the system, so they are not the same every time the code is executed. What I do below is store the input in a list until an empty line is entered, which signals that the commands are over; the problem statement specifically says that the commands end with an empty line after the last command. I then read the commands and their values from that list until there are no commands left to perform. I know this is bad practice. Since I am new to this language, I need some advice on how to change my code. Thanks in advance. By the way, I can't change the conditions in the if statements, because the commands given via input are not always the same, but they look like this (and there are more):
append_it 15
insert_it 0 25
remove_it 30
The code works just fine; I need advice on making it follow good coding practice so that I can improve my Python.
# Phase 1: buffer every command line until the first empty line, which marks
# the end of the input.
i = 0
command_list = []
while True:
    command = input('')
    if command == '':
        break
    command_list.append(command)
    i += 1

# Phase 2: replay the buffered commands against ``arr``.
b = 0
arr = []
while i != b:
    command1 = command_list[b]
    b += 1
    # Split on whitespace instead of slicing at fixed offsets: the command
    # names are nine characters long (so slices such as [0:8] can never equal
    # them) and the numeric arguments may have any number of digits.
    parts = command1.split()
    if not parts:
        continue  # whitespace-only line: nothing to run
    name, args = parts[0], parts[1:]
    if name == "append_it":
        value = int(args[0])
        arr.append(value)
    elif name == "insert_it":
        index = int(args[0])
        value = int(args[1])
        arr.insert(index, value)
    elif name == "remove_it":
        value = int(args[0])
        # Asking to remove a value that is not stored is silently ignored.
        if value in arr:
            arr.remove(value)
    elif name == "print_it":
        print(arr)
    elif name == "reverse_it":
        arr.reverse()
    elif name == "sort_it":
        arr.sort()
    elif name == "pop_it":
        arr.pop()
You can improve this by defining the actions in a dictionary, storing each input line as a split list, and calling the appropriate function for each command:
def appendit(a, *prms):
    """Append ``int(prms[0])`` to the end of list ``a``."""
    a.append(int(prms[0]))


def insertit(a, *prms):
    """Insert ``int(prms[1])`` into list ``a`` at index ``int(prms[0])``."""
    position = int(prms[0])
    number = int(prms[1])
    a.insert(position, number)


def removeit(a, *prms):
    """Remove the first occurrence of ``int(prms[0])`` from ``a`` if present.

    ``list.remove`` raises ``ValueError`` for a value that is not stored; that
    case is ignored so a ``remove_it`` command for an absent value does not
    abort the whole command run.
    """
    number = int(prms[0])  # conversion errors are deliberately not swallowed
    try:
        a.remove(number)
    except ValueError:
        pass


def reverseit(a):
    """Reverse list ``a`` in place."""
    a.reverse()


def sortit(a):
    """Sort list ``a`` in place in ascending order."""
    a.sort()


def popit(a):
    """Drop the last element of list ``a``."""
    a.pop()
# Dispatch table: command name typed by the user -> function that performs it.
# Every function receives the working list first, followed by the remaining
# words of the command line as strings.
cmds = {
    "append_it": appendit,
    "insert_it": insertit,
    "remove_it": removeit,
    "print_it": print,  # does not need any special function
    "reverse_it": reverseit,
    "sort_it": sortit,
    "pop_it": popit,
}

# Collect commands until the first empty line.
command_list = []
while True:
    command = input('')
    if command == '':
        break
    c = command.split()  # split the command already
    # Only keep commands we know. ``c`` is empty for a whitespace-only line,
    # so check that before looking at ``c[0]``. Known commands may still carry
    # the wrong number of parameters - that has to be checked in the functions.
    if c and c[0] in cmds:
        command_list.append(c)

arr = []
for (command, *prms) in command_list:
    # Unpacking an empty ``prms`` passes no extra arguments, so one call form
    # serves commands with and without parameters.
    cmds[command](arr, *prms)
Output:
# inputs from user:
append_it 42
append_it 32
append_it 52
append_it 62
append_it 82
append_it 12
append_it 22
append_it 33
append_it 12
print_it # 1st printout
sort_it
print_it # 2nd printout sorted
reverse_it
print_it # 3rd printout reversed sorted
pop_it
print_it # one elem popped
insert_it 4 99
remove_it 42
print_it # 99 inserted and 42 removed
# print_it - outputs
[42, 32, 52, 62, 82, 12, 22, 33, 12]
[12, 12, 22, 32, 33, 42, 52, 62, 82]
[82, 62, 52, 42, 33, 32, 22, 12, 12]
[82, 62, 52, 42, 33, 32, 22, 12]
[82, 62, 52, 99, 33, 32, 22, 12]
import re
import os
import sys
# Reads whitespace-separated "name math phy chem" records from marks_file.
class Marks:
def __init__(self):
self.marks = []
self.marks_file = '/root/projectpython/mark.txt'
# Parse each line of marks_file into a name and three integer marks.
def loadAll(self):
file = open(self.marks_file, 'r')
for line in file.readlines():
name,math,phy,chem = line.strip().split()
name=name
math=int(math)
phy=int(phy)
chem=int(chem)
# NOTE: this assignment rebinds self.marks to the current record, so whatever
# was stored for earlier lines is discarded.
self.marks=[name,math,phy,chem]
print(self.marks)
file.close()
# Load the file, then print each element of self.marks on its own line.
def percent(self):
# NOTE: dash is assigned but not used anywhere in the code shown.
dash = '-' * 40
self.loadAll()
for n in self.marks:
print(n)
# Create the loader and run it.
Book_1 = Marks()
Book_1.percent()
output:-
['gk', 50, 40, 30]
['rahul', 34, 54, 30]
['rohit', 87, 45, 9]
rohit
87
45
9
But I want to print all the values in tabular format; it is showing only the last record.
Is using a list the correct way to store student data (name and marks)?
The problem is with this line inside the read loop:
self.marks=[name,math,phy,chem]
It rebinds the list every time a record is read, so only the last record is kept.
Instead, use:
self.marks.append([name,math,phy,chem])
You keep re-initializing the list inside the for statement,
so only the values from the last line are reflected in it.
I think you can remove the re-initialization and append to the list instead.
import re
import os
import sys
class Marks:
    """Load student marks from a whitespace-separated text file.

    Every non-blank line of the file must hold four fields,
    ``name math phy chem``, where the three marks are integers.
    """

    def __init__(self):
        # Flat list: name, math, phy, chem, name, math, phy, chem, ...
        self.marks = []
        self.marks_file = '/root/projectpython/mark.txt'

    def loadAll(self):
        """Append the fields of every record in ``marks_file`` to ``self.marks``.

        The accumulated list is printed after each record. Blank lines are
        skipped.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if a line does not have exactly four fields or a mark
                is not an integer.
        """
        # ``with`` closes the file even when a malformed line raises.
        with open(self.marks_file, 'r') as file:
            for line in file:
                fields = line.split()
                if not fields:
                    continue  # blank line, e.g. a trailing newline at EOF
                name, math_mark, phy_mark, chem_mark = fields
                self.marks.extend(
                    [name, int(math_mark), int(phy_mark), int(chem_mark)]
                )
                print(self.marks)

    def percent(self):
        """Load the marks file and print each stored value on its own line."""
        self.loadAll()
        for n in self.marks:
            print(n)
# Create the loader, then load the marks file and print every stored value.
Book_1 = Marks()
Book_1.percent()
Change self.marks=[name,math,phy,chem] to self.marks.append([name,math,phy,chem]).
Then the easiest solution is to transpose the self.marks list and print the result.
Suppose your marks list is [['gk', 50, 40, 30],['rahul', 34, 54, 30],['rohit', 87, 45, 9]]; then simply transpose it:
# Show the row-per-student records, then regroup them column-wise so that each
# printed tuple holds one field (names, first mark, ...) for all students.
print(marks)
transposed = [*zip(*marks)]
print(transposed)
for column in transposed:
    print(column)
output :
[['gk', 50, 40, 30], ['rahul', 34, 54, 30], ['rohit', 87, 45, 9]] #marks list
[('gk', 'rahul', 'rohit'), (50, 34, 87), (40, 54, 45), (30, 30, 9)] #transposed list
('gk', 'rahul', 'rohit') # output the way you want
(50, 34, 87)
(40, 54, 45)
(30, 30, 9)
It's working now.
My only mistake earlier was on this line; it now reads self.marks.append([name,math,phy,chem]):
[['gk', 50, 40, 30], ['rahul', 34, 54, 30], ['rohit', 87, 45, 9]]
from multiprocessing import Process , Queue
from datetime import datetime
# Python 2 script (print statements): split c into chunks of up to five items
# and hand each chunk to support() in a separate process.
c = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
# Queue on which support() reports "done".
out = Queue()
# Print every item of chunk m and put "done" on out.
def support(m):
for k in m :
print "%s <-- hi" % k
out.put("done")
# Queue of pending chunks. NOTE: the name shadows the builtin all().
all = Queue()
temp = []
total = len(c)
count = 0
for m in c :
count += 1
total = total - 1
temp.append(m)
# Flush the chunk once it holds five items or the input is exhausted.
if count == 5 or total == 0 :
all.put(temp)
count = 0
temp = []
# Number of chunks taken from the queue per round.
process_count = 3
while all.qsize() != 0 :
process_list = []
try :
for x in range(process_count) :
p = Process(target=support, args=(all.get(),))
process_list.append(p)
for p in process_list :
p.start()
for p in process_list :
p.join()
except Exception as e :
print e
# NOTE: qsize is referenced without calling it here, so the method object
# itself, not the queue size, is compared with 0.
while out.qsize != 0 :
print out.get()
print "all done"
I don't know why it does not end and does not print "all done"; it just stays in the loop and keeps executing.
It would be a great help if you could make this code more efficient, but first I want to know why it does not end.
The problem is:
# qsize is not called here: the method object itself is compared with 0.
while out.qsize != 0 :
print out.get()
out.qsize is a method, so you are comparing the method object itself (not its return value!) with 0. That != comparison is always True, so the loop never ends, and out.get() blocks forever once the queue is empty.
You should use:
# Calling qsize() compares the reported queue size, not the method, with 0.
# NOTE(review): the multiprocessing docs describe qsize() as approximate - confirm that is acceptable here.
while out.qsize() != 0 :
print out.get()
I'm using collections.Counter to build a dictionary that maps each path to a count of the MIME types found under it. It's a great little module; however, the Counter doesn't change its values from path to path.
I have a dictionary called package_mime_types that each entry looks like this:
package_mime_types['/a/path/to/somewhere'] = [('text/plain'),('text/plain'),('application/msword')]...
As you can imagine, the values in that dictionary are very long. I'm trying to convert it to a listing like this:
package_mime_types['/a/path/to/somewhere'] = ['text/plain':780, 'application/msword':400, 'audio/mp3':30]
This is my little iteration that's supposed to do that:
# Replace each path's list of MIME types with a Counter built from that list.
for package_path, mime_types_list in package_mime_types.items():
c = collections.Counter(mime_types_list)
package_mime_types[package_path] = c
# NOTE(review): the return implies this fragment sits inside a function whose header is not shown.
return package_mime_types
The end result works, but all the Counter arrays are the exact same for each path.
/path1/ relates to Counter({'text/plain': 2303, 'audio/x-wav': 90, 'text/html': 17, 'application/msword': 17, 'application/x-trash': 6, 'application/x-tar': 4, 'application/xml': 1, 'text/x-sh': 1})
/path2/ relates to Counter({'text/plain': 2303, 'audio/x-wav': 90, 'text/html': 17, 'application/msword': 17, 'application/x-trash': 6, 'application/x-tar': 4, 'application/xml': 1, 'text/x-sh': 1})
/path3/ relates to Counter({'text/plain': 2303, 'audio/x-wav': 90, 'text/html': 17, 'application/msword': 17, 'application/x-trash': 6, 'application/x-tar': 4, 'application/xml': 1, 'text/x-sh': 1})
/path4/ relates to Counter({'text/plain': 2303, 'audio/x-wav': 90, 'text/html': 17, 'application/msword': 17, 'application/x-trash': 6, 'application/x-tar': 4, 'application/xml': 1, 'text/x-sh': 1})
/path5/ relates to Counter({'text/plain': 2303, 'audio/x-wav': 90, 'text/html': 17, 'application/msword': 17, 'application/x-trash': 6, 'application/x-tar': 4, 'application/xml': 1, 'text/x-sh': 1})
Am I missing something with using the Counter?
I'm facepalming myself right now. It wasn't a problem with the Counter at all but rather the iteration I was doing to create the listing of the file types. I didn't make a new array each time the iteration was populating my dictionary. So all of the files were associated with each path.
# Faulty version, kept to illustrate the problem described above.
def find_mimes(package_paths):
package_mime_types = {}
# NOTE: this list is created once, before the loop over package_paths, and the
# same list object is what gets stored for each path below.
mime_types_list =[]
## Walking through directories looking for the mime types
for package_path in package_paths:
print(package_path, "is being walked through")
for root, dirs, files in os.walk(package_path, followlinks = True):
for file in files:
# Files for which guess_type returns (None, None) are skipped.
if mimetypes.guess_type(os.path.join(root, file)) != (None, None):
mime_types_list.append(mimetypes.guess_type(os.path.join(root, file))[0])
package_mime_types[package_path] = mime_types_list
See how mime_types_list is defined above the loop? It was created only once, so every path shared the same list. Moving it into the package_path loop fixed it.
def find_mimes(package_paths):
    """Map each package path to the MIME types of the files found beneath it.

    Each directory tree is walked (following symlinks) and the MIME type of
    every file is guessed from its name. Files whose type cannot be guessed
    are left out. A progress line is printed for every path.

    Args:
        package_paths: iterable of directory paths to walk.

    Returns:
        dict mapping each path to a list of MIME type strings, one entry per
        recognised file.
    """
    package_mime_types = {}
    for package_path in package_paths:
        # A fresh list per path; sharing one list across paths would give
        # every path the combined results of all of them.
        mime_types_list = []
        print(package_path, "is being walked through")
        for root, dirs, files in os.walk(package_path, followlinks=True):
            for file in files:
                mime_type, _encoding = mimetypes.guess_type(os.path.join(root, file))
                # Test the type itself: a name such as "x.gz" yields
                # (None, 'gzip'), which is not (None, None) but has no type.
                if mime_type is not None:
                    mime_types_list.append(mime_type)
        package_mime_types[package_path] = mime_types_list
    return package_mime_types
I have loaded HTML into pyqt and would like to create a list of all the content on the page.
I then need to be able to get the position of the text, using .geometry()
I would like a list of objects, where the following would be possible:
# Desired usage (sketch): list_of_content_in_html stands for the list this question asks how to build.
for i in list_of_content_in_html:
print i.toPlainText(), i.geometry() #prints the text, and the position.
In case I am unclear, by "contents" I mean in the HTML below, contents is
'c', 'r1 c1', 'r1, c2', 'row2 c2', 'more contents' - the text the web user sees in the browser, basically.
c
<table border="1">
<tr>
<td>r1 c1</td>
<td>r1 c2</td>
</tr>
<tr>
<td></td>
<td>row2 c2</td>
</tr>
</table>
more contents
This doesn't seem to be possible using QtWebKit for pages like this one, which nest objects but don't wrap the text outside the table in <p>...</p>. As a result, 'c' and 'more contents' don't end up in separate QWebElements; they can only be found in the BODY-level block. As a solution, one could run the page through a parser first. Simply traversing the children of the currentFrame's documentElement yields the following elements:
# position in element tree, bounding box, tag, text:
(0, 0) [0, 0, 75, 165] HTML - u'c\nr1 c1\tr1 c2\nrow2 c2\nmore contents'
(1, 1) [8, 8, 67, 157] BODY - u'c\nr1 c1\tr1 c2\nrow2 c2\nmore contents'
(2, 0) [8, 27, 75, 119] TABLE - u'r1 c1\tr1 c2\nrow2 c2'
(3, 0) [9, 28, 74, 118] TBODY - u'r1 c1\tr1 c2\nrow2 c2'
(4, 0) [9, 30, 74, 72] TR - u'r1 c1\tr1 c2'
(5, 0) [11, 30, 32, 72] TD - u'r1 c1'
(5, 1) [34, 30, 72, 72] TD - u'r1 c2'
(4, 1) [9, 74, 74, 116] TR - u'row2 c2'
(5, 1) [34, 74, 72, 116] TD - u'row2 c2'
Code for this:
import sys
from PySide.QtCore import *
from PySide.QtGui import *
from PySide.QtWebKit import *
class WebPage(QObject):
    """Load an HTML string into a QWebPage and record its text-bearing elements.

    Once the page reports ``loadFinished``, the element tree is walked and a
    dict with keys ``pos``, ``dim``, ``tag`` and ``text`` is appended to
    ``self.output`` for every element whose plain text is non-empty; the
    ``finished`` signal is emitted afterwards.
    """

    finished = Signal()

    def __init__(self, data, parent=None):
        super(WebPage, self).__init__(parent)
        self.output = []
        self.data = data
        self.page = QWebPage()
        self.page.loadFinished.connect(self.process)

    def start(self):
        """Hand the stored HTML to the page's main frame."""
        main_frame = self.page.mainFrame()
        main_frame.setHtml(self.data)

    #Slot(bool)
    def process(self, something=False):
        """Resize the viewport to the content, walk the tree, emit ``finished``."""
        content_size = self.page.mainFrame().contentsSize()
        self.page.setViewportSize(content_size)
        root = self.page.currentFrame().documentElement()
        self.gather_info(root)
        self.finished.emit()

    def gather_info(self, elem, i=0):
        """Record ``elem``, its following siblings and all their descendants.

        ``i`` is the nesting depth. The walk gives up below depth 200 and
        visits at most 100 siblings on each level.
        """
        if i > 200:
            return
        for cnt in range(100):
            text = elem.toPlainText()
            box = elem.geometry()
            tag = elem.tagName()
            left, top = box.x(), box.y()
            # Bounding box stored as [left, top, right, bottom].
            dim = [left, top, left + box.width(), top + box.height()]
            if text:
                self.output.append(dict(pos=(i, cnt), dim=dim, tag=tag, text=text))
            first_child = elem.firstChild()
            if not first_child.isNull():
                self.gather_info(first_child, i + 1)
            elem = elem.nextSibling()
            if elem.isNull():
                break
webpage = None
def print_strings():
for s in webpage.output:
print s['pos'], s['dim'], s['tag'], '-', repr(s['text'])
if __name__ == '__main__':
app = QApplication(sys.argv)
data = open(sys.argv[1]).read()
webpage = WebPage(data)
webpage.finished.connect(print_strings)
webpage.start()
.
A different approach
The desired course of action depends on what you want to achieve. You can get all the strings from the QWebPage using webpage.currentFrame().documentElement().toPlainText(), but that just shows the whole page as a string with no positioning information related to all the tags. Browsing the QWebElement tree gives you the desired information but it has the drawbacks, which I mentioned above.
If you really want to know the position of all the text, the only accurate way to do this (other than rendering the page and using OCR) is to break the text into characters and save their individual bounding boxes. Here's how I did it:
First I parsed the page with BeautifulSoup4 and enclosed every non-space text character X in a <span class="Nd92KSx3u2">X</span>. Then I ran a PyQt script (actually a PySide script) which loads the altered page and printed out the characters with their bounding boxes after I looked them up using findAllElements('span[class="Nd92KSx3u2"]').
parser.py:
import sys, cgi, re
from bs4 import BeautifulSoup, element
# Class name used to tag the generated per-character <span> elements.
magical_class = "Nd92KSx3u2"
# Elements that are skipped entirely (their text is never wrapped).
restricted_tags = "title script object embed".split()
# The span markup is inserted into the tree as plain text, so BeautifulSoup
# entity-escapes its angle brackets on output. Match that escaped form so the
# substitution can turn it back into real <span> tags.
re_my_span = re.compile(
    r'&lt;span class="%s"&gt;(.+?)&lt;/span&gt;' % magical_class
)


def no_nl(s):
    """Return ``str(s)`` with carriage returns removed and newlines replaced by spaces."""
    return str(s).replace("\r", "").replace("\n", " ")
# Require exactly two arguments after the script name; otherwise print the usage line and exit with status 1.
if len(sys.argv) != 3:
print "Usage: %s <input_html_file> <output_html_file>" % sys.argv[0]
sys.exit(1)
def process(elem):
for x in elem.children:
if isinstance(x, element.Comment): continue
if isinstance(x, element.Tag):
if x.name in restricted_tags:
continue
if isinstance(x, element.NavigableString):
if not len(no_nl(x.string).strip()):
continue # it's just empty space
print '[', no_nl(x.string).strip(), ']', # debug output of found strings
s = ""
for c in x.string:
if c in (' ', '\r', '\n', '\t'): s += c
else: s += '<span class="%s">%s</span>' % (magical_class, c)
x.replace_with(s)
continue
process(x)
# Parse the input file; ``with`` makes sure the handle is closed again.
with open(sys.argv[1]) as source_file:
    soup = BeautifulSoup(source_file)
process(soup)
# Rewrite the span markers found in the serialised document, then save it.
output = re_my_span.sub(r'<span class="%s">\1</span>' % magical_class, str(soup))
with open(sys.argv[2], 'w') as f:
    f.write(output)
charpos.py:
import sys
from PySide.QtCore import *
from PySide.QtGui import *
from PySide.QtWebKit import *
magical_class = "Nd92KSx3u2"
class WebPage(QObject):
def __init__(self, data, parent=None):
super(WebPage, self).__init__(parent)
self.output = []
self.data = data
self.page = QWebPage()
self.page.loadFinished.connect(self.process)
def start(self):
self.page.mainFrame().setHtml(self.data)
#Slot(bool)
def process(self, something=False):
self.page.setViewportSize(self.page.mainFrame().contentsSize())
frame = self.page.currentFrame()
elements = frame.findAllElements('span[class="%s"]' % magical_class)
for e in elements:
s = e.toPlainText()
rect = e.geometry()
dim = [rect.x(), rect.y(),
rect.x() + rect.width(), rect.y() + rect.height()]
if s and rect.width() > 0 and rect.height() > 0: print dim, s
if __name__ == '__main__':
    app = QApplication(sys.argv)
    # Read the HTML file named on the command line; ``with`` closes it
    # deterministically instead of leaving that to garbage collection.
    with open(sys.argv[1]) as html_file:
        data = html_file.read()
    webpage = WebPage(data)
    webpage.start()
input.html (slightly altered to show more problems with simple string dumping):
a<span>b<span>c</span></span>
<table border="1">
<tr><td>r1 <font>c1</font> </td><td>r1 c2</td></tr>
<tr><td></td><td>row2 & c2</td></tr>
</table>
more <b>contents</b>
and the test run:
$ python parser.py input.html temp.html
[ a ] [ b ] [ c ] [ r1 ] [ c1 ] [ r1 c2 ] [ row2 & c2 ] [ more ] [ contents ]
$ charpos.py temp.html
[8, 8, 17, 26] a
[17, 8, 26, 26] b
[26, 8, 34, 26] c
[13, 48, 18, 66] r
[18, 48, 27, 66] 1
[13, 67, 21, 85] c
[21, 67, 30, 85] 1
[36, 48, 41, 66] r
[41, 48, 50, 66] 1
[36, 67, 44, 85] c
[44, 67, 53, 85] 2
[36, 92, 41, 110] r
[41, 92, 50, 110] o
[50, 92, 61, 110] w
[61, 92, 70, 110] 2
[36, 111, 47, 129] &
[51, 111, 59, 129] c
[59, 111, 68, 129] 2
[8, 135, 21, 153] m
[21, 135, 30, 153] o
[30, 135, 35, 153] r
[35, 135, 44, 153] e
[8, 154, 17, 173] c
[17, 154, 27, 173] o
[27, 154, 37, 173] n
[37, 154, 42, 173] t
[42, 154, 51, 173] e
[51, 154, 61, 173] n
[61, 154, 66, 173] t
[66, 154, 75, 173] s
Looking at the bounding boxes, it is (in this simple case without changes in font size and things like subscripts) quite easy to glue them back into words if you wish.
I worked it out.
for elem in QWebView().page().currentFrame().documentElement().findAll('*'):
print unicode(elem.toPlainText()), unicode(elem.geometry().getCoords()), '\n'
The '*' selector matches every element, so the loop iterates over everything that is found, thereby walking the whole DOM tree.