Not sure what I am doing wrong... - ||| PYTHON ||| - python
I am building a small interpreter while following a tutorial. I think I did everything correctly, but when I start the program I get an error.
Here is the code of my files:
1) the main file - frs.py
from parser import Parser
from lexer import Lexer
def main():
    """Tokenize ``hello.frs``, build its AST, and print both.

    NOTE(review): on Python versions before 3.10 the standard library has
    its own module named ``parser``; depending on the build it may be found
    before the local ``parser.py`` and make ``from parser import Parser``
    fail. Renaming the local file avoids the clash - confirm against the
    interpreter in use.
    """
    filename = 'hello.frs'

    # The lexer consumes the file lazily, line by line, so tokenizing has to
    # happen while the file is still open; `with` guarantees it gets closed.
    with open(filename, 'r') as file:
        lexer = Lexer(file)
        lexer.tokenizer()

    print("TOKENS:")
    print(lexer.tokens, "\n")

    parser = Parser(lexer.tokens)
    parser.build_AST()

    print("AST:")
    # Was `parset.AST`, which raised NameError.
    print(parser.AST, "\n")


if __name__ == "__main__":
    main()
2) the Lexer class - lexer.py
class Lexer:
    """Split source lines into a flat list of token dictionaries.

    Each token is a dict of the form ``{'id': <kind>, 'value': <text>}``
    where ``<kind>`` is one of ``'label'``, ``'keyword'`` or ``'char'``
    (a double-quoted string literal).
    """

    def __init__(self, data):
        """Store the input and prepare an empty token list.

        ``data`` is any iterable yielding lines of source text, for example
        an open text file or a list of strings.
        """
        self.data = data
        self.tokens = []
        self.keywords = [
            'tosay'
        ]

    def _flush_keyword(self, buffer):
        """Emit a keyword token if ``buffer`` spells a keyword.

        Returns True when a token was appended to ``self.tokens``.
        """
        word = ''.join(buffer)
        if word in self.keywords:
            self.tokens.append({'id': 'keyword', 'value': word})
            return True
        return False

    def tokenizer(self):
        """Scan every line of ``self.data`` and append tokens to ``self.tokens``.

        Scanner state (the character buffer and the in-string flag) is reset
        at the start of each line, so string literals cannot span lines.
        """
        for line in self.data:
            buffer = []
            in_string = False
            for char in line:
                if in_string:
                    # Inside a string literal every character is literal
                    # text; only the closing quote is special. This keeps
                    # ':' and keyword-like text in strings from producing
                    # spurious tokens.
                    if char == '"':
                        self.tokens.append(
                            {'id': 'char', 'value': ''.join(buffer)}
                        )
                        in_string = False
                        buffer = []
                    else:
                        buffer.append(char)
                elif char == '"':
                    # A keyword written directly before the quote
                    # (e.g. tosay"hi") must not be dropped silently.
                    self._flush_keyword(buffer)
                    in_string = True
                    buffer = []
                elif char == ':':
                    self.tokens.append(
                        {'id': 'label', 'value': ''.join(buffer)}
                    )
                    buffer = []
                elif self._flush_keyword(buffer):
                    # The character that terminated the keyword is discarded.
                    buffer = []
                elif char == ' ':
                    continue
                else:
                    buffer.append(char)
            # A keyword ending the line without a trailing newline would
            # otherwise never be emitted.
            if not in_string:
                self._flush_keyword(buffer)
3) the Parser class - parser.py
class Parser:
    """Build a simple AST from the token list produced by the lexer.

    The AST is a list of single-key dicts, one per label:
    ``[{label: [node, ...]}, ...]``. Each node is itself a single-key dict.
    """

    def __init__(self, tokens):
        """Keep a reference to ``tokens`` and start with an empty AST."""
        self.tokens = tokens
        self.AST = []

    def add_node(self, parent, node):
        """Append ``node`` to every AST entry whose key is ``parent``."""
        for branch in self.AST:
            if parent in branch:
                branch[parent].append(node)

    def build_AST(self):
        """Walk ``self.tokens`` and fill ``self.AST``.

        * A ``label`` token opens a new top-level entry and becomes the
          current parent.
        * The keyword ``stop`` is stored under the current parent as
          ``{'stop': 0}``.
        * Any other ``keyword`` or ``char`` tokens are consumed in pairs:
          the first token of a pair supplies the key and the second the
          value, giving ``{first_value: second_value}``.
        """
        saved = None
        # None (rather than an unhashable dict) so that add_node's
        # membership test is always safe before the first label is seen.
        parent = None
        collecting = False

        for token in self.tokens:
            kind = token['id']
            if kind == 'label':
                # The original guarded this with a comparison between the
                # parent and a freshly built dict, which was always true;
                # every label therefore opens a new entry.
                parent = token['value']
                self.AST.append({parent: []})
            elif kind == 'keyword' and token['value'] == 'stop':
                self.add_node(parent, {token['value']: 0})
            elif kind in ('keyword', 'char'):
                if not collecting:
                    # First half of a pair: remember it until its value
                    # arrives.
                    saved = token
                    collecting = True
                else:
                    self.add_node(parent, {saved['value']: token['value']})
                    collecting = False
4) the file written in my own language, which is the goal of the tutorial - hello.frs:
commence:
tosay "Hello World"
stop
Basically, everything worked until I added the line `from parser import Parser`. After adding it, I get this error message:
Traceback (most recent call last):
File "frs.py", line 1, in <module>
from parser import Parser
ImportError: cannot import name 'Parser'
I tried renaming the class, but it still doesn't work.
Please help me!
Thank you in advance.
Two errors in your files.
1) File parser.py:
Change:
if collect = False:
To
if collect == False:
2) File frs.py
Change:
print (parset.AST, "\n")
To:
print (parser.AST, "\n")
After the above corrections, my output is:
TOKENS:
[{'id': 'label', 'value': 'commence'}, {'id': 'keyword', 'value': 'tosay'}, {'id': 'char', 'value': 'Hello World'}]
AST:
[{'commence': [{'tosay': 'Hello World'}]}]
Related
For Loop in Function in Python
I'm trying to shift some codes from my main function to a function called read but somehow the loop breaks and it doesn't go through my csv file. Below are the 2 scripts and the csv. Thank you for your advice and tips as the learning curve is getting steeper and steeper ---Code Below--- The script 'NotinFunct' will read the csv file and returns this data The script 'InFunct' will read the same csv file but only returns one set a data The 'NotinFunct' is # -*- coding: utf-8 -*- import csv FILE = 'C://shared//API//NADEV-Numbers_20190220-092956.csv' NBS = {'5684', '7445477'} NEW_NBS = {'56847', '74454773'} def main(): fields_route = {'Pattern', 'CalledX', 'CalledPrefix', 'CallingX', 'CallingPrefix'} for row in csv.DictReader(open(FILE)): if row['Type'] == 'RoutePattern': for nb in NBS: for field in fields_route: if nb in row[field]: for new in NEW_NBS: if nb in new: rp = row['Pattern'] pt = row['Partition'] newrp = row['Pattern'].replace(nb, new) if row['CalledX'] == 'None': cedp = row['CalledX'].replace('None', '') else: cedp = row['CalledX'].replace(nb, new) if row['CalledPrefix'] == 'None': pced = row['CalledPrefix'].replace('None', '') else: pced = row['CalledPrefix'].replace(nb, new) if row['CallingX'] == 'None': cingp = row['CallingX'].replace('None', '') else: cingp = row['CallingX'].replace(nb, new) if row['CallingPrefix'] == 'None': pcing = row['CallingPrefix'].replace('None', '') else: pcing = row['CallingPrefix'].replace(nb, new) print(rp) print(pt) print(newrp) print(cedp) print(pced) print(cingp) print(pcing) print('################') if __name__ == '__main__': main() the 'InFunct' is # -*- coding: utf-8 -*- import csv FILE = 'C://shared//API//NADEV-Numbers_20190220-092956.csv' NBS = {'5684', '7445477'} NEW_NBS = {'56847', '74454773'} def read(): fields_route = {'Pattern', 'CalledX', 'CalledPrefix', 'CallingX', 'CallingPrefix'} for row in csv.DictReader(open(FILE)): if row['Type'] == 'RoutePattern': for nb in NBS: for field in fields_route: if nb in 
row[field]: for new in NEW_NBS: if nb in new: rp = row['Pattern'] pt = row['Partition'] newrp = row['Pattern'].replace(nb, new) if row['CalledX'] == 'None': cedp = row['CalledX'].replace('None', '') else: cedp = row['CalledX'].replace(nb, new) if row['CalledPrefix'] == 'None': pced = row['CalledPrefix'].replace('None', '') else: pced = row['CalledPrefix'].replace(nb, new) if row['CallingX'] == 'None': cingp = row['CallingX'].replace('None', '') else: cingp = row['CallingX'].replace(nb, new) if row['CallingPrefix'] == 'None': pcing = row['CallingPrefix'].replace('None', '') else: pcing = row['CallingPrefix'].replace(nb, new) return rp, pt, newrp, cedp, pced, cingp, pcing def main(): for test in read(): print(test) if __name__ == '__main__': main() the csv is Type,Pattern,Partition,Description,CalledX,CalledPrefix,CallingX,CallingPrefix,FwdAll,FwdBusyInt,FwdBusyExt,FwdNAnsInt,FwdNAnsExt,FwdNCovInt,FwdNCovExt,FwdCTIFail,FwdURegInt,FwdURegExt,ExtPNMask,Device DirectoryNumber,875423,a_nothing_partition,a_nothing_DN,N/A,N/A,N/A,N/A,11,22,33,44,55,66,744547722,77,88,99,9898,SEP798798465143 DirectoryNumber,5684001,a_nothing_partition,None,N/A,N/A,N/A,N/A,None,None,None,None,None,None,None,None,None,None,N/A,N/A TranslationPattern,568412,a_nothing_partition,a_nothing_tp,None,None,None,5236,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A TranslationPattern,568411,a_nothing_partition,a_nothing_tp,None,None,875421,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A TranslationPattern,744547720,a_nothing_partition,a_nothing_tp,961433,None,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A TranslationPattern,744547721,a_nothing_partition,a_nothing_tp,None,786512,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A TranslationPattern,47852,a_nothing_partition,a_nothing_tp,None,None,744547711,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A TranslationPattern,9632,a_nothing_partition,a_nothing_tp,None,None,None,5684,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A 
TranslationPattern,897435496,a_nothing_partition,a_nothing_tp,568433,None,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A TranslationPattern,7896312145697,a_nothing_partition,a_nothing_tp,None,7445477,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A RoutePattern,6568433,a_nothing_partition,None,None,None,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A RoutePattern,6568434,a_nothing_partition,None,None,None,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A RoutePattern,24132,a_nothing_partition,a_nothing_rp,None,None,7445477,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A HuntPilot,568444,a_nothing_partition,a_nothing_hunt pilot,88,99,66,77,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A CingPartyX,8787,a_nothing_partition,a_nothing_calling party X,N/A,N/A,11,744547722,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A CedPartyX,98563,a_nothing_partition,a_nothing_called party X,N/A,N/A,568496,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
Your read function only returns one set of values (the last one). If you changed the function to a generator you can get all the values. Change the end of the read function to the following, making sure to align the yield to the innermost loop block: ... if row['CallingPrefix'] == 'None': pcing = row['CallingPrefix'].replace('None', '') else: pcing = row['CallingPrefix'].replace(nb, new) yield rp, pt, newrp, cedp, pced, cingp, pcing Then you get: ('6568433', 'a_nothing_partition', '65684733', '', '', '', '') ('6568434', 'a_nothing_partition', '65684734', '', '', '', '') ('24132', 'a_nothing_partition', '24132', '', '', '74454773', '') Change your main function to the following to get similar output to NotInFunct: def main(): for test in read(): for col in test: print(col) print('################') Output: 6568433 a_nothing_partition 65684733 ################# 6568434 a_nothing_partition 65684734 ################# 24132 a_nothing_partition 24132 74454773 #################
PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
output_rdpartition = mp.Queue() def read_partition_zipfile(infile,stop_words,startline,endline): # endline = startline + 100 chunk_user_d = defaultdict(lambda: defaultdict(list)) chunk_user_withoutstamp_d = defaultdict(list) with gzip.open(in_file, "rb") as f: for j, line in enumerate(f): if j >= startline and j < endline: if j%10000==0 : print "processed",j,"lines" line = line[:-1].split("|:|") time_stamp = int(line[0]) user_id = line[-1] keywords=line[1].split(',') keywords = [item.lower() for item in keywords if len(item)>=2] keywords = [item for item in keywords if item not in stop_words] # print 'user_id', user_id # print 'time_stamp', time_stamp # print 'keywords',keywords chunk_user_d[user_id][time_stamp] += keywords chunk_user_withoutstamp_d[user_id] +=keywords # print chunk_user_withoutstamp_d,'chunk_user_withoutstamp_d' # return chunk_user_d, chunk_user_withoutstamp_d output_rdpartition.put((chunk_user_d,chunk_user_withoutstamp_d)) def main(): start_time = datetime.datetime.now() print("at the start of main") user_id ='1ss7fef4' lenth = 0 tf_idf = defaultdict(int) key_dic = defaultdict(float) time_latest = 0 processes_rd = [mp.Process(target = read_partition_zipfile, args =(in_file, stop_words, p_index[j], p_index[j+1])) for j in range(0,3)] for p in processes_rd: p.start() results_rd = [output_rdpartition.get() for p in processes_rd] # results_rd[0]is the chunkuser ,results_rd[1]is the chunkuser_without stamp print results_rd if __name__ == '__main__': stop_words = 
"a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your" stop_words = stop_words.split(",") in_file = 'uniq.txt.gz' p_index = range(0,28000000,2800000) main() It seems that it is because of the queue issue, i can print within function ,but i can not return the output of the function
Python syntax error, command, *args= line.split()
I am getting a Python syntax error for command, *args = line.split() Here is my bce.py file import sys import os import pcn import urp class BCE(object): def __init__(self, inPath, outPath): self.inPath = inPath self.outPath = outPath self.eqs = {} self.operations = { "r": self.read_pcn, "!": self.do_not, "+": self.do_or, "&": self.do_and, "p": self.write_pcn, "q": self.quit } self.done = False def process(self, commandFilePath): with open(commandFilePath, "r") as f: for line in f: command, *args = line.split() self.operations[command](*args) if self.done: return def read_pcn(self, fNum): _, self.eqs[fNum] = pcn.parse(os.path.join(self.inPath, fNum + ".pcn")) def write_pcn(self, fNum): with open(os.path.join(self.outPath, fNum + ".pcn"), "w") as f: pcn.write(f, None, self.eqs[fNum]) def do_not(self, resultNum, inNum): self.eqs[resultNum] = urp.complement(self.eqs[inNum]) def do_or(self, resultNum, leftNum, rightNum): self.eqs[resultNum] = urp.cubes_or(self.eqs[leftNum], self.eqs[rightNum]) def do_and(self, resultNum, leftNum, rightNum): self.eqs[resultNum] = urp.cubes_and(self.eqs[leftNum], self.eqs[rightNum]) def quit(self): self.done = True Usage = """\ USAGE: {} COMMAND_FILE """ if __name__ == "__main__": if len(sys.argv) > 1: solutionDir = "BCESolutions" thisSolDir = os.path.join(solutionDir, sys.argv[1][-5]) try: os.mkdir(thisSolDir) except OSError: # It's okay if it's already there pass bce = BCE("BooleanCalculatorEngine", thisSolDir) bce.process(sys.argv[1]) else: print(Usage.format(sys.argv[0])) And here is my pcn.py file from itertools import islice from itertools import chain def parse(filePath): with open(filePath, "rb") as f: # First line is size of array try: lines = iter(f) numVars = int(next(lines)) cubeCount = int(next(lines)) cubes = [None]*cubeCount for i in range(cubeCount): line = next(lines) cubes[i] = tuple(islice(map(int, line.split()), 1, None)) return (numVars, tuple(cubes)) except Exception as error: raise AssertionError("Bad pcn file 
{}".format(filePath)) from error def write(f, numVars, cubes): endl = "\n" f.write(str(max(max(map(abs, cube)) for cube in cubes))) f.write(endl) f.write(str(len(cubes))) f.write(endl) cubes = tuple(set(tuple(sorted(cube, key=abs)) for cube in cubes)) for cube in cubes: f.write(' '.join(map(str, chain((len(cube),), cube)))) f.write(endl) f.write(endl)
Tuple assignment with a *star_target entry only works in Python 3. You cannot use it in Python 2. See PEP 3132 - Extended Iterable Unpacking. As a workaround, use just one target and then slice the result: split_result = line.split() command, args = split_result[0], split_result[1:]
The reading loop of QXmlReader for PyQt5 does not return the expected data
I'd like to make an QAbstractItemModel that gets its data from a series of Xml files, all situated in the same directory. Since PyQt5 no longer supports QDomDocument (or atleast i couldn't find a way to make it work), i've had to resort to a QXmlStreamReader. I'm putting the data itself in a giant python dictionary (well... not exactly giant by computer science standards) that contains other dictionaries under various keys to create a tree-like structure. this is my code so far: class DataModel(QtCore.QAbstractItemModel): def __init__(self, settingsDirectory, parent = None): super(DataModel, self).__init__(parent) settingsDirectory.setNameFilters(["*.xml"]) files = settingsDirectory.entryList() print(files) self.data = {} for i in range(len(files)): filePath = str(files[i]) file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + str(filePath)) fileOpens = file.open(file.ReadOnly | file.Text) if fileOpens: parser = QtCore.QXmlStreamReader(file) print("--------Beginning parsing----------") print("Reading file: "+str(filePath)) while not parser.atEnd(): parser.readNext() token = parser.tokenType() print("Reading tag: " + str(parser.name())) print("Tag type is: " + str(token)) if token == parser.StartDocument: self.data["XML Version"] = str(parser.documentVersion()) self.data["XML Encoding"] = str(parser.documentEncoding()) if token == parser.StartElement: tokenName = parser.name() if parser.tokenType() == parser.Characters: tokenText = parser.text() print("This tag has a text value: " + str(tokenText)) print("current data: " + str(self.data)) if token == parser.EndElement: if tokenText != None: self.data[tokenName] = tokenText else: self.data[tokenName] = {} tokenName = None tokenText = None else: print(self.tr("xml file did not open properly")) print(self.data) While this code doesn't crash or anything, it does have a few issues that i have no idea why they're happening or how to fix: 1.the tokenName never changes from None for some reason - solved 2.the 
structure of the self.data dictionary does not turn into a tree-like one, no idea why :| example data: <?xml version="1.0" encoding="UTF-8"?> <tag> <description>This is a text</description> <types> <typesAllowed></typesAllowed> <typesEnabled></typesEnabled> </types> </tag> yields the final result: {'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'typesAllowed': '\n\t\t', None: '\n', 'typesEnabled': '\n\t\t', 'description': 'This is a text'} instead of the wanted: {'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'tag': {'description': 'this is a text', typesAllowed': '\n\t\t', 'typesEnabled': '\n\t\t'}} I know these issues are most likely a result of my poor understanding of how a StreamReader works, so any and all tips would be welcome :) edit 1: the tokenName change was a silly positioning error, silly me. the code reflects the fix. edit 2: added an example and example output
This question is now solved; I took a different approach to the problem. I basically took a list into which i appended tuples (name, {}) if the StartElement token had the attribute parseAs == "element" and put an evaluated string (parseText function) into the last tuple's dictionary. When it meets an EndElement token, it finds the tuple with name == tokenName, which is the name of the current token, puts it into the previous tuple's dictionary as an entry with key name. There's a few more details as to how it works, but I'd probably just overly complicate my explanation if I included them (how it knows when to submit currData to self.data etc.) class DataModel(QtCore.QAbstractItemModel): def __init__(self, settingsDirectory, parent = None): super(DataModel, self).__init__(parent) settingsDirectory.setNameFilters(["*.xml"]) files = settingsDirectory.entryList() print(files) self.data = {} self.parsingLog = {} for i in range(len(files)): filePath = str(files[i]) file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + str(filePath)) fileOpens = file.open(file.ReadOnly | file.Text) if fileOpens: parser = QtCore.QXmlStreamReader(file) currData = [] haveStartToken = False print(self.tr("--------Beginning parsing--------")) print(self.tr("Reading file: "+str(filePath))) print(self.tr("---------------------------------")) while not parser.atEnd(): if not parser.hasError(): parser.readNext() token = parser.tokenType() print(self.tr("--------------------")) print(self.tr("Token type: " + str(self.printTokenType(token)))) if token == parser.StartElement: tokenName = parser.name() attributes = parser.attributes() parseAs = attributes.value("parseAs") print(self.tr("Reading StartElement: " + str(tokenName))) print(self.tr("parseAs: " + str(parseAs))) if parseAs == "text": textValue = self.parseText(parser.readElementText()) print(self.tr("Text Value: " + str(textValue))) if len(currData) != 0: currData[len(currData)-1][1][tokenName] = textValue else: 
print(self.tr("*******Terminating application*******")) print(self.tr("Reason: currData is empty")) print(self.tr("*******Terminating application*******")) sys.exit() elif parseAs == "element": currData.append((tokenName, {})) else: print(self.tr("******WARNING******")) print(self.tr("parseAs attribute is not given correctly")) print(self.tr("******WARNING******")) print(self.tr("--------------------")) elif token == parser.EndElement: tokenName = parser.name() print(self.tr("Reading EndElement: " + str(tokenName))) print(self.tr("currData before: " + str(currData))) if not haveStartToken: startToken = currData[0][0] haveStartToken = True for i in currData: if i[0] == tokenName: print(self.tr("Closing token: " + str(tokenName))) if i[0] != startToken: currData[len(currData)-2][1][tokenName] = currData[len(currData)-1][1] del currData[len(currData)-1] print(self.tr("currData after: " + str(currData))) print(self.tr("--------------------")) elif i[0] == startToken: print(self.tr("This is the final token, writing to self.data"), end = "") self.data[startToken] = currData[0][1] for i in range(5): time.sleep(0.25) print(self.tr("."), end = "") print(self.tr("done.")) print(self.tr("--------------------")) elif token == parser.Characters: print(self.tr("Characters value: " + str(parser.text()))) print(self.tr("--------------------")) elif token == parser.StartDocument: self.parsingLog["File: "+str(filePath)] = {} self.parsingLog["File: "+str(filePath)]["XML Version"] = str(parser.documentVersion()) self.parsingLog["File: "+str(filePath)]["XML Encoding"] = str(parser.documentEncoding()) print(self.tr("File Version: " + str(self.parsingLog["File: "+str(filePath)]["XML Version"]))) print(self.tr("File Encoding: " + str(self.parsingLog["File: "+str(filePath)]["XML Encoding"]))) elif token == parser.EndDocument: print(self.tr("Cleaning up"), end = "") for i in range(5): time.sleep(0.25) print(self.tr("."), end = "") time.sleep(0.1) print(self.tr("done.")) 
print(self.tr("self.data: " + str(self.data))) print(self.tr("types of data: yesNo (should be str) - " + str(type(self.data["building"]["specialSlot"]["yesNo"])) + " - id - should be int - " + str(type(self.data["building"]["specialSlot"]["id"])) + " - isItFloat - should be float - " + str(type(self.data["building"]["specialSlot"]["isItFloat"])))) print(self.tr("--------------------")) else: print(self.tr("XML file is not well-formatted")) else: print(self.tr("xml file did not open properly")) def parseText(self, text): if isinstance(text, str): if text == "": return str(text) for i in text: if i not in ("0123456789."): return str(text) for j in text: if j not in ("0123456789"): return float(text) return int(text) else: return ValueError def printTokenType(self, token): if token == QtCore.QXmlStreamReader.NoToken: return "NoToken" elif token == 1: return "Invalid" elif token == QtCore.QXmlStreamReader.StartDocument: return "StartDocument" elif token == QtCore.QXmlStreamReader.EndDocument: return "EndDocument" elif token == QtCore.QXmlStreamReader.StartElement: return "StartElement" elif token == QtCore.QXmlStreamReader.EndElement: return "EndElement" elif token == QtCore.QXmlStreamReader.Characters: return "Characters" elif token == QtCore.QXmlStreamReader.Comment: return "Comment" elif token == QtCore.QXmlStreamReader.DTD: return "DTD" elif token == QtCore.QXmlStreamReader.EntityReference: return "EntityReference" elif token == QtCore.QXmlStreamReader.ProcessingInstruction: return "ProcessingInstruction"
replace method doesn't work (python2.7)
I want to use following codes to replace strings like "/xxxxx/" with "/xxxxx.html" in the page_data, but doesn't work. page_data is bytes type which is downloaded by a crawler. page_data.replace(each, neweach) Only when I change them to: page_data = page_data.replace(each, neweach) the strings(each) in page_data are actually replaceed. The whole code is below: import os import sys import re import urllib import urllib2 class WebGet(object): base_url = "" urls_list = [] history_list = [] replace_ch={} def __init__(self, base_url): self.base_url = base_url[:-1] self.urls_list.append('/') self.replace_ch[">>"] = "%3E%3E" self.replace_ch["<<"] = "%3C%3C" self.replace_ch["::"] = "%3A%3A" def recurseGet(self): '''Get page data recursively''' while(len(self.urls_list) != 0): url_suffix = self.urls_list[0] self.urls_list.remove(url_suffix) self.history_list.append(url_suffix) url_to_get = self.base_url + url_suffix "Get page data with url" print "To get",url_to_get page_data = urllib2.urlopen(url_to_get).read() page_data_done = self.pageHandle(page_data) "Write the page data into file" if url_suffix[-1] == '/': url_suffix = url_suffix[:-1] if url_suffix == '': url_suffix = "index" elif url_suffix[0] == '/': url_suffix = url_suffix[1:] url_suffix.replace('/','\\') url_suffix.replace('>>','%3E%3E') url_suffix.replace('<<','%3C%3C') url_suffix.replace('::','%3A%3A') file_str = "e:\\reference\\"+url_suffix if file_str.rfind("\\") != 12: new_dir = file_str[:file_str.rfind("\\")] if os.path.isdir(file_str) == False: os.mkdir(file_str) file_str = file_str.strip()+".html" print "write file",file_str f_page = open(file_str, "wb") f_page.write(page_data_done) f_page.close def pageHandle(self, page_data): page_data.replace("http://www.cplusplus.com/","/") #here the replace works re_rule = '<a href="/reference(/\S{2,40}/)\">' list_page_urls = re.findall(re_rule, page_data) for each in list_page_urls: neweach = each neweach = neweach[:-1]+".html" #page_data = page_data.replace(each, 
neweach) page_data.replace(each, neweach) if each in page_data: print "fail replace" if each in self.history_list: continue elif each in self.urls_list: continue elif each == '/': continue self.urls_list.append(each) return page_data def main(): url = "http://www.cplusplus.com/reference/" fc = WebGet(url) fc.recurseGet() if __name__ == "__main__": main() Why could be this?
Because that's what the replace method does: returns a copy of the string with the relevant characters replaced. Apart from anything else, strings are immutable in Python, so it couldn't work any other way.