How to print for loop output and change to pandas dataframe - python

I know this is gonna be long questions, so sorry. I'm very thankful if anyone can solve my problem.
I Have code to scoring and labeling text sentiment like this:
import re
from collections import OrderedDict
import numpy as np
class sentistrength:
def __init__(self, config=dict()):
self.negasi = [line.replace('\n','') for line in open("/content/negatingword.txt").read().splitlines()]
#create sentiment words dictionary
self.sentiwords_txt = [line.replace('\n','').split(":") for line in open("/content/sentiwords_modif.txt").read().splitlines()]
self.sentiwords_dict = OrderedDict()
for term in self.sentiwords_txt:
self.sentiwords_dict[term[0]] = int(term[1])
#create boosterwords dictionary
self.boosterwords_txt = [line.replace('\n','').split(":") for line in open("boosterwords_id.txt").read().splitlines()]
self.boosterwords_dict = OrderedDict()
for term in self.boosterwords_txt:
self.boosterwords_dict[term[0]] = int(term[1])
self.negation_conf = config["negation"]
self.booster_conf = config["booster"]
self.mean_conf = False
def senti(self,term):
try:
return self.sentiwords_dict[term]
except:
return 0
def booster(self, term):
try:
return self.boosterwords_dict[term]
except:
return 0
def cek_negationword(self, prev_term, prev_term2):
#jika kata sebelumnya (index-1) adalah kata negasi, negasikan nilai -+nya
if prev_term in self.negasi or prev_term2+" "+prev_term in self.negasi:
# print prev_term
self.score = -abs(self.score) if self.score>0 else abs(self.score)
def cek_boosterword(self,term):
booster_score = self.booster(term)
if booster_score !=0 and self.score>0: self.score += booster_score
if booster_score !=0 and self.score<0: self.score -= booster_score
def cek_consecutive_term(self, prev_term):
if self.prev_score>0 and self.score >=3: self.score+=1
if self.prev_score<0 and self.score <=-3: self.score-=1
def plural_to_singular(self, term):
return re.sub(r'([A-Za-z]+)\-\1', r'\1',term)
def classify(self):
result = "neutral"
try:
if self.mean_conf:
mean_p = np.mean(self.mean_pos)
mean_n = np.mean(self.mean_neg)
print(mean_p, mean_n)
if mean_p > mean_n:
result = "positive"
elif mean_p < mean_n and not self.is_tanya:
result = "negative"
elif mean_p < mean_n and self.is_tanya:
result = "neutral"
else:
if abs(self.sentences_max_pos) > abs(self.sentences_max_neg):
result = "positive"
elif abs(self.sentences_max_pos) < abs(self.sentences_max_neg):
result = "negative"
elif abs(self.sentences_max_pos) == abs(self.sentences_max_neg):
result = "neutral"
except:
print("error ",self.sentences_max_pos, self.sentences_max_neg)
return result
def cek_neutral_term(self,terms,i):
if terms[i-1] in self.neutral_term or terms[i+1] in self.neutral_term: self.score=1
def main(self,sentence):
self.neutral_term = ['jika','kalau']
sentences = sentence.split('.')
self.sentences_max_neg = -1
self.sentences_max_pos = 1
self.sentences_score = []
self.sentences_text = []
for sentence in sentences:
self.max_neg = -1
self.max_pos = 1
self.mean_neg = [1]
self.mean_pos = [1]
self.sentence_score=[]
terms = sentence.split()
# terms = re.split(r'[\s,.]',sentence)
terms_length = len(terms)
self.is_tanya = False
self.sentence_text = ''
# print self.max_pos, self.max_neg
#SEMUA KALIMAT YANG MEMILIKI TANDA SERU MEMILIKI +ve minimal 2
self.prev_score = 0
self.pre_max_pos = []
self.pre_max_neg = []
for i,term in enumerate(terms):
# repeated_term = ''
is_extra_char = False
plural = ''
self.score = 0
# if re.search(r'[A-Za-z\-.]+',term):
# print term
if re.search(r'([A-Za-z])\1{3,}',term):
is_extra_char = True
# repeated_term =term
if re.search(r'([A-Za-z]+)\-\1',term):
plural = term
term = self.plural_to_singular(term)
#GET SENTI SCORE#
self.score = self.senti(term)
# print "senti score",term, self.score
#NEGATION HANDLER#
if self.negation_conf and self.score !=0 and i>0:self.cek_negationword(terms[i-1],terms[i-2])
# print "negation score",term, self.score
#BOOSTERWORD HANDLER#
if self.booster_conf and self.score !=0 and i>0 and i< (terms_length 1):self.cek_boosterword(terms[i-1])
if self.booster_conf and self.score !=0 and i>=0 and i<(terms_length-1):self.cek_boosterword(terms[i+1])
# print "booster score",term, self.score
# CEK neutral term
if self.score!=0 and i>1 and i<(terms_length-2): self.cek_neutral_term(terms,i)
# if self.score!=0 and i>0 and i<(terms_length-4): self.cek_neutral_term(terms,i)
self.prev_score = self.score
if self.mean_conf and self.score>0: self.mean_pos.append(self.score)
if self.mean_conf and self.score<0: self.mean_neg.append(abs(self.score))
#GET MAX SCORE +ve/-ve
self.max_pos= self.score if self.score > self.max_pos else self.max_pos
self.max_neg= self.score if self.score < self.max_neg else self.max_neg
#insert score info current term
self.pre_max_pos.append(self.max_pos)
self.pre_max_neg.append(self.max_neg)
# print self.pre_max_pos, self.pre_max_neg
if plural !='': term = plural
self.sentence_text += ' {}'.format(term)
if self.score != 0:term = "{} [{}]".format(term, self.score)
self.sentence_score.append(term)
self.sentences_text.append(self.sentence_text)
self.sentences_score.append(" ".join(self.sentence_score))
if self.is_tanya:
self.max_neg = -1
self.sentences_max_pos = self.max_pos if self.max_pos > self.sentences_max_pos else self.sentences_max_pos
self.sentences_max_neg = self.max_neg if self.max_neg < self.sentences_max_neg else self.sentences_max_neg
# print self.sentences_max_pos, self.sentences_max_neg
sentence_result = self.classify()
# print self.sentences_text
return {"classified_text":". ".join(self.sentences_score),"tweet_text":". ".join(self.sentences_text),"sentence_score":self.sentences_score,"max_positive":self.sentences_max_pos,"max_negative":self.sentences_max_neg,"kelas":sentence_result}
config = dict()
config["negation"] = True
config["booster"] = True
senti = sentistrength(config)
print(senti.main("bagus"))
the OUTPUT is like this:
{'classified_text': 'bagus [4]', 'tweet_text': ' bagus', 'sentence_score': ['bagus [4]'], 'max_positive': 4, 'max_negative': -1, 'kelas': 'positive'}
I have thousands of texts that I want to find sentiment labels for. Is there a way to loop print so that it doesn't have to print each text one by one.
And is there a way to get the output to be a dataframe with commas as column separators? So the output look like this:
|'classified_text': 'bagus [4]'| 'tweet_text': ' bagus'| 'sentence_score': ['bagus [4]']
column continuation:
|'max_positive': 4 | 'max_negative': -1 | 'kelas': 'positive'|

Related

N Puzzle with Depth First Search

I'm trying solve N Puzzle with Depth First Search using python 3.
With 3 x 3 puzzle it run good and fast but with 4 x 4 puzzle, it runs too slow and can't find solution with error: "MemoryError".
I also use "h(n) = depth + number of wrong tiles" to evaluate priority of each node.
I'm a newbie to python so hope you can help me with this
Here is my code:
import sys
import getopt
import random
import time
class State:
def __init__(self, parent, board, move, depth):
self.parent = parent
self.previousMove = move
self.board = board
self.map = ''.join(str(e) for e in board)
self.depth = depth
self.cost = self.calculateCost()
def calculateCost(self):
pos = 1
count = 0
for tile in self.board:
if tile == pos:
count += 1
pos += 1
return self.depth + 8 - count
class Puzzle:
def __init__(self, k, customBoard = None):
self.k = k
self.n = k*k - 1
self.sizeOfBoard = k*k
self.timeOfSolving = 0
self.timeOfGenerateSuccessors = 0
self.maxDeepSearch = 0
self.inititalState = State(None, self.createInitialBoard(customBoard), 'Start', 0)
self.goalBoard = self.createGoalBoard()
self.finalState = None
self.stateStorage = set() # Store states that have visited
self.path = [] # Store states that lead to goalstate
self.stack = []
def isSolvable(self, board):
# count invertion in puzzle's board
invCount = 0
for i in range(0, self.sizeOfBoard - 1):
if board[i] == 0:
continue
for j in range(i+1, self.sizeOfBoard):
if board[j] == 0:
continue
if board[i] > board[j]:
invCount += 1
# print(invCount)
if (invCount % 2 == 0):
return True
return False
def createInitialBoard(self, customBoard):
print("Creating initial state")
if customBoard is None:
board = []
lstAddSuccess = []
while 1:
board.clear()
lstAddSuccess.clear()
for count in range(0, self.k*self.k):
newTile = random.randint(0, self.n)
while newTile in lstAddSuccess:
newTile = random.randint(0, self.n)
lstAddSuccess += [newTile]
board += [newTile]
if self.isSolvable(board):
break
else:
board = [int(e) for e in customBoard]
if not self.isSolvable(board):
print("Cant find solution with this puzzle! Exiting...")
exit(-1)
return board
def createGoalBoard(self):
board = []
for count in range(1, self.n + 1):
board += [count]
board += [0]
return board
def printBoard(self, board):
for row in range(0, self.sizeOfBoard, self.k):
# for col in range(row, row + self.k):
print(board[row:row + self.k])
def generateSuccessors(self, currentState):
indexOfZero = currentState.board.index(0)
rowIndexOfZero = indexOfZero % self.k
colIndexOfZero = indexOfZero // self.k
lstSuccessors = []
# Slide to zero to up
if colIndexOfZero != 0:
newState = currentState.board.copy()
newState[indexOfZero] = newState[indexOfZero - self.k]
newState[indexOfZero - self.k] = 0
lstSuccessors.append(
State(currentState, newState, 'up', currentState.depth + 1))
# Slide zero to down
if colIndexOfZero != self.k - 1:
newState = currentState.board.copy()
newState[indexOfZero] = newState[indexOfZero + self.k]
newState[indexOfZero + self.k] = 0
lstSuccessors.append(
State(currentState, newState, 'down', currentState.depth + 1))
# slide zero to left
if rowIndexOfZero != 0:
newState = currentState.board.copy()
newState[indexOfZero] = newState[indexOfZero - 1]
newState[indexOfZero - 1] = 0
lstSuccessors.append(
State(currentState, newState, 'left', currentState.depth + 1))
# Slide zero to right
if rowIndexOfZero != self.k - 1:
newState = currentState.board.copy()
newState[indexOfZero] = newState[indexOfZero + 1]
newState[indexOfZero + 1] = 0
lstSuccessors.append(
State(currentState, newState, 'right', currentState.depth + 1))
lstSuccessorsCost = [ele.cost for ele in lstSuccessors]
lstSuccessorsInOrderOfCost = []
for i in range(0, len(lstSuccessorsCost)):
lstSuccessorsInOrderOfCost.append(lstSuccessors[lstSuccessorsCost.index(min(lstSuccessorsCost))])
lstSuccessorsCost[lstSuccessorsCost.index(min(lstSuccessorsCost))] = 100
return lstSuccessorsInOrderOfCost
def solvePuzzle(self, currentState):
self.stack.append(currentState)
self.stateStorage.add(currentState.map)
while len(self.stack) > 0:
currentState = self.stack.pop()
if currentState.board == self.goalBoard:
# find path
# self.printBoard(currentState.board)
self.finalState = currentState
print("Solving " + str(self.n) + " puzzle done!")
return
start_time_gen = time.time()
lstSuccessor = self.generateSuccessors(currentState)
end_time_gen = time.time()
timeOfGen = end_time_gen - start_time_gen
self.timeOfGenerateSuccessors += timeOfGen
for successor in lstSuccessor[::-1]:
if successor.map not in self.stateStorage:
self.stack.append(successor)
self.stateStorage.add(successor.map)
if successor.depth > self.maxDeepSearch:
self.maxDeepSearch += 1
print("Cant solve puzzle! Exiting...")
exit(-1)
def solve(self):
start_time = time.time()
self.solvePuzzle(self.inititalState)
end_time = time.time()
self.timeOfSolving = end_time - start_time
print("Running time: " + str(self.timeOfSolving))
print("Max Search Dept: " + str(self.maxDeepSearch))
print("Final State Dept: " + str(self.finalState.depth))
def printInitialBoard(self):
self.printBoard(self.inititalState.board)
def printPath(self):
if self.finalState is None:
print("No solution found!")
return
path = []
state = self.finalState
while (state is not None):
if state.previousMove is not None:
path.append(state.previousMove)
state = state.parent
print("path: "),
print(path[::-1])
def main(argv):
# if (len(argv) != 1 or int(argv[0]) not in range(1, 10000)):
# print("Input must be k of integer, which is k*k matrix of puzzle")
# exit()
# eight_puzzle = Puzzle(int(argv[0]))
k = int(input("Enter size of k * k puzzle, k = "))
while k not in range(2, 100):
print("k must be in range 2 - 100")
k = int(input("Enter size of k * k puzzle, k = "))
print("""
Choose:
1. Randome puzzle
2. Custome puzzle
""")
file = input()
if int(file) == 1:
puzzle = Puzzle(k)
elif int(file) == 2:
board = input("Enter puzzle: ")
puzzle = Puzzle(k ,list(board.split(" ")))
puzzle.printInitialBoard()
puzzle.solve()
puzzle.printPath()
if __name__ == "__main__":
main(sys.argv[1:])

Python: Recursion problems

I am trying to make a sudoku solver that solves boards very quickly. At the moment my solver works on easy boards but never terminates on harder boards. I believe it has something to do with my recursion because easy boards do not require recursion and hard boards do. Any help is appreciated.
import sys
def rowno(i):
return i // 9
def colno(i):
return i % 9
def boxno(i):
return (i // 9 // 3 )*3 + (i // 3) % 3
def isNeighbor(i, j):
if rowno(i) == rowno(j) or colno(i) == colno(j) or boxno(i) == boxno(j):
return True
else:
return False
def getFileName():
if sys.platform == "win32":
filename = input("Filename? ")
else:
filename = sys.argv[-1]
return filename
solutionlist = []
class Board(object):
def __init__(self, puzzle):
self.puzzle = puzzle
self.board = [Cell(int(value), idx) for idx, value in enumerate(puzzle)]
self.change = False
def printAll(self):
print [cell.candidates for cell in self.board]
#return str(" ")
def update(self):
self.change = False
l = [cell for cell in self.board if len(cell.candidates) == 1]
for i in l:
for j in xrange(81):
if isNeighbor(i.dex, j) and i.dex != j:
old = self.board[j].candidates
self.board[j].delCandidate(i.value)
if len(old) != len(self.board[j].candidates):
self.change = True
def toString(self):
str1 = ''.join(str(e.value) for e in self.board)
return str1
def solved(self):
for cell in self.board:
if len(cell.candidates) != 1:
return False
return True
def solve(self):
self.change = True
while self.change == True:
self.update()
if self.solved():
solutionlist.append(self.board)
return
l = [cell for cell in self.board if len(cell.candidates) > 1]
for i in l:
for j in i.candidates:
newBoard = Board(self.toString())
curLen = 12
curCell = -1
for u in l:
if len(u.candidates)<curLen:
curLen=len(u.candidates)
curCell = u.dex
for c in newBoard.board[curCell].candidates:
newBoard.board[curCell].candidates = [int(c)]
newBoard.board[curCell].value = int(c)
newBoard.solve()
return
def __repr__(self):
l = [cell.value for cell in self.board]
return str(l)
class Cell(object):
def __init__(self, value, dex):
self.value = value
self.dex = dex
if value == 0:
self.candidates = [1,2,3,4,5,6,7,8,9]
else:
self.candidates = [int(value)]
def __str__(self):
return str(self.value)
def delCandidate(self, value):
# deletes value from candidate list
#return self.candidate.remove(value);
self.candidates = [x for x in self.candidates if x != value]
if len(self.candidates) == 1:
self.value = self.candidates[0]
easy = "700583006006001405052006083300200958500078060648010300060802500003150072215600030"
twosol = "000805200800000401705040009000100702040000000006430000030900000010006080000000000"
hard = "040090008000000070060000120030020000005839060080600700050170600000043000003000200"
#easy solution: 794583216836721495152496783371264958529378164648915327967832541483159672215647839
b = Board(hard)
print b
b.solve()
print "end of the line"
for i in solutionlist:
print [cell.value for cell in i]
print "\n"
One major issue is the line for i in l: in the solve method. Since you're recursing, you only need to fill in one cell - the recursion will take care of the rest. So instead of for i in l:, just recurse on the one cell that is the best candidate (curCell):
l = [cell for cell in self.board if len(cell.candidates) > 1]
if len(l) > 0:
newBoard = Board(self.toString())
curLen = 12
curCell = -1
for u in l:
if len(u.candidates)<curLen:
curLen=len(u.candidates)
curCell = u.dex
for c in newBoard.board[curCell].candidates:
newBoard.board[curCell].candidates = [int(c)]
newBoard.board[curCell].value = int(c)
newBoard.solve()

How to make my Python code read from the second line for my lexer

I am busy building a small compiler that reads the file, finds keywords and then does what the keyword specifies. I have an issue that it starts reading the file from the begining each time and icould not find a way to solve this problem with out nested if statements.
swift.py:
from sys import *
import re
tokens = ["PRINT"]
def open_file(filename):
with open (filename, "r") as filecontents:
data = filecontents.read().replace('\n', ' ')
return data
def lex(filecontents):
words = filecontents.split(" ")
filecontents = list(filecontents)
word = []
states = 0
statesRun = False
statesBool = True
string = ""
stringAmount = 0
toks = ""
i = 0.0
for i in range(len(words)):
if words[int(i)].upper() == tokens[0].upper():
word.append("PRINT")
for char in filecontents:
toks += char
if char == "\"":
if states == 0:
statesRun = True
if char == "\"" and statesBool == False:
states = 1
string += char
statesRun = False
statesBool = False
elif states == 1:
statesRun = False
if statesRun:
string += char
stringAmount += 1
word.append("STRING:" + string)
string = ""
statesBool = True
statesRun = False
states = 0
return word
def parse(toks):
i = 0
while(i < len(toks)):
if toks[i].upper() + " " + toks[i+1][0:6].upper() == "PRINT STRING":
print(toks[i+1][7:])
i+=2
class core():
data = open_file(argv[1])
toks = lex(data)
parse(toks)
core()
test.swift:
print "Hello"
print "jobs"
input in cmd:
python swift.py test.swift
I have researched programming languages, compilers, interpreters, parsers, lexers and syntax. I based this code of of this youtube seris(episode 1 - 2)
episode 2
Works now thanks to Markku K!
from sys import *
import re
lines = []
tokens = ["PRINT"]
def open_file(filename):
data = open(filename, "r")
for line in data.readlines():
lines.append(line.replace('\n', ''))
with open (filename, "r") as filecontents:
data = filecontents.read().replace('\n', ' ')
return data
def lex(filecontents):
words = filecontents.split(" ")
filecontents = list(filecontents)
word = []
states = 0
statesRun = False
statesBool = True
string = ""
stringAmount = 0
toks = ""
i = 0.0
z = 0
for i in range(len(words)):
if words[int(i)].upper() == tokens[0].upper():
word.append("PRINT")
for char in lines[z]:
toks += char
if char == "\"":
if states == 0:
statesRun = True
if char == "\"" and statesBool == False:
states = 1
string += char
statesRun = False
statesBool = False
elif states == 1:
statesRun = False
if statesRun:
string += char
stringAmount += 1
word.append("STRING:" + string)
string = ""
statesBool = True
statesRun = False
states = 0
z += 1
return word
def parse(toks):
i = 0
while(i < len(toks)):
if toks[i].upper() + " " + toks[i+1][0:6].upper() == "PRINT STRING":
print(toks[i+1][7:])
i+=2
def run():
data = open_file(argv[1])
toks = lex(data)
parse(toks)
run()

Linebreak not working - Python

Somehow the linebreaks are not working as they should.
This is what I get:
Expected:
O meu u2 2 post
http://www.yahoo.com
1 Gosto, 0 Nao gosto
<BLANKLINE>
O meu u2 post
http://www.altavista.com
1 Gosto, 0 Nao gosto
Got:
'O meu u2 2 post\nhttp://www.yahoo.com\n1 Gosto, 0 Nao Gosto\n\nO meu u2\nhttp://www.yahoo.com\n1 Gosto, 0 Nao Gosto'
This is the code used in the function.
The important parts should be the str and showRecentComments functions
class Comments():
def __init__(self, u=None, text='', link=None):
self.u = u
self.text = text
self.link = link
self.topo = None
self.fim = None
def __str__(self):
actual = self.topo
s = ''
if actual == None:
return None
while actual != None:
if actual.seg == None:
s += str(actual)
actual = actual.seg
else:
s += str(actual) + '\n' + '\n'
actual = actual.seg
return s
def add(self,comment):
if self.topo == None:
self.topo = comment
self.fim = comment
else:
comment.seg = self.topo
self.topo.ant = comment
self.topo = comment
def remove(self,comment):
actual = self.topo
if (self.topo == self.fim) and (self.topo == comment):
self.topo = None
self.fim = None
while actual!=None:
if actual == comment:
if self.topo==comment:
actual.seg.ant = None
self.topo = actual.seg
elif self.fim==comment:
actual.ant.seg = None
self.fim = actual.ant
else:
actual.seg.ant = actual.ant
actual.ant.seg = actual.seg
break
else:
actual = actual.seg
def countLike(self):
count = 0
actual = self.topo
while actual != None:
if len(actual.likeList) >= 1:
count += 1
actual = actual.seg
else:
actual = actual.seg
return count
def showRecentComments(self,n):
count = 1
actual = self.topo
sC = ''
if actual == None:
return None
while actual != None:
if count < n:
sC += str(actual) + '\n' + '\n'
count += 1
actual = actual.seg
elif count == n:
sC += str(actual)
count += 1
actual = actual.seg
elif count > n:
break
return sC
Regards, Nelson Gregório
It looks like you're looking at the representation of the string, which will show you the newline characters as \n. If you print or write to e.g. stdout (sys.stdout.write(s)) the string instead, the newlines will be expanded.

Parsing Data from live website in Python Enumerate problem!

The following script is supposed to fetch a specific line number and parse it from a live website. It works for like 30 loops but then it seems like enumerate(f) stops working correctly... the "i" in the for loop seems to stop at line 130 instead of like 200 something. Could this be due to the website I'm trying to fetch data from or something else? Thanks!!
import sgmllib
class MyParser(sgmllib.SGMLParser):
"A simple parser class."
def parse(self, s):
"Parse the given string 's'."
self.feed(s)
self.close()
def __init__(self, verbose=0):
"Initialise an object, passing 'verbose' to the superclass."
sgmllib.SGMLParser.__init__(self, verbose)
self.divs = []
self.descriptions = []
self.inside_div_element = 0
def start_div(self, attributes):
"Process a hyperlink and its 'attributes'."
for name, value in attributes:
if name == "id":
self.divs.append(value)
self.inside_div_element = 1
def end_div(self):
"Record the end of a hyperlink."
self.inside_div_element = 0
def handle_data(self, data):
"Handle the textual 'data'."
if self.inside_div_element:
self.descriptions.append(data)
def get_div(self):
"Return the list of hyperlinks."
return self.divs
def get_descriptions(self, check):
"Return a list of descriptions."
if check == 1:
self.descriptions.pop(0)
return self.descriptions
def rm_descriptions(self):
"Remove all descriptions."
self.descriptions.pop()
import urllib
import linecache
import sgmllib
tempLine = ""
tempStr = " "
tempStr2 = ""
myparser = MyParser()
count = 0
user = ['']
oldUser = ['none']
oldoldUser = [' ']
array = [" ", 0]
index = 0
found = 0
k = 0
j = 0
posIndex = 0
a = 0
firstCheck = 0
fCheck = 0
while a < 1000:
print a
f = urllib.urlopen("SITE")
a = a+1
for i, line in enumerate(f):
if i == 187:
print i
tempLine = line
print line
myparser.parse(line)
if fCheck == 1:
result = oldUser[0] is oldUser[1]
u1 = oldUser[0]
u2 = oldUser[1]
tempStr = oldUser[1]
if u1 == u2:
result = 1
else:
result = user is oldUser
fCheck = 1
user = myparser.get_descriptions(firstCheck)
tempStr = user[0]
firstCheck = 1
if result:
array[index+1] = array[index+1] +0
else:
j = 0
for z in array:
k = j+2
tempStr2 = user[0]
if k < len(array) and tempStr2 == array[k]:
array[j+3] = array[j+3] + 1
index = j+2
found = 1
break
j = j+1
if found == 0:
array.append(tempStr)
array.append(0)
oldUser = user
found = 0
print array
elif i > 200:
print "HERE"
break
print array
f.close()
Perhaps the number of lines on that web page are fewer than you think? What does this give you?:
print max(i for i, _ in enumerate(urllib.urlopen("SITE")))
Aside: Your indentation is stuffed after the while a < 1000: line. Excessive empty lines and one-letter names don't assist the understanding of your code.
enumerate is not broken. Instead of such speculation, inspect your data. Suggestion: replace
for i, line in enumerate(f):
by
lines = list(f)
print "=== a=%d linecount=%d === % (a, len(lines))
for i, line in enumerate(lines):
print " a=%d i=%d line=%r" % (a, i, line)
Examine the output carefully.

Categories