Splitting an SMS Reply - python

When replying to an SMS, I have a limit of 160 characters. I currently have code set up to take a reply (which can be longer than 160 characters) and split it into a list of multiple texts, each under 160 characters. It's also set up so that it keeps words whole. Here it is:
def split_reply(repl):  # wrapped in a function so the return below works
    # repl is the message to be sent; it may be longer than 160 characters
    texts = []
    words = repl.split()
    curtext = ''
    for word in words:
        # for the first word, drop the space
        if len(curtext) == 0:
            curtext += word
        # check if there's enough space left in the current message
        elif len(curtext) <= 155 - (len(word) + 1):
            curtext += ' ' + word
        # not enough space: start a new message
        else:
            texts.append(curtext)
            curtext = word
    if curtext != '':
        texts.append(curtext)
    return texts
However, I now want to modify it so that it appends "reply m for more" to the end of every second message. Any ideas on how to do this?
(I'm writing the code in Python.)

reply = "text to be sent ...."
texts = []
count = 0
current_text = []
for word in reply.split():
if count + len(word) < (160 if len(texts) % 2 == 0 else (160-17)):
current_text.append(word)
count += (len(word) + 1)
else:
count = 0
if len(texts) % 2 != 0):
#odd-numbered text gets additional message...
texts.append(" ".join(current_text) + "\nreply m for more")
else:
texts.append(" ".join(current_text))
current_text = []
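As a quick sanity check (a minimal sketch, assuming the texts list built above):

# every message, including the suffixed odd-numbered ones, must fit in 160 chars
assert all(len(t) <= 160 for t in texts)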

def sms_calculator(msg_text):
    sms_lst = []
    if len(msg_text) == 0:
        return sms_lst
    l_m_text = msg_text.split()
    # bail out (empty list) if any single word is longer than 160 characters
    if len(max(l_m_text, key=len)) > 160:
        return sms_lst
    sms_string = l_m_text[0]
    for i in range(1, len(l_m_text)):
        if len(sms_string + ' ' + l_m_text[i]) < 160:
            sms_string = sms_string + ' ' + l_m_text[i]
        else:
            sms_lst.append(sms_string)
            sms_string = l_m_text[i]
    sms_lst.append(sms_string)
    return sms_lst
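For example (the message here is invented for illustration):

parts = sms_calculator("this is a long reply " * 20)
print([len(p) for p in parts])  # each chunk stays under 160 characters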


diff list of multiline strings with difflib without knowing which were added, deleted or modified

I have two lists of multiline strings and I try to get the diff lines for these strings. First I tried to just split all lines of each string, handle all these strings as one big "file", and get the diff for it, but I had a lot of bugs. I cannot just diff by index, since I do not know which multiline string was added, which was deleted and which one was modified.
Let's say I have the following example:
import difflib
oldList = ["one\ntwo\nthree","four\nfive\nsix","seven\neight\nnine"]
newList = ["four\nfifty\nsix","seven\neight\nnine","ten\neleven\ntwelve"]
oldAllTogether = []
for string in oldList:
    oldAllTogether.extend(string.splitlines())
newAllTogether = []
for string in newList:
    newAllTogether.extend(string.splitlines())
diff = difflib.unified_diff(oldAllTogether, newAllTogether)
So I somehow have to find out which strings belong to each other.
I had to implement my own code in order to get the desired output. It is basically the same as Differ.compare(), with the difference that we look at multiline blocks instead of lines. So the code would be:
diffString = ""
oldList = ["one\ntwo\nthree","four\nfive\nsix","seven\neight\nnine"]
newList = ["four\nfifty\nsix","seven\neight\nnine","ten\neleven\ntwelve"]
a = oldList
b = newList
cruncher = difflib.SequenceMatcher(None, a, b)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if tag == 'replace':
best_ratio, cutoff = 0.74, 0.75
oldstrings = a[alo:ahi]
newstrings = b[blo:bhi]
for j in range(len(newstrings)):
newstring = newstrings[j]
cruncher.set_seq2(newstring)
for i in range(len(oldstrings)):
oldstring = oldstrings[i]
cruncher.set_seq1(oldstring)
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_old, best_new = cruncher.ratio(), i, j
if best_ratio < cutoff:
#added string
stringLines = newstring.splitlines()
for line in stringLines: diffString += "+" + line + "\n"
else:
#replaced string
start = False
for diff in difflib.unified_diff(oldstrings[best_old].splitlines(),newstrings[best_new].splitlines()):
if start:
diffString += diff + "\n"
if diff[0:2] == '##':
start = True
del oldstrings[best_old]
#deleted strings
stringLines = []
for string in oldstrings:
stringLines.extend(string.splitlines())
for line in stringLines: diffString += "-" + line + "\n"
elif tag == 'delete':
stringLines = []
for string in a[alo:ahi]:
stringLines.extend(string.splitlines())
for line in stringLines:
diffString += "-" + line + "\n"
elif tag == 'insert':
stringLines = []
for string in b[blo:bhi]:
stringLines.extend(string.splitlines())
for line in stringLines:
diffString += "+" + line + "\n"
elif tag == 'equal':
continue
else:
raise ValueError('unknown tag %r' % (tag,))
which results in the following:
print(diffString)
four
-five
+fifty
six
-one
-two
-three
+ten
+eleven
+twelve
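For reference, the opcodes the SequenceMatcher produces for the two lists are what drive the branches above (a minimal standalone sketch; the lists are repeated so it runs on its own):

import difflib
oldList = ["one\ntwo\nthree","four\nfive\nsix","seven\neight\nnine"]
newList = ["four\nfifty\nsix","seven\neight\nnine","ten\neleven\ntwelve"]
sm = difflib.SequenceMatcher(None, oldList, newList)
for tag, alo, ahi, blo, bhi in sm.get_opcodes():
    # 'replace' covers modified blocks, 'delete'/'insert' removed and added ones
    print(tag, oldList[alo:ahi], newList[blo:bhi])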

How to replace special characters in string - selenium python

I have a piece of code as follows. I want to take a title and remove the special symbols " !##$%^&* " from it, but nothing I have tried works. I hope someone can help; thank you very much.
try:
    title = driver.find_element(By.XPATH, '/html/body/main/section[2]/div/div/article/div[3]/p[1]/span').text
    print(title)
    if title.count("#") > 0:
        titles.append(title)
        titles[number] = title[0:title.index('#')]
        number += 1
    else:
        titles.append(title)
        number += 1
    if titles[number-1] == '':
        titles[number-1] = f"Invalid Title"
    banned_char = '<>:"/\|?*'
    for character in banned_char:
        if title.count(character) > 0:
            titles[number-1] = title[title.replace('<>:"/\|?*',' ')]
except:
    titles.append(f'Failed Title number {number}')
    number += 1
    print(f'Download {number} have no title.')
I see two mistakes in your code:
replace searches for the exact string '<>:"/\|?*', so you should replace every character separately, .replace('<',' ').replace('>',' ').replace(':',' ') and so on (or run it in a for-loop).
you have to assign title = title.replace(...), not title[title.replace(...)].
banned_char = '<>:"/\|?*'
for character in banned_char:
    title = title.replace(character, ' ')
# --- after loop ---
titles[number-1] = title
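As a side note, str.translate can do all the replacements in one pass (a minimal sketch of the same idea, not from the original answer; the example title is invented):

banned_char = '<>:"/\\|?*'
table = str.maketrans({c: ' ' for c in banned_char})  # each banned char -> space
title = 'Movie: Part/2 <HD>'.translate(table)         # hypothetical input
print(title)  # 'Movie  Part 2  HD '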

Trying to add elements to list but getting out of range errors

import re
sifrelenmisdizi = []
kelimeler = []
bulunankelimeler = []
input = input("Lütfen Şifrelenmiş Veriyi giriniz : ")

def sifrecoz(message):  # cracking the password here
    encrypted = ""
    for i in range(25):
        for char in message:
            value = ord(char) + 1
            valuex = value % 123
            if valuex <= 0:
                valuex = 97
                encrypted += chr(valuex)
            elif valuex == 33:
                encrypted += chr(32)
            else:
                encrypted += chr(valuex)
        message = encrypted
        sifrelenmisdizi.append(encrypted)
        encrypted = ""

def kelime_getir(dosya_adi):  # reading the words from "kelimeler.txt" here
    with open(dosya_adi, 'r', encoding='utf-8') as input_file:
        dosya_icerigi = input_file.read()
        kelime_listesi = dosya_icerigi.split()
        index = 0
        while index <= 1164053:
            kelimeler.append(kelime_listesi[index])  # this is where I get the error
            index += 1
        return kelimeler

sifrecoz(input)
kelime_getir("kelimeler.txt")
for i in range(len(kelimeler)):
    for j in range(len(sifrelenmisdizi)):
        x = re.split("\s", sifrelenmisdizi[j])
        for k in range(len(x)):
            if kelimeler[i] == x[k]:
                bulunankelimeler.append(kelimeler[i])
print("Kırılmış şifreniz : ", bulunankelimeler)
# selam daktilo dalga = ugnco eblujmp ebmhb
Here I am coding a password-cracking program that applies Caesar decryption to the encrypted data and compares the results with the "kelimeler" list.
I'm trying to add words to the "kelimeler" list, but I'm getting an out-of-range error.
This is my word list:
kelimeler.txt - 16.9 MB: https://dosya.co/31174l7qq8zh/kelimeler.txt.html
It appears that the function kelime_getir is expected to return a list of all the words in the file (which has one word per line).
Therefore:
def kelime_getir(dosya_adi):
    with open(dosya_adi, encoding='utf-8') as txt:
        return list(map(str.strip, txt))
...is all you need
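With that, the hard-coded index goes away entirely (a sketch, using the file name from the question):

kelimeler = kelime_getir("kelimeler.txt")
print(len(kelimeler))  # however many words the file actually contains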

putting functions that read user input files into a loop using exceptions

The program reads a file of keywords with number values attached to them. Then it reads a file of a couple thousand tweets containing the latitude and longitude and the text of the tweet. You have to sort the tweets into specific regions and then calculate a sentiment average for each region based on the keywords and values in the first document. The user has to input these two files, and it has to have a try statement with exception errors. The functions work alone to calculate the proper values, but when I put them in the try statement I get these errors:
Traceback (most recent call last): line 129, in main(); line 16, in sortKeys(keys); and the last error at line 56, keyword[lines[0]] = int(lines[1]): IndexError: list index out of range
Is there anything I can do to fix it?
eastern = []
central = []
mountain = []
pacific = []
keyword = {}
easternsum = []
centralsum = []
mountainsum = []
pacificsum = []

def main():
    done = False
    while not done:
        try:
            keys = input("Enter file: ")
            readkeys(keys)
            sortKeys(keys)
            tweets = input("Enter second file: ")
            readtweets(tweets)
            sorttweet(tweets)
            calcsentiment()
            print("The eastern amount of tweets is", len(easternsum))
            print("The eastern happiness score is", sum(easternsum)/len(easternsum))
            print("The central amount of tweets is", len(centralsum))
            print("The central happiness score is", sum(centralsum)/len(centralsum))
            print("The mountain amount of tweets is", len(mountainsum))
            print("The mountain happiness score is", sum(mountainsum)/len(mountainsum))
            print("The pacific amount of tweets is", len(pacificsum))
            print("The pacific happiness score is", sum(pacificsum)/len(pacificsum))
            done = True
        except IOError:
            print("Error, file not found.")
        except ValueError:
            print("Invalid file.")
        except RuntimeError as error:
            print("Error", str(error))

def readkeys(keys):
    keys = open(keys, "r")

def readtweets(tweets):
    tweets = open(tweets, "r")

def sortKeys(keys):
    for line in keys:
        lines = line.split(",")
        keyword[lines[0]] = int(lines[1])

def sorttweet(tweets):
    for line in tweets:
        stuff = line.split(" ", 5)
        long = float(stuff[0].strip("[,"))
        lat = float(stuff[1].strip('],'))
        tweet = stuff[5]
        if 24.660845 < long < 49.189787 and -87.518395 < lat < -67.444574:
            eastern.append(tweet)
        if 24.660845 < long < 49.189787 and -101.998892 < lat < -87.518395:
            central.append(tweet)
        if 24.660845 < long < 49.189787 and -115.236428 < lat < -101.998892:
            mountain.append(tweet)
        if 24.660845 < long < 49.189787 and -125.242264 < lat < -115.236428:
            pacific.append(tweet)

def calcsentiment():
    for tweet in eastern:
        tweetlist = tweet.split()
        count = 0
        tweetV = 0
        for word in tweetlist:
            if word in keyword:
                count = count + 1
                tweetV = tweetV + keyword[word]
        if count > 0:
            easternsum.append(tweetV / count)
    for tweet in central:
        tweetlist2 = tweet.split()
        count = 0
        tweetV = 0
        for word in tweetlist2:
            if word in keyword:
                count = count + 1
                tweetV = tweetV + keyword[word]
        if count > 0:
            centralsum.append(tweetV / count)
    for tweet in mountain:
        tweetlist3 = tweet.split()
        count = 0
        tweetV = 0
        for word in tweetlist3:
            if word in keyword:
                count = count + 1
                tweetV = tweetV + keyword[word]
        if count > 0:
            mountainsum.append(tweetV / count)
    for tweet in pacific:
        tweetlist4 = tweet.split()
        count = 0
        tweetV = 0
        for word in tweetlist4:
            if word in keyword:
                count = count + 1
                tweetV = tweetV + keyword[word]
        if count > 0:
            pacificsum.append(tweetV / count)

calcsentiment()
main()
You have a problem here:
def sortKeys(keys):
    for line in keys:
        lines = line.split(",")
        keyword[lines[0]] = int(lines[1])
When you split the line, you don't get two tokens, just one.
That happens when the line you are trying to split does not contain a ',' character.
Try something like "xxxx".split(",") in a Python console and you will see that the result is ["xxxx"], a list with just one element, while in your code lines[1] tries to access the second element of the list.
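Note also that in main() above, sortKeys(keys) is called with the file name string rather than an open file (readkeys opens the file but never returns it), so the loop iterates over the characters of the name, none of which contain a comma. A hedged fix is to have the read function return the open file and to skip malformed lines before indexing (a sketch, assuming the comma-separated keyword format described above):

def readkeys(keys):
    return open(keys, "r")

def sortKeys(keys):
    for line in keys:
        lines = line.strip().split(",")
        if len(lines) < 2:
            continue  # blank or malformed line: nothing to index
        keyword[lines[0]] = int(lines[1])

# in main(): sortKeys(readkeys(keys)) instead of two separate calls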

Python Wiki Path Searching

On a personal whim I have written some code to search for the shortest series of links between any two Wikipedia articles. It turned out to be very brute-force and takes a long, long time to find the goal if it's more than a link or two deep, but it works! I will eventually keep track of and make use of the link paths, but I wanted to get the search working optimally first. Is there a faster way to do this, or a good way to cut some major corners here?
import urllib2
from bs4 import BeautifulSoup

Start = 'http://en.wikipedia.org/wiki/Alan_Reid_%28politician%29'
End = 'http://en.wikipedia.org/wiki/Ayr'

#Using BeautifulSoup, this grabs the page
def soup_request(target):
    request = urllib2.Request(target)
    request.add_header("User-Agent", "Mozilla/5.0")
    page = urllib2.urlopen(target)
    soup = BeautifulSoup(page)
    return soup

#This will grab all Wiki links off a given page
def get_links(Start):
    soup = soup_request(Start)
    Wiki_links = []
    #Finds all links
    for url in soup.findAll('a'):
        result = url.get('href')
        try:
            if str(result)[:5] == '/wiki':
                Wiki_links.append(result)
        except:
            pass
    for q in range(len(Wiki_links)):
        Wiki_links[q] = 'http://en.wikipedia.org' + str(Wiki_links[q])
    print "Got new links from", Start
    return Wiki_links

#This will check all the given links to see if the title matches the goal webpage
def check_links(Links, End):
    goalsoup = soup_request(End)
    goaltitle = goalsoup.html.title
    Found = False
    count = 0
    for q in Links:
        if Found:
            break
        length = len(Links)
        #Runs through all the given links and checks their titles for the correct one
        if q is not None:
            count += 1
            soup = soup_request(q)
            print "Checked", count, "links out of", length
            try:
                title = soup.html.head.title
                if title == goaltitle:
                    Found = True
                    print "Found it!"
                    break
            except:
                print 'doh'
                pass
    return Found

#Top function to do all the stuff in the right order, applying a maximum depth of how deep into the links to go
def wiki_crawl(Start, End, depth):
    Old_Links = [Start]
    count = depth
    while count > 0:
        New_Links = []
        for q in range(len(Old_Links)):
            New_Links.extend(get_links(Old_Links[q]))
        Found = check_links(New_Links, End)
        if Found:
            print "All done."
            break
        Old_Links = New_Links
        count -= 1
        print "_______________________________________________________________ROUND DONE"
    if not Found:
        print "Did not find the page, you must go deeper!"

wiki_crawl(Start, End, 2)
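One major corner to cut (a sketch, not from the original post): keep a visited set so the crawl never re-fetches a page it has already seen, since the frontier otherwise fills up with duplicates. For brevity this compares URLs directly instead of page titles, which misses redirects that the title check above would catch:

from collections import deque

def wiki_crawl_bfs(start, end, depth):
    # breadth-first search reusing the get_links() helper defined above;
    # the visited set prevents fetching any page more than once
    visited = set([start])
    frontier = deque([start])
    for _ in range(depth):
        next_frontier = deque()
        while frontier:
            for link in get_links(frontier.popleft()):
                if link == end:
                    return True          # goal URL reached
                if link not in visited:  # skip already-seen pages
                    visited.add(link)
                    next_frontier.append(link)
        frontier = next_frontier
    return False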
Here are some functions to take info from a wiki page. The only problem with them is that sometimes they take out a space from the info on the webpage.
def take_out_parenthesis(st):
    string = list(st)
    for a in string:
        if a == '(':
            del string[st.find(a)]
        if a == ')':
            del string[st.find(a) - 1]
    return ''.join(string)

def take_out_tags(string):
    st = list(string)
    odd = ['<', '>']
    times = 0
    for a in string:
        if a in odd:
            times += 1
    times /= 2
    for b in range(times):
        start = string.find('<') - 1
        end = string.find('>')
        bet = end - start + 1
        for a in range(bet):
            del st[start]
        string = ''.join(st)
    return string

def take_out_brackets(string):
    st = list(string)
    odd = ['[', ']']
    times = 0
    for a in string:
        if a in odd:
            times += 1
    times /= 2
    for b in range(times):
        start = string.find('[') - 1
        end = string.find(']')
        bet = end - start + 1
        for a in range(bet):
            del st[start]
        string = ''.join(st)
    return string

def take_from_web_page(text):
    n = 0
    url = text.replace(" ", "_")
    search = "http://en.wikipedia.org/wiki/%s" % url
    page = urllib2.urlopen(search).read()
    start = page.find('<p><b>') + 6
    end = page.find('</a>.', start) + 5
    new_page = page[start:end]
    for a in new_page:
        if a == '<':
            if new_page[n - 1] != ' ':
                lst = list(new_page)
                lst.insert(n, ' ')
                new_page = ''.join(lst)
                n += 1
        n += 1
    return take_out_parenthesis(take_out_brackets(take_out_tags(new_page)))
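Since BeautifulSoup is already imported above, a hedged alternative to the hand-rolled tag and bracket stripping is to let it extract the text (the function name here is invented for illustration):

import urllib2
from bs4 import BeautifulSoup

def take_from_web_page_bs(text):
    # hypothetical helper: let BeautifulSoup strip the markup instead of
    # deleting characters by hand, which avoids the lost-space problem
    url = "http://en.wikipedia.org/wiki/%s" % text.replace(" ", "_")
    soup = BeautifulSoup(urllib2.urlopen(url))
    first_para = soup.find('p')  # first paragraph of the article body
    return first_para.get_text() if first_para else ''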
