Find, replace and print out updated string - python

I am trying to create a Python script that holds 10-20 lines of fixed data in a string containing a two-character placeholder; each occurrence of the placeholder needs to be replaced with a different random string generated by randomword()
import random, string
def randomword(length):
    """Return a random word made of `length` lowercase ASCII letters."""
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
# Template text: every '{}' marks a spot that should receive a random word.
junk = """
random_string1 = {};
random_string2 = {};
random_string3 = {};
random_string4 = {};
random_string5 = {};
"""
# randomword(40) is evaluated once, before the replacement runs, so every
# '{}' receives the same word.
# NOTE(review): string.replace() as a module-level function exists only in
# Python 2; in Python 3 the equivalent call is junk.replace('{}', ...).
stra = string.replace(junk, '{}', randomword(40))
print (stra)
The two special characters in the string are {}, I would like to iterate through the string to find those characters and replace them with a different random string generated by randomword()
Above is as far as I got, this piece of code replaces all of the occurrences
of {} with a random string, but they have the same values, I would like to have differing values for each {}.
I don't know how to put this into a loop. Any help is appreciated.

use randint(97, 122)
import random
def randomword(length):
    """Build a random word of `length` lowercase ASCII letters.

    Each letter is drawn as a code point in 97..122 ('a'..'z') and
    converted with chr().
    """
    return "".join(chr(random.randint(97, 122)) for _ in range(length))
def replace(original_string, string_to_replace, word_length=40, make_word=None):
    """Replace every occurrence of a placeholder with a freshly generated word.

    original_string: text containing the placeholders
    string_to_replace: the placeholder to look for (any length, not empty)
    word_length: length passed to the word generator (default 40)
    make_word: callable taking a length and returning the replacement;
               defaults to randomword()

    Returns the new string. The generator is called once per occurrence,
    left to right, so every placeholder gets its own word.

    Raises ValueError if string_to_replace is empty.
    """
    if not string_to_replace:
        # An empty placeholder is found at position 0 forever.
        raise ValueError("string_to_replace must not be empty")
    if make_word is None:
        make_word = randomword
    # split() cuts at exactly the non-overlapping, left-to-right occurrences
    # and consumes the whole placeholder, whatever its length.
    pieces = original_string.split(string_to_replace)
    parts = [pieces[0]]
    for tail in pieces[1:]:
        parts.append(make_word(word_length))
        parts.append(tail)
    return "".join(parts)
# Template text: each '{}' is a placeholder for one random word.
junk = """
random_string1 = {};
random_string2 = {};
random_string3 = {};
random_string4 = {};
random_string5 = {};
"""
# replace() generates a new word for every occurrence it finds.
stra = replace(junk, "{}")
print (stra)

Since you are already using the default placeholder for string formatting '{}', you can do:
>>> print(junk.format(*(randomword(40) for _ in range(junk.count('{}')))))
random_string1 = lazbdzezssemtsfknajawriafozpjwizigykvmac;
random_string2 = pxhkyrnjiqsvcivcppqqpbwuocsvbfauygdrwpuj;
random_string3 = ewhrsryjtfwtmulmqfqxzrzvyspiefrddpzrxkvq;
random_string4 = sqiulddoevddtieymjiexnmzezrdayvwigmsmgld;
random_string5 = evscqvrccknkulpkqchodcjlognsnrcxqcsexnrv;

Could use regex:
>>> print(re.sub('{}', lambda _: randomword(40), junk))
random_string1 = emgdidmmghkunqkwdfcnvsffdnfhvqrybhqdfklh;
random_string2 = mgggwchtzpuhntnfxzpsmkarkwnlghcwicnvxmpt;
random_string3 = liemdzegmzfpuozktclxnmfmavjkaxqhrfaldqkn;
random_string4 = ugchyijmsvzmeaiyzyaiudrnnfskzdboukvdwiye;
random_string5 = rfazvtvhygfixielqzbuhlnzvjjtrkhsdetomjri;

Related

how to convert bytes to binary using python

So I want to convert bytes to binary in Python, but when I run my code the result is None and I get this error:
'NoneType' object is not iterable
here's the code i tried
//func biner
# Prints each byte of `password` as an 8-digit binary number.
# There is no return statement, so a call to biner() evaluates to None.
def biner(password):
print(password)
# Bare expression: the indexed value is not used.
password[0]
for my_byte in password:
# '0>8b' right-aligns the binary digits in a field of 8, padded with '0'.
print(f'{my_byte:0>8b}', end=' ')
//func to use the result from func biner
# Pads `key` with "0" characters and repeats every element of `biner`
# len(key) times, printing the result instead of returning it.
# NOTE(review): key_bytes is not defined in the code shown; presumably a
# module-level constant. The parameter `biner` shadows the function biner().
def skalar(key, biner):
if len(key) <= key_bytes:
for x in range(len(key),key_bytes):
key = key + "0"
jumbin2 = len(key)
print(jumbin2)
jumbin = biner
print(biner)
hasilenkrip = ''
# Iterating here requires `biner` to be iterable; passing None fails.
for a in jumbin:
hasilenkrip += a * jumbin2
print(hasilenkrip)
//how i called the func
# Driver: feeds the result of biner() into skalar().
enc = b'l\x87\x87\xe6'
# NOTE(review): `maru` is a bare name that is not defined in the code shown;
# presumably a string literal was intended.
ky = maru
tes = biner(enc)
tes1 = skalar(ky, tes)
print(tes1)
Your function currently returns None because there's no return statement. Perhaps instead of using print, you should modify your functions to return an array of outputs.
I suspect what you have in mind is something like this.
# //func biner
def biner(password):
    """Return the bytes of `password` as space-separated 8-bit binary strings.

    password: a bytes-like object (iterating it must yield integers)

    The raw input is still echoed with print(), as before. The former bare
    `password[0]` expression was removed: it computed nothing and made the
    function raise IndexError for empty input, which now yields ''.
    """
    print(password)
    # '0>8b' right-aligns the binary digits in a field of 8, padded with '0'.
    return ' '.join(f'{my_byte:0>8b}' for my_byte in password)
# //func to use the result from func biner
# Pads `key` with "0" characters, then returns a string in which every
# element of `biner` is repeated len(key) times.
# NOTE(review): key_bytes is not defined in the code shown; presumably a
# module-level constant. The parameter `biner` shadows the function biner().
def skalar(key, biner):
if len(key) <= key_bytes:
for x in range(len(key),key_bytes):
key = key + "0"
jumbin2 = len(key)
print(jumbin2)
jumbin = biner
print(biner)
hasilenkrip = ''
for a in jumbin:
# `a * jumbin2` repeats the element jumbin2 times before appending it.
hasilenkrip += a * jumbin2
return hasilenkrip
# print(hasilenkrip)
# //how i called the func
# Driver: biner() now returns a string, which skalar() iterates over.
enc = b'l\x87\x87\xe6'
# NOTE(review): `maru` is a bare name that is not defined in the code shown.
ky = maru
tes = biner(enc)
tes1 = skalar(ky, tes)
print(tes1)
# Reads one integer and prints its bits as two groups: everything above
# the low four bits (at least 4 digits), then the low four bits, followed
# by the decimal value in parentheses.
some_bytes = int(input('Enter bytes: '))
print(f'{some_bytes>>4:04b}{some_bytes&0xf:04b} ({some_bytes})')

Markov Analysis, Formatting

I have a program that reads in a big chunk of text from a text file and then randomizes the content to display back as a short story based on the content of the text. The program works but the last part, where I am displaying the material is super clunky and not efficient and I was wondering if anyone had any ideas on how I could more efficiently take in the text and then display it as a string to the user but allow it to span multiple lines (wrap text, essentially) so that it is not just a giant string of text continuing to the right of the console.
from __future__ import print_function, division
import sys
import random
# global variables
# Module-level state shared by the functions below.
suffix_map = {} # map from prefixes to a list of suffixes
prefix = () # current tuple of words
# Generated words; random_text() appends to it and list_to_str_format() reads it.
big_list = []
def process_file(filename, order=2):
    """Reads a file and performs Markov analysis.

    filename: string
    order: integer number of words in the prefix

    Returns None. Every whitespace-separated word of the file is handed to
    process_word(), which accumulates the analysis.
    """
    # The context manager closes the file even if processing raises;
    # the previous version never closed the handle.
    with open(filename) as fp:
        for line in fp:
            for word in line.rstrip().split():
                process_word(word, order)
def process_word(word, order=3):
    """Feeds one word into the analysis.

    word: string
    order: integer

    Until the running prefix holds `order` words, the word is only added
    to the prefix. After that the word is recorded as a suffix of the
    current prefix and the prefix slides forward by one word.
    """
    global prefix

    if len(prefix) >= order:
        # setdefault creates the suffix list the first time a prefix is seen
        suffix_map.setdefault(prefix, []).append(word)
        prefix = shift(prefix, word)
    else:
        prefix += (word,)
def random_text(n=300):
    """Appends n randomly chosen words to big_list.

    Starts from a random prefix of the dictionary (not weighted by
    frequency) and repeatedly picks a random suffix of the current prefix.

    n: number of words to generate
    """
    global big_list

    current = random.choice(list(suffix_map))
    for produced in range(n):
        candidates = suffix_map.get(current)
        if candidates is None:
            # No known suffix for this prefix: start over from a new random
            # prefix for the words that are still missing.
            random_text(n - produced)
            return

        chosen = random.choice(candidates)
        big_list.append(chosen + " ")
        current = shift(current, chosen)
def shift(t, word):
    """Returns a new tuple: t without its first element, with word appended.

    t: tuple of strings
    word: string

    Returns: tuple of strings
    """
    tail = t[1:]
    return tail + (word,)
def list_to_str_format(words_per_line=25):
    """Prints the words in big_list, a fixed number of words per line.

    words_per_line: integer number of words on each printed line (default 25)

    The previous hand-written slices ([26:50], [51:75], ..., [256:275])
    silently dropped the words at indices 25, 50, ... and 250-255 and only
    handled exactly 300 words; chunking with a step covers every word of a
    list of any length.

    Raises ValueError if words_per_line is smaller than 1.
    """
    if words_per_line < 1:
        raise ValueError("words_per_line must be at least 1")
    for start in range(0, len(big_list), words_per_line):
        line = " ".join(str(w) for w in big_list[start:start + words_per_line])
        if start == 0:
            # Kept from the original: str.capitalize() upper-cases the first
            # character and lower-cases the rest of the first line.
            line = line.capitalize()
        print(line)
def main(filename, n=300, order=3):
    """Analyzes a text file and prints randomly generated text.

    filename: string, file to analyze
    n: number of words to generate (anything int() accepts)
    order: number of words in the prefix (anything int() accepts)

    If n or order cannot be converted to an integer, an error and a usage
    line are printed and nothing else happens.
    """
    try:
        n = int(n)
        order = int(order)
    except ValueError as e:
        # The old message used '%d' % e, which itself raised TypeError
        # because an exception object is not a number.
        print('Invalid argument: %s' % e)
        print('Usage: %s filename [# of words] [prefix length]' % sys.argv[0])
    else:
        process_file(filename, order)
        random_text(n)
        list_to_str_format()
        print()
# Runs the whole pipeline at import time with the defaults n=300, order=3.
# NOTE(review): the input path is hard-coded to one machine's desktop.
main('C:\\Users\\Desktop\\TheBrothersKaramazov.txt')
I took the liberty of changing your joining pattern, which produced a double space between words. Note that you must import the re module.
def list_to_str_format(line_length=80):
    """Prints the text in big_list wrapped to at most line_length characters.

    line_length: maximum number of characters per printed line (default 80)

    The elements of big_list are concatenated as they are, so they are
    expected to carry their own separating whitespace. Each line is cut
    right after the last whitespace character that fits into the window.
    If a window contains no whitespace at all (a word longer than
    line_length), it is cut hard at the window end; the earlier version
    made no progress in that case and looped forever.

    Raises ValueError if line_length is smaller than 1.
    """
    import re  # local import: the surrounding program does not import re

    if line_length < 1:
        raise ValueError("line_length must be at least 1")
    whole = "".join(str(i) for i in big_list)
    # Greedy match up to and including the last whitespace in the window;
    # (?s) lets '.' cross newlines. Compiled once, outside the loop.
    up_to_last_space = re.compile(r'(?s).*\s')
    while whole:
        window = whole[:line_length]
        found = up_to_last_space.match(window)
        break_pos = found.end() if found else len(window)
        print(whole[:break_pos])
        whole = whole[break_pos:]

Python Regex matching wrong strings

I have the following python script which does regex matching using 'AND', 'OR' features as well:
class PyBoolReException(Exception):
    """Exception carrying an arbitrary payload in its `value` attribute."""

    def __init__(self, value):
        """Store `value` for later display."""
        self.value = value

    def __str__(self):
        """Return the string form of the stored value."""
        payload = self.value
        return str(payload)
# Compiles a boolean expression string into a single regular expression.
# Sub-expressions are grouped with parentheses; inside a group ' | ' means
# OR, ' & ' means AND and a leading '!' means NOT.
# NOTE(review): uses Python 2 syntax (raise X, 'msg') and relies on the `re`
# module, whose import is not part of the code shown.
class PyBoolRe:
# Parses `boolstr` into self.__rexplist and compiles the regular expression
# built from it. Raises PyBoolReException when the numbers of '(' and ')'
# differ.
def __init__(self, boolstr):
# Require whitespace before words?
# NOTE(review): this flag is set here but not read anywhere in the class.
self.__needspace = True
# whitespace re
self._wspre = re.compile('^\s*$')
# create regexp string
self.__rexplist = []
oparct = boolstr.count('(')
clparct = boolstr.count(')')
if oparct != clparct:
raise PyBoolReException, 'Mismatched parantheses!'
self.__parse(boolstr)
# if NOT is one of the members, reverse
# the list
# print self.__rexplist
if '!' in self.__rexplist:
self.__rexplist.reverse()
s = self.__makerexp(self.__rexplist)
# print s
self.__rexp = re.compile(s)
def match(self, data):
""" Match the boolean expression, behaviour
is same as the 'match' method of re """
return self.__rexp.match(data)
def search(self, data):
""" Search the boolean expression, behaviour
is same as the 'search' method of re """
return self.__rexp.search(data)
def __parse(self, s):
""" Parse the boolean regular expression string
and create the regexp list """
# The string is a nested parantheses with
# any character in between the parens.
scopy = s[:]
oparmatch, clparmatch = False, False
# Look for a NOT expression
# rfind picks the last '(' in the string; the first ')' after it closes
# that group.
index = scopy.rfind('(')
# NOTE(review): `l` is assigned but never used.
l = []
if index != -1:
oparmatch = True
index2 = scopy.find(')', index)
if index2 != -1:
clparmatch = True
newstr = scopy[index+1:index2]
# if the string is only of whitespace chars, skip it
if not self._wspre.match(newstr):
self.__rexplist.append(newstr)
# Remove the handled group (str.replace removes every identical occurrence)
# and recurse on what is left.
replacestr = '(' + newstr + ')'
scopy = scopy.replace(replacestr, '')
self.__parse(scopy)
# No parentheses left: the remaining text, if any, is the last entry.
if not clparmatch and not oparmatch:
if scopy: self.__rexplist.append(scopy)
def is_inbetween(self, l, elem):
""" Find out if an element is in between
in a list """
# l.index() returns the position of the first equal element, so repeated
# values all report the position of their first occurrence.
index = l.index(elem)
if index == 0:
return False
if index>2:
if index in range(1, len(l) -1):
return True
else:
return False
else:
return True
def __makenotexpr(self, s):
""" Make a NOT expression """
# A leading '!' turns the rest of the term into a negative lookahead.
if s.find('!') == 0:
return ''.join(('(?!', s[1:], ')'))
else:
return s
def __makerexp(self, rexplist):
""" Make the regular expression string for
the boolean match from the nested list """
is_list = True
if type(rexplist) is str:
is_list = False
elem = rexplist
elif type(rexplist) is list:
elem = rexplist[0]
if type(elem) is list:
elem = elem[0]
# eor: no further list entries follow, so the recursion stops here.
eor = False
if not is_list or len(rexplist) == 1:
eor = True
word_str = '.*'
s=''
# Implementing NOT
if elem == '!':
return ''.join(('(?!', self.__makerexp(rexplist[1:]), ')'))
# Implementing OR
elif elem.find(' | ') != -1:
listofors = elem.split(' | ')
for o in listofors:
# NOTE(review): `index` is computed but not used in this loop.
index = listofors.index(o)
in_bet = self.is_inbetween(listofors, o)
if o:
o = self.__makenotexpr(o)
# Alternatives are joined with a bare '|'; no group is put around them.
if in_bet:
s = ''.join((s, '|', word_str, o, '.*'))
else:
s = ''.join((s, word_str, o, '.*'))
# Implementing AND
elif elem.find(' & ') != -1:
listofands = elem.split(' & ')
for a in listofands:
# NOTE(review): `index` and `in_bet` are computed but not used in this loop.
index = listofands.index(a)
in_bet = self.is_inbetween(listofands, a)
if a:
a = self.__makenotexpr(a)
# AND terms are concatenated, each wrapped in '.*' on both sides.
s = ''.join((s, word_str, a, '.*'))
else:
if elem:
elem = self.__makenotexpr(elem)
s = ''.join((elem, '.*'))
if eor:
return s
else:
# Append the expression built from the remaining list entries.
return ''.join((s, self.__makerexp(rexplist[1:])))
When the search phrase is as follows:
# Demo: a plain OR expression tested against two strings
# (Python 2 print statements).
p = PyBoolRe('Python | Perl')
s1 = 'Guido invented Python'
s2 = 'Guido Perl'
if p.match(s1):
print 'Match found for first string'
else:
print 'No match found for first string'
if p.match(s2):
print 'Match found for second string'
else:
print 'No match found for second string'
Then both s1 & s2 match
But when the search phrase is:
# Demo: AND combined with a parenthesised OR group.
p = PyBoolRe('Guido & (Python | Perl)')
s1 = 'Guido invented Python'
s2 = 'Guido Perl is great'
Then it should match if s1 or s2 has "Guido Python" or "Guido Perl". s2 has that but it does not match it. On the other hand, it matches s1, which it should not. Why is that?
Please help!! How can I get it to work??
Your generated expression is
.*Python.*|.*Perl.*.*Guido.*
while it should look like
(?=.*Guido.*)(?:.*Python.*|.*Perl.*)
So the parser needs some revision.
1) x|y should be enclosed into (?:...) (at least when used inside another block). Otherwise, | unluckily takes the global priority in the regexp.
2) x & y should be converted into (?=x)y (trailing context may be used to express the and between regular expressions)

Python String Converter; how do I convert a string in python

Suppose there is a string such as "Hello! Python is a great language!". How would I convert Hello! into hijkefghlmnolmnoopqr!, where h=hijk, e=efgh, l=lmno, o=opqr, and so on for the rest of the string?
# Map every lowercase letter to the run of four letters that starts at it.
# The alphabet is extended by 'abc' so that runs near 'z' wrap around.
alpha = 'abcdefghijklmnopqrstuvwxyzabc'
subs = {letter: alpha[pos:pos + 4] for pos, letter in enumerate(alpha[:26])}
s = "Hello! Python is a great language!"
# Characters without a mapping (punctuation, spaces) pass through unchanged.
s = ''.join(subs.get(c, c) for c in s.lower())
# Alphabet extended by 'abc' so a four-letter slice starting at 'x', 'y'
# or 'z' wraps around to the beginning.
LETTERS = "abcdefghijklmnopqrstuvwxyzabc"


def translate_char(ch):
    """Return the four-letter run starting at `ch`, or `ch` if it is no letter.

    The lookup is case-insensitive; the returned run is always lowercase.
    """
    position = LETTERS.find(ch.lower())
    return ch if position == -1 else LETTERS[position:position + 4]


def translate_string(s):
    """Translate every character of `s` with translate_char() and join them."""
    pieces = [translate_char(ch) for ch in s]
    return "".join(pieces)


print(translate_string("Hello!")) # => hijkefghlmnolmnoopqr!

How to ensure all string literals are unicode in python

I have a fairly large python code base to go through. It's got an issue where some string literals are strings and others are unicode. And this causes bugs. I am trying to convert everything to unicode. I was wondering if there is a tool that can convert all literals to unicode. I.e. if it found something like this:
print "result code %d" % result['code']
to:
print u"result code %d" % result[u'code']
If it helps, I use PyCharm (in case there is an extension that does this); however, I would be happy to use a command-line tool as well. Hopefully such a tool exists.
You can use tokenize.generate_tokens to break the string representation of Python code into tokens. tokenize also classifies the tokens for you. Thus you can identify string literals in Python code.
It is then not hard to manipulate the tokens, adding 'u' where desired:
import tokenize
import token
import io
import collections
class Token(collections.namedtuple('Token', 'num val start end line')):
    """One token in the 5-tuple layout produced by tokenize.generate_tokens().

    num: numeric token type
    val: token text
    start, end: (row, column) positions of the token
    line: the source line the token was found on
    """

    # The decorator had been reduced to the comment '#property', which made
    # `name` a plain method, so comparisons such as tok.name == 'STRING'
    # could never be true.
    @property
    def name(self):
        """Symbolic name of the token type, e.g. 'STRING'."""
        return token.tok_name[self.num]
def _string_prefix(literal):
    """Return the lower-cased prefix letters in front of the opening quote."""
    for pos, ch in enumerate(literal):
        if ch in '\'"':
            return literal[:pos].lower()
    return literal.lower()


def change_str_to_unicode(text):
    """Return Python source `text` with 'u' added in front of string literals.

    text: str holding the source code to rewrite

    Only STRING tokens are touched, so quotes inside comments are left
    alone. Literals that already carry a 'u' prefix (in any case, e.g.
    U'' or ur'') are skipped, as are bytes literals and f-strings, for
    which an added 'u' would not be valid syntax.

    The result is joined with '\\n' and has no trailing newline.
    """
    result = text.splitlines()
    # Insert a dummy line into result so indexing result
    # matches tokenize's 1-based indexing
    result.insert(0, '')
    changes = []
    # generate_tokens() needs a readline that returns str, hence StringIO
    # (BytesIO only worked with Python 2 byte strings).
    readline = io.StringIO(text).readline
    for tok_type, tok_val, start, _end, _line in tokenize.generate_tokens(readline):
        if tok_type != token.STRING:
            continue
        prefix = _string_prefix(tok_val)
        if 'u' in prefix or 'b' in prefix or 'f' in prefix:
            continue
        changes.append(start)
    # Work backwards so earlier column offsets on a line stay valid.
    for linenum, s in reversed(changes):
        line = result[linenum]
        result[linenum] = line[:s] + 'u' + line[s:]
    return '\n'.join(result[1:])
# Sample input for change_str_to_unicode(). The literal is not a raw string,
# so the backslash-newline after the '+' is consumed and the two quoted
# strings end up on one line of `text`.
text = '''print "result code %d" % result['code']
# doesn't touch 'strings' in comments
'handles multilines' + \
'okay'
u'Unicode is not touched'
'''
print(change_str_to_unicode(text))
yields
print u"result code %d" % result[u'code']
# doesn't touch 'strings' in comments
u'handles multilines' + u'okay'
u'Unicode is not touched'
Try this (uses regex); it's shorter than @unutbu's solution.
But there's a loophole: strings containing # won't work with this.
import re
# Sample source text to be rewritten.
scode = '''
print "'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'"""
# so are "comments"'''
# Matches an optional 'u' prefix, an opening quote (' or "), lazily any
# characters, and then the same quote character again.
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def repl(m):
    """Rebuild the matched literal with exactly one leading 'u'.

    m: match object providing the named groups 'c' (quote) and 'data'.
    """
    groups = m.groupdict()
    return "u{c}{data}{c}".format(c=groups["c"], data=groups["data"])
# Rewrite scode line by line. On a line containing '#', only the part in
# front of the first '#' is rewritten; the rest is appended unchanged.
fcode = '\n'.join(
[re.sub(x1,repl,i)
if not '#' in i
else re.sub(x1,repl,i[:i.find('#')])+i[i.find('#'):]
for i in scode.splitlines()])
# Python 2 print statement.
print fcode
Outputs:
print u"'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
For strings containing # I have this (and it's longer than @unutbu's solution :| )
import re
# Sample source text; the last line has '#' characters inside a string
# literal followed by a real comment.
scode = '''print "'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
'#### Hi' # 'Hi' '''
# Matches an optional 'u' prefix, an opening quote (' or "), lazily any
# characters, and then the same quote character again.
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def in_string(text, index):
    """Return True if the character at `index` lies inside a string literal.

    text: one line of source code
    index: position of the character to classify

    The line is scanned from the start up to and including `index`. A quote
    opens a string when none is open; only the same quote character closes
    it again, so the other kind of quote inside a string is ignored. A
    backslash inside a string escapes the following character. Triple-quoted
    and raw strings get no special treatment.

    The earlier version assigned its state to `instr` but tested `in_str`,
    so the state never changed and any quote before `index` counted as
    "inside a string".
    """
    open_quote = ''
    escaped = False
    for c in text[:index + 1]:
        if escaped:
            escaped = False
        elif c == '\\' and open_quote:
            escaped = True
        elif c == '"' or c == "'":
            if not open_quote:
                open_quote = c
            elif c == open_quote:
                open_quote = ''
    return bool(open_quote)
def repl(m):
    """Return the matched string literal rebuilt with one leading 'u'.

    m: match object providing the named groups 'c' (quote) and 'data'.
    """
    parts = m.groupdict()
    quote = parts["c"]
    return "u{0}{1}{0}".format(quote, parts["data"])
def handle_hashes(i):
    """Rewrite the string literals of line `i` that precede its comment.

    With exactly one '#' in the line, that '#' is taken as the start of the
    comment; otherwise get_hash_out_of_string() chooses the position.
    Everything from that position on is appended unchanged.
    """
    n = i.find('#') if i.count('#') == 1 else get_hash_out_of_string(i)
    code_part, comment_part = i[:n], i[n:]
    return re.sub(x1, repl, code_part) + comment_part
def get_hash_out_of_string(i):
    """Return the index of the '#' taken as the start of the comment in `i`.

    Starting at the first '#', every '#' that in_string() reports as being
    inside a string is blanked out in a working copy and the search moves
    to the next one; the search never goes past the last '#' of the line.
    """
    # Index of the last '#', computed from its position in the reversed line.
    last = len(i) - 1 - i[::-1].find('#')
    curr = i[:]
    n = i.find('#')
    while True:
        if not in_string(curr, n) or n >= last:
            return n
        curr = curr[:n] + ' ' + curr[n + 1:]
        n = curr.find('#')
# Rewrite scode line by line; lines containing '#' go through
# handle_hashes(), all other lines are rewritten as a whole.
fcode = '\n'.join(
[re.sub(x1,repl,i)
if not '#' in i
else handle_hashes(i)
for i in scode.splitlines()])
# Python 2 print statement.
print fcode
Output:
print u"'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
u'#### Hi' # 'Hi'

Categories