Python compression string not quite right - python

I have the following code that is self explanatory in the docstring. How do I get it to not flag single letters with a 1, thereby turning a single digit into 2 in the final compressed string?
For example in the docstring it turns AAABBBBCDDDD -> A3B4C1D4 but I want it to turn into A3B4CD4. I'm new at this so it's any comments are greatly appreciated.
class StringCompression(object):
'''
Run Length Compression Algorithm: Given a string of letters, such as
nucleotide sequences, compress it using numbers to flag contiguous repeats.
Ex: AAABBBBCDDDD -> A3B4C1D4
>>>x = StringCompression('AAAAbC')
>>>x.compress()
'A4bC'
'''
def __init__(self, string):
self.string = string
def compress(self):
'''Executes compression on the object.'''
run = ''
length = len(self.string)
if length == 0:
return ''
if length == 1:
return self.string #+ '1'
last = self.string[0]
count = 1
i = 1
while i < length:
if self.string[i] == self.string[i - 1]:
count += 1
else:
run = run + self.string[i - 1] + str(count)
count = 1
i += 1
run = (run + self.string[i - 1] + str(count))
return run

Here's an alternative solution using itertools.groupby and a generator:
from itertools import chain, groupby
x = 'AAABBBBCDDDD'
def compressor(s):
for i, j in groupby(s):
size = len(list(j))
yield (i, '' if size==1 else str(size))
res = ''.join(chain.from_iterable(compressor(x)))
print(res)
A3B4CD4

Now it works the way I wanted it to. Thanks!
class StringCompression(object):
'''
Run Length Compression Algorithm: Given a string of letters, such as
nucleotide sequences, compress it using numbers to flag contiguous repeats.
Ex: AAABBBBCDDDD -> A3B4CD4
Notice that single letter do not get a 1 flag to prevent expansion.
>>>x = StringCompression('AAAAbC')
>>>x.compress()
'A4bC'
'''
def __init__(self, string):
self.string = string
def compress(self):
'''Executes compression on the object.'''
run = ''
length = len(self.string)
if length == 0:
return ''
if length == 1:
return self.string #+ '1'
last = self.string[0]
count = 1
i = 1
while i < length:
if self.string[i] == self.string[i - 1]:
count += 1
else:
run = run + self.string[i - 1] + str(count)
count = 1
i += 1
run = (run + self.string[i - 1] + str(count))
compressed_string = ''
for i in run:
if i != '1':
compressed_string += i
return compressed_string

Related

Using Python, how to print output string as -> aaa3bb2c1ddddd5 when Input string is aaabbcddddd

Using Python, how to print output string as -> aaa3bb2c1ddddd5 when Input string is aaabbcddddd
I want to concatenate actual character value and number of times a character is repeated in a string
def mycode(myString):
lenstr = len(myString)
print('length of string is '+str(lenstr));
for ele in myString:
count=0
for character in myString:
if character == ele:
count = count+1
totalstr = ele+str(count)
return totalstr
If the string is always sorted and grouped together like that, then you can use a collections.Counter to do it.
from collections import Counter
inp = "aaabbcddddd"
counter = Counter(inp)
out = "".join(k * v + str(v) for k,v in counter.items())
Or in one line:
print(''.join(k * v + str(v) for k,v in Counter(inp).items()))
Output:
aaa3bb2c1ddddd5
Or you can do it manually:
inp = "aaabbcddddd"
last = inp[0]
out = inp[0]
count = 1
for i in inp[1:]:
if i == last:
count += 1
else:
out += str(count)
count = 1
last = i
out += i
out += str(count)
print(out)
Here is a one line solution using a regex replacement with callback:
inp = "aaabbcddddd"
output = re.sub(r'((\w)\2*)', lambda m: m.group(1) + str(len(m.group(1))), inp)
print(output) # aaa3bb2c1ddddd5
Another one-liner:
import itertools
test = 'aaabbcddddd'
out = ''.join(f"{(g := ''.join(ig))}{len(g)}" for _, ig in itertools.groupby(test))
assert out == 'aaa3bb2c1ddddd5'
def char_counter_string(string):
prev_char = None
char_counter = 0
output = ''
for char_index in range(len(string)+1):
if char_index == len(string):
output += str(char_counter)
break
if string[char_index] != prev_char and prev_char is not None:
output += str(char_counter)
char_counter = 0
output += string[char_index]
char_counter += 1
prev_char = string[char_index]
return output
if __name__ == '__main__':
print(char_counter_string('aaabbcddddd'))
you can do like..
Code:
Time Complexity: O(n)
input_string="aaabbcddddd"
res=""
count=1
for i in range(1, len(input_string)):
if input_string[i] == input_string[i-1]:
count += 1
else:
res+=input_string[i-1]*count + str(count)
count = 1
res+=input_string[-1]*count + str(count)
print(res) #aaa3bb2c1ddddd5
Here's another way, ...
Full disclosure: ... as long as the run of characters is 10 or less, it will work. I.e., if there are 11 of anything in row, this won't work (the count will be wrong).
It's just a function wrapping a reduce.
from functools import reduce
def char_rep_count(in_string):
return reduce(
lambda acc, inp:
(acc[:-1]+inp+str(int(acc[-1])+1))
if (inp==acc[-2])
else (acc+inp+"1"),
in_string[1:],
in_string[0]+"1"
)
And here's some sample output:
print(char_rep_count("aaabbcdddd"))
aaa3bb2c1dddd4
I think this fulfils the brief and is also very fast:
s = 'aaabbcddddd'
def mycode(myString):
if myString:
count = 1
rs = [prev := myString[0]]
for c in myString[1:]:
if c != prev:
rs.append(f'{count}')
count = 1
else:
count += 1
rs.append(prev := c)
rs.append(f'{count}')
return ''.join(rs)
return myString

At leetcode, using Python3, how come this function be faster than the two simple others?

The exercise is at this Link.
How can a solution with ".split()" be faster than any solution running only a while counting the last word size? Like:
class Solution:
def lengthOfLastWord(self, s: str) -> int:
for word in s.split(' ')[::-1]:
if word != '':
return len(word)
Faster than:
class Solution:
def lengthOfLastWord(self, s: str) -> int:
firstLetter = len(s) -1
for i in range(firstLetter, -1, -1):
if s[i] != ' ':
firstLetter = i
break
for i in range(firstLetter, -1, -1):
if s[i] == ' ':
return firstLetter - i
return firstLetter+1
or
class Solution:
def lengthOfLastWord(self, s: str) -> int:
# trim the trailing spaces
p = len(s) - 1
while p >= 0 and s[p] == ' ':
p -= 1
# compute the length of last word
length = 0
while p >= 0 and s[p] != ' ':
p -= 1
length += 1
return length

How to replace all "&int-int" with the respective string slices in an input string?

I have a school project question (for Python) that goes like this:
Given a string_input such as "abcd&1-4efg", the function must remove the "&1-4" and insert the string slice from 1 to 4 where the "&1-4" was.
eg. if string_input = "abcd&1-4efg",
"&1-4" is removed.
The remaining characters are indexed as follows: a=0, b=1, c=2, d=3, e=4, f=5, g=6
The new string becomes:
"abcdbcdeefg"
I've managed to write a long chunk of code to do this, but I'm wondering if anyone has any more efficient solutions?
Things to note:
The instructions can include double digits (eg. &10-15)
If the index isn't found, the returned string should print "?" for every missing index
(eg. "abcd&5-10efgh" would return "abcdfgh???efgh")
Intructions can be back-to-back (eg. "&10-15abcdef&1-5&4-5pqrs")
The code I've written is:
def expand(text):
text += "|"
import string
digits_dash = string.digits + "-"
idx_ref_str = ""
replace_list = []
record_val = False
output_to_list = []
instruct = ""
and_idx_mark = 0
#builds replace_list & idx_ref_list
for idx in range(len(text)):
if text[idx] == "&" and record_val==True:
output_to_list.append(instruct)
output_to_list.append(and_idx_mark)
replace_list.append(output_to_list)
output_to_list, instruct, inst_idx, and_idx_mark = [],"",0,0
and_idx_mark = idx
continue
elif text[idx] == "&":
record_val = True
and_idx_mark = idx
continue
#executes if currently in instruction part
if record_val == True:
#adds to instruct
if text[idx] in digits_dash:
instruct += text[idx]
#take info, add to replace list
else:
output_to_list.append(instruct)
output_to_list.append(and_idx_mark)
replace_list.append(output_to_list)
output_to_list, instruct, inst_idx, and_idx_mark, record_val = [],"",0,0,False
#executes otherwise
if record_val == False:
idx_ref_str += text[idx]
idx_ref_str = idx_ref_str[:-1]
text = text[:-1]
#converts str to int indexes in replace list[x][2]
for item in replace_list:
start_idx = ""
end_idx = ""
#find start idx
for char in item[0]:
if char in string.digits:
start_idx += char
elif char == "-":
start_idx = int(start_idx)
break
#find end idx
for char in item[0][::-1]:
if char in string.digits:
end_idx = char + end_idx
elif char == "-":
end_idx = int(end_idx)
break
start_end_list = [start_idx,end_idx]
item+=start_end_list
#split text into parts in list
count = 0
text_block = ""
text_block_list = []
idx_replace = 0
for char in text:
if char == "&":
text_block_list.append(text_block)
text_block = ""
count += len(replace_list[idx_replace][0])
idx_replace +=1
elif count > 0:
count -= 1
else:
text_block += char
text_block_list.append(text_block)
#creates output str
output_str = ""
for idx in range(len(text_block_list)-1):
output_str += text_block_list[idx]
#creates to_add var to add to output_str
start_repl = replace_list[idx][1]
end_repl = replace_list[idx][1] + len(replace_list[idx][0])
find_start = replace_list[idx][2]
find_end = replace_list[idx][3]
if end_idx >= len(idx_ref_str):
gap = end_idx + 1 - len(idx_ref_str)
to_add = idx_ref_str[find_start:] + "?" * gap
else:
to_add = idx_ref_str[find_start:find_end+1]
output_str += to_add
output_str += text_block_list[-1]
return output_str
Here's how I would do it. Always open to criticism.
import re
s = 'abcd&1-4efg'
c = re.compile('&[0-9]+-[0-9]+')
if (m := c.search(s)):
a, b = m.span()
left = s[:a]
right = s[b:]
o = [int(x) for x in m.group(0)[1:].split('-')]
mid = (left+right)[o[0]:o[1]+1]
print(left + mid + right)

Kattis Polish Notation challenge in Python

I'm trying to do the polish notation challenge on kattis.com. Thing is, I feel I have done everything they asked for and I've tried fixing everything I could think of. I even looked up some other's solutions and while theirs are more clean I want to continue on mine as I am learning.
Why is it that for example this person's code works but not mine?
Here is my current code:
import sys
case = 1
valid_ints = set([str(i) for i in range(-10,11)])
def simplify(index, myLine, processed):
while index+1 > 0:
if (myLine[index] == "+" or myLine[index] == "-" or myLine[index] == "*") and index < len(myLine)-2:
if myLine[index+1] in valid_ints and myLine[index+2] in valid_ints:
try:
processed = myLine[index+3:] + processed
a = str(myLine[index+1] + myLine[index] + myLine[index+2])
processed.insert(0, str(eval(a)))
del myLine[index:]
except:
processed = [myLine[index], myLine[index+1], myLine[index+2]] + processed
del myLine[index:]
elif len(myLine) < 3:
processed = myLine + processed
del myLine[index]
index -= 1
processed = myLine + processed
return processed
for line in sys.stdin:
myLine = line.split()
processed = []
index = len(myLine)-1
savedprocessed = []
processed = simplify(index, myLine, processed)
while True:
if savedprocessed == processed:
break
else:
savedprocessed = []
savedprocessed += processed
processed = simplify(len(processed)-1, processed, [])
result = " ".join(savedprocessed)
print("Case " + str(case) + ": " + result)
case += 1
if case > 5:
break
You're bringing some other language style to Python, that's unnecessary because Python is more flexible.
I've simplified as much as I can here.
Split the input string on white spaces and iterate over the tokens.
For every operator in the expression, push a list onto the stack and append the operator and its operands to the list.
Now pop each list off the stack and process the list
def simplify(exp):
stack1 = []
ops = set('+*-')
for token in exp.split():
if token in ops:
stack1.append([])
stack1[-1].append(token)
stack2 = []
while stack1:
top = stack1.pop()
while len(top) < 3 and stack2:
top.append(stack2.pop())
if any(x.isalpha() for x in top):
simplified = ' '.join(top)
else:
top[0], top[1] = top[1], top[0]
simplified = str(eval(''.join(top)))
stack2.append(simplified)
return simplified
exp = '* - 6 + x -6 - - 9 6 * 0 c'
print(exp)
simplify(exp)
Output;
* - 6 + x -6 - - 9 6 * 0 c
* - 6 + x -6 - - 3 * 0 c

Equivalent strings don't give equivalent bitstrings in Python

I'm currently making an encryption program for an assignment however I cannot decrypt. The cause of this is that the key is a string made from a randomly generated bitstring however when turning the key back into a bitstring I get a different bitstring. I realized this after checking and finding out that the bitstring after turning the key back to binary is shorter than the bitstring used to make the key.
##imports##################################################
from socket import *
import random
##functions################################################
def CCipher(message, k):
output = ""
for x in message:
i = ord(x)
i = (i+k)%128
output += chr(i)
return output
def toBinary(message):
output = ""
for x in message:
i = bin(ord(x))[2:]
output += i
return output
def XOR(bitstring1, bitstring2):
output = ""
if(len(bitstring1) != len(bitstring2)):
print("Use bitstrings of the same length")
return None
for x in range(len(bitstring1)):
if(bitstring1[x] == "1" and bitstring2[x] == "0"):
output += "1"
elif(bitstring1[x] == "0" and bitstring2[x] == "1"):
output += "1"
else:
output += "0"
return output
def randomBinary(k):
output = ""
for x in range(k):
i = random.randint(0,1)
output = output + str(i)
return output
def toString(message):
output = ""
i = ""
n = 0
for x in message:
n += 1
i += x
if(n == 7):
output += chr(int(i,2))
n = 0
i = ""
return output
##server stuff#########################################
serverName = "OmariPC"
serverPort = 12347
clientSocket = socket(AF_INET,SOCK_STREAM)
clientSocket.connect((serverName,serverPort))
##files################################################
nowar = open("NoWar.dat",'r')
trump = open("Trump.dat","w")
##encryption###########################################
message = nowar.read()
mesBin = toBinary(message)
bkey = randomBinary(len(mesBin))
encrypted = XOR(mesBin, bkey)
encrypted = toString(encrypted)
key = toString(bkey)
trump.write(encrypted)
trump.close()
enKey = CCipher(key, 4)
##sending###############################################
clientSocket.send(enKey.encode())
print(len(enKey))
clientSocket.send(encrypted.encode())
print(len(encrypted))
##testing###############################################
if key == toString(bkey):
print(True)
else:
print(False)
if len(toBinary(key)) == len(bkey):
print(True)
else:
print(False)
output:
168
168
True
False
def toBinary(message):
output = ""
for x in message:
i = bin(ord(x))[2:]
output += i
return output
bin(ord(x)) generates strings of variable length. For example:
>>> bin(ord(' '))
0b100000
>>> bin(ord('a'))
0b1100001
You concatenate all of the results of bin() together, so there's no way to separate them later. Meanwhile, your toString() function reads exactly 7 "bits" at a time.
You could make bin() append chunks of a fixed-size instead:
# Pad the left with 0s to always generate a string of length 7.
# (Assumes that all input characters are ASCII.)
assert(ord(x) < 128)
bin(ord(x))[2:].rjust(7, '0')

Categories