Is there a way to ignore escape sequences in python? - python

import re
import math
def encode(n, strng):
for z in range (n):
temp = []
spcPos = []
for i , x in enumerate(strng):
if x == ' ':
spcPos.append(i)
strng = re.sub(' ', '', strng)
for i in range (len(strng)):
temp.append(strng[i - n])
temp2 = []
ezEnu = 0
for i in temp:
if ezEnu in (spcPos):
temp2.append(' ')
ezEnu += 1
temp2.append(i)
ezEnu += 1
temp2 = ''.join(temp2)
temp2 = temp2.split()
temp = []
withSpc = []
withSpc2 = []
oglen = []
for i in temp2:
oglen.append(len(i))
for x in range (math.ceil(n / len(i))):
withSpc.append(i)
withSpc2.append(''.join(withSpc))
withSpc = []
newa = []
for i, x in enumerate(withSpc2):
for y in range(int(oglen[i])):
newa.append(x[y - n])
temp2 = []
ezEnu = 0
for i in newa:
if ezEnu in (spcPos):
temp2.append(' ')
ezEnu += 1
temp2.append(i)
ezEnu += 1
strng = ''.join(temp2)
return(''.join([str(n), ' ', strng]))
How can I take a string, and make it so the escape sequences are treated as regular characters?
I have this function encode that takes a string and encodes it. The problem is when I receive strings that have a / in or any escape sequence, it ignores them and does not treat it as part of the message to encode.
I have no control over the strings that come into the function, they are predefined.
From what I understand r'' only works on new strings.
Thanks :)

depending on what you want exactly you might check whether this is sufficient for your use case.
def encode(n, strng):
return repr(strng)[1:-1]

Related

Creating string based on first letters of each element of the list

Example:
list = [abcc, typpaw, gfssdwww]
expected result = atgbyfcpscpsadwwww
Any ideas?
This is what i made so far:
def lazy_scribe(sources: list):
result: str = ''
i = 0
while i < len(max(sources, key=len)):
for source in sources:
for char in source:
if i <= len(source):
result = result + source[int(i)]
else:
continue
i += 1 / (len(sources))
break
return result
sources = ["python", "java", "golang"]
print(lazy_scribe(sources))
print(len(sources))
result: "pjgyaoyvlhaaononngn". I dont know why there is "y" instead of t (7 char in result string)
If I understand the problem correctly, this should work.
list = ["abcc", "typpaw", "gfssdwww"]
max_len = len(max(list, key=len))
res = ""
char_iterator = 0
while char_iterator < max_len:
for word in list:
if char_iterator < len(word):
res += word[char_iterator]
char_iterator += 1
print(res)
Another possible solution is as follows:
l = ['abcc', 'typpaw', 'gfssdwww']
max_len = len(max(l, key=len))
padded_l = list(zip(*[e + " " * (max_len - len(e)) for e in l]))
''.join([''.join(e) for e in padded_l]).replace(' ', '')
find the longest string in the list
then pad all the strings in the list with blank space
use zip on the result list
join the elements and replace the blank space to get the desired result

Count of sub-strings that contain character X at least once. E.g Input: str = “abcd”, X = ‘b’ Output: 6

This question was asked in an exam but my code (given below) passed just 2 cases out of 7 cases.
Input Format : single line input seperated by comma
Input: str = “abcd,b”
Output: 6
“ab”, “abc”, “abcd”, “b”, “bc” and “bcd” are the required sub-strings.
def slicing(s, k, n):
loop_value = n - k + 1
res = []
for i in range(loop_value):
res.append(s[i: i + k])
return res
x, y = input().split(',')
n = len(x)
res1 = []
for i in range(1, n + 1):
res1 += slicing(x, i, n)
count = 0
for ele in res1:
if y in ele:
count += 1
print(count)
When the target string (ts) is found in the string S, you can compute the number of substrings containing that instance by multiplying the number of characters before the target by the number of characters after the target (plus one on each side).
This will cover all substrings that contain this instance of the target string leaving only the "after" part to analyse further, which you can do recursively.
def countsubs(S,ts):
if ts not in S: return 0 # shorter or no match
before,after = S.split(ts,1) # split on target
result = (len(before)+1)*(len(after)+1) # count for this instance
return result + countsubs(ts[1:]+after,ts) # recurse with right side
print(countsubs("abcd","b")) # 6
This will work for single character and multi-character targets and will run much faster than checking all combinations of substrings one by one.
Here is a simple solution without recursion:
def my_function(s):
l, target = s.split(',')
result = []
for i in range(len(l)):
for j in range(i+1, len(l)+1):
ss = l[i] + l[i+1:j]
if target in ss:
result.append(ss)
return f'count = {len(result)}, substrings = {result}'
print(my_function("abcd,b"))
#count = 6, substrings = ['ab', 'abc', 'abcd', 'b', 'bc', 'bcd']
Here you go, this should help
from itertools import combinations
output = []
initial = input('Enter string and needed letter seperated by commas: ') #Asking for input
list1 = initial.split(',') #splitting the input into two parts i.e the actual text and the letter we want common in output
text = list1[0]
final = [''.join(l) for i in range(len(text)) for l in combinations(text, i+1)] #this is the core part of our code, from this statement we get all the available combinations of the set of letters (all the way from 1 letter combinations to nth letter)
for i in final:
if 'b' in i:
output.append(i) #only outputting the results which have the required letter/phrase in it

generate a sequence with respect to subsequences in python

I try to generate the following sequences.
text = ACCCEBCE
target = 000000D0
a random text of different characters is generated. In the text sequence, if the following subsequences are found, the target is going to be D or E. Otherwise, the target will be 0.
ABC --> D
BCD --> E
I write the following code. It works well if I generate a small number of characters. But it does not give any output if I make timesteps = 1000 etc.
import string
import random as rn
import numpy as np
def is_subseq(x, y):
it = iter(y)
return all(any(c == ch for c in it) for ch in x)
def count(a, b, m, n):
# If both first and second string
# is empty, or if second string
# is empty, return 1
if ((m == 0 and n == 0) or n == 0):
return 1
# If only first string is empty
# and second string is not empty,
# return 0
if (m == 0):
return 0
# If last characters are same
# Recur for remaining strings by
# 1. considering last characters
# of both strings
# 2. ignoring last character
# of first string
if (a[m - 1] == b[n - 1]):
return (count(a, b, m - 1, n - 1) +
count(a, b, m - 1, n))
else:
# If last characters are different,
# ignore last char of first string
# and recur for remaining string
return count(a, b, m - 1, n)
# create a sequence classification instance
def get_sequence(n_timesteps):
alphabet="ABCDE"#string.ascii_uppercase
text = ''.join(rn.choices(alphabet, k=n_timesteps))
print(text)
seq_length=3
subseqX = []
subseqY = []
for i in range(0, len(alphabet) - seq_length, 1):
seq_in = alphabet[i:i + seq_length]
seq_out = alphabet[i + seq_length]
subseqX.append([char for char in seq_in])
subseqY.append(seq_out)
print(seq_in, "\t-->\t",seq_out)
y2 = []
match = 0
countlist=np.zeros(len(subseqX))
for i, val in enumerate(text):
found = False
counter = 0
for g, val2 in enumerate(subseqX):
listToStr = ''.join(map(str, subseqX[g]))
howmany = count(text[:i], listToStr, len(text[:i]),len(listToStr))
if is_subseq(listToStr, text[:i]):
if countlist[g] < howmany:
match = match + howmany
countlist[g] = howmany
temp = g
found = True
if found:
y2.append(subseqY[temp])
else:
y2.append(0)
print("counter:\t", counter)
print(text)
print(y2)
# define problem properties
n_timesteps = 100
get_sequence(n_timesteps)
It might be because of the depth of the recursive function. But I need to generate 1000 or 10000 characters.
How can I fix this problem? Any ideas?
I'm not sure I understand all you're trying to do (lots of code there), but I believe this simplified form of the function should work. It maintains a set of subsequences seen so far. It only extends them by adding the next letter when it is encountered. This allows the flagging to know if the prefix to the sequence up to the current character has been seen before.
def flagSequence(S,letters="ABCDE",seqLen=3):
subSeqs = set()
result = "0"
for c in S[:-1]:
p = letters.index(c)
subSeqs.add(c)
if p>0:
subSeqs.update([s+c for s in subSeqs if s[-1]==letters[p-1]])
if p in range(seqLen-1,len(letters)-1) and letters[p-seqLen+1:p+1] in subSeqs:
result += letters[p+1]
else:
result += "0"
return result
output:
text = "BDBACCBECEECAEAEDCAACBCCDDDBBDEEDABDBDE"
print(text)
print(flagSequence(text))
BDBACCBECEECAEAEDCAACBCCDDDBBDEEDABDBDE
000000000D00D0000ED00D0DDEEE00E00E00E0E
with more letters:
alphabet=string.ascii_uppercase
text = ''.join(rn.choices(alphabet, k=10000))
flags = flagSequence(text,alphabet)
print(text[:60])
print(flags[:60])
CHUJKAMWCAAIBXGIZFHALAWWFDDELXREMOQQVXFPNYJRQESRVEJKIAQILYSJ...
000000000000000000000M000000FM00FN00000G0OZK0RFTS0FKLJ0RJMZT...
with longer sequences:
alphabet=string.ascii_uppercase
text = ''.join(rn.choices(alphabet, k=10000))
flags = flagSequence(text,alphabet,seqLen=10)
print(text[200:260])
print(flags[200:260])
...PMZCDQXAOHVMTRLYCNCJABGGNZYAWIHJJCQKMMAENQFHNQTOQOPPGHVQZXZU...
...00N0000Y000WN000Z0O0K0000O0Z0X00KK00LNN00O000O00P0PQQ00WR0Y0...

Python compare partial string in a list with each other

I am trying to write a code to compare each string in a list to each other and then generate its regex for similarity
list = ["LONDON-UK-L16-N1",
"LONDON-UK-L17-N1",
"LONDON-UK-L16-N2",
"LONDON-UK-L17-N2",
"PARIS-France-L16-N2"]
I am trying to get an output as below
LONDON-UK-L(16|17)-N(1|2)
is that possible? thanks
Update: just to make it clear i am trying to
input: list, or strings
Action: compare list items to each other, and check for similarity (to fix it-first group of a string), and use regex for any other not similar part of item, so instead of having for items, we can have a single output (using regex)
output: regex to match not similar
input:
tez15-3-s1-y2
tez15-3-s2-y2
bro40-55-s1-y2
output:
tez15-3-s(1|2)-y2
,bro40-55-s1-y2
Its not entirely clear from your question what the exact problem is. Since the data you gave as an example is consistent and well ordered, this problem can be solved easily by simply splitting up the items in the list and categorising them.
loc_list = ["LONDON-UK-L16-N1", "LONDON-UK-L17-N1", "LONDON-UK-L16-N2",
"LONDON-UK-L16-N2", "PARIS-France-L16-N2"]
split_loc_list = [location.split("-") for location in loc_list]
locs = {}
for loc in split_loc_list:
locs.setdefault("-".join(loc[0:2]), {}).\
setdefault("L", set()).add(loc[2].strip("L"))
locs.setdefault("-".join(loc[0:2]), {}).\
setdefault("N", set()).add(loc[3].strip("N"))
for loc, vals in locs.items():
L_vals_sorted = sorted(list(map(int,vals["L"])))
L_vals_joined = "|".join(map(str,L_vals_sorted))
N_vals_sorted = sorted(list(map(int,vals["N"])))
N_vals_joined = "|".join(map(str,N_vals_sorted))
print(f"{loc}-L({L_vals_joined})-N({N_vals_joined})")
will output:
LONDON-UK-L(16|17)-N(1|2)
PARIS-France-L(16)-N(2)
Since there were only two tags here ("L" and "N"), I just wrote them into the code. If there are many tags possible, then you can strip by any letter using:
import re
split = re.findall('\d+|\D+', loc[2])
key, val = split[0], split[1]
locs.setdefault("-".join(loc[0:2]), {}).\
setdefault(key, set()).add(val)
Then iterate through all the tags instead of just fetching "L" and "N" in the second loop.
I post this new (second) implementation on this problem, I think more accurate and hope helpful:
import re
data = [
'LONDON-UK-L16-N1',
'LONDON-UK-L17-N1',
'LONDON-UK-L16-N2',
'LONDON-UK-L17-N2',
'LONDON-UK-L18-N2',
'PARIS-France-L16-N2',
]
def merge(data):
data.sort()
data = [y for y in [x.split('-') for x in data]]
for col in range(len(data[0]) - 1, -1, -1):
result = []
def add_result():
result.append([])
if headstr:
result[-1] += headstr.split('-')
if len(list(findnum)) > 1:
result[-1] += [f'{findstr}({"|".join(sorted(findnum))})']
elif len(list(findnum)) == 1:
result[-1] += [f'{findstr}{findnum[0]}']
if tailstr:
result[-1] += tailstr.split('-')
_headstr = lambda x, y: '-'.join(x[:y])
_tailstr = lambda x, y: '-'.join(x[y + 1:])
_findstr = lambda x: re.findall('(\D+)', x)[0] if re.findall('(\D+)', x) else ''
_findnum = lambda x: re.findall('(\d+)', x)[0] if re.findall('(\d+)', x) else ''
headstr = _headstr(data[0], col)
tailstr = _tailstr(data[0], col)
findstr = _findstr(data[0][col])
findnum = []
for row in data:
if headstr + findstr + tailstr != _headstr(row, col) + _findstr(row[col]) + _tailstr(row, col):
add_result()
headstr = _headstr(row, col)
tailstr = _tailstr(row, col)
findstr = _findstr(row[col])
findnum = []
if _findnum(row[col]) not in findnum:
findnum.append(_findnum(row[col]))
else:
add_result()
data = result[:]
return ['-'.join(x) for x in result]
print(merge(data)) # ['LONDON-UK-L(16|17)-N(1|2)', 'LONDON-UK-L18-N2', 'PARIS-France-L16-N2']
I've implemented the following solution:
import re
data = [
'LONDON-UK-L16-N1',
'LONDON-UK-L17-N1',
'LONDON-UK-L16-N2',
'LONDON-UK-L16-N2',
'PARIS-France-L16-N2'
]
def deconstruct(data):
data = [y for y in [x.split('-') for x in data]]
result = dict()
for x in data:
pointer = result
for y in x:
substr = re.findall('(\D+)', y)
if substr:
substr = substr[0]
if not substr in pointer:
pointer[substr] = {0: set()}
pointer = pointer[substr]
substr = re.findall('(\d+)', y)
if substr:
substr = substr[0]
pointer[0].add(substr)
return result
def construct(data, level=0):
result = []
for key in data.keys():
if key != 0:
if len(data[key][0]) == 1:
nums = list(data[key][0])[0]
elif len(data[key][0]) > 1:
nums = '(' + '|'.join(sorted(list(data[key][0]))) + ')'
else:
nums = ''
deeper_result = construct(data[key], level + 1)
if not deeper_result:
result.append([key + nums])
else:
for d in deeper_result:
result.append([key + nums] + d)
return result if level > 0 else ['-'.join(x) for x in result]
print(construct(deconstruct(data)))
# ['LONDON-UK-L(16|17)-N(1|2)', 'PARIS-France-L16-N2']
Don't use 'list' as a variable name... it's a reserved word.
import re
lst = ['LONDON-UK-L16-N1', 'LONDON-UK-L17-N1', 'LONDON-UK-L16-N2', 'LONDON-UK-L16-N2', 'PARIS-France-L16-N2']
def check_it(string):
return re.search(r'[a-zA-Z\-]*L(\d)*-N(\d)*', string)
[check_it(x).group(0) for x in lst]
will output:
['LONDON-UK-L16-N1',
'LONDON-UK-L17-N1',
'LONDON-UK-L16-N2',
'LONDON-UK-L16-N2',
'PARIS-France-L16-N2']
From there, look into groups and define a group to cover the pieces that you want to use for similarity.

Addition of two binaries numbers in Python

Hey guys i have a trouble when i want to add two binaries numbers in Python, i mean i can enter a chain of character in a form of a string but i don't know how to select a specific value in the chain. Here is my code:
chaina = input('Enter your first binary number')
chainb = input('Enter your second binary number')
liste = str()
r = 0
for i in range [-1,chaina]:
t = 0
t = chaina() + chainb() + r
if t == 2 :
r = 1
liste = str(t) + liste
elif t == 0 or t == 1:
r = 0
liste = str(t) + liste
To add two binary numbers chaina and chainb:
bin(eval('0b{} + 0b{}'.format(chaina, chainb)))
Or, if you want the binary number without the leading '0b':
format(eval('0b{} + 0b{}'.format(chaina, chainb)), 'b')
Explanation
Assume for illustration that chaina = '1010' and chainb = '1111'. Then:
>>> '0b{} + 0b{}'.format(chaina, chainb)
'0b1010 + 0b1111'
By applying eval() on this string, we get the same result as if we typed the expression 0b1010 + 0b1111 directly into Python console.
>>> 0b1010 + 0b1111
25
>>> eval('0b1010 + 0b1111')
25
Finally, bin() produces a binary representation of the number passed to it as an argument:
>>> bin(25)
'0b11001'
The same thing is accomplished by calling format() with a 'b' argument:
>>> format(25, 'b')
'11001'
All put together, we are getting the expressions shown above.
Why don't you simply convert them into decimal and add them as you would do with decimals:
y = '0b101010'
z = '0b101010'
print(int(y,2) + int(z,2))
print(bin((int(y,2) + int(z,2))))
Assuming that you want to do a binary sum by hand, you must:
process both numbers starting from the end (reversed will help here)
consistently add bits processing carry until the lengther of both numbers is exhausted
reorder the result bits (here again reversed)
Code could be (assuming that you can be sure that chaina and chainb only consist in 0 and 1 characters, no test for it here):
def binsum(chaina, chainb):
def next0(it):
"""Retrieve next digit from a binary representation, 0 when exhausted"""
try:
return int(next(it))
except StopIteration:
return 0
a = reversed(chaina) # reverse chains to start with lowest order bit
b = reversed(chainb)
r = 0
result = [] # future result
for i in range(n):
t = next0(a) + next0(b) + r # add with carry
if t > 1:
t -= 2
r = 1
else:
r = 0
result.append('1' if t else '0')
if r != 0: # do not forget last carry
result.append('1')
return ''.join(result)
A couple of suggestions
normalize the lengths of the bit strings
l0, l1 = map(len, (str0, str1))
if l0 < l1:
str0 = "0"*(l1-l0) + str0
elif l1 < l0:
str1 = "0"*(l0-l1) + str1
do a loop on the reversed strings elements and construct the binary string bottom up
remainder = 0
result = ""
for bit_0, bit1 in zip(reversed(str0), reversed(str1)):
bit_0, bit_1 = map(int, (bit_0, bit_1))
new_bit, remainder = f(bit_0, bit_1, remainder)
result = str(new_bit) + result
if remainder != 0
...
writing f(bit_0, bit_1, remainder) and treating what to do if remainder is not null at the end of the loop is left as an exercise.

Categories