Rewrite path between nodes as data structure from string in python - python

I am trying to convert a string which shows an optimal path between nodes from a string to a data structure.
At each node A, B, C, ... my scanner tells me a result of "Triple", "Double", "Single" or "none". Depending on that result, the optimal path tells me which node I should go to next.
i.e:
A double B triple
A double B double C double D triple
A double B double C double D double E double
A double B double C single D double
all paths
I want to convert a collection of these strings into a data structure like the following:
checkMap: {
node: "A",
double: {
node: "B",
double: {
node: "C",
double: {
node: "D",
double: {
node: "E"
}
},
single: {
node: "D"
}
}
},
single: {
node: "F"
}
}
This maps out which nodes connect to which and what result (single, double...) takes you there. If there are no nodes continuing that shows it terminates there.
My current attempt:
import csv which contains all routes as a pandas df, and put all the routes into a list and remove the final result (as result at termination point is not needed in structure):
# Split every route on single spaces and drop its last token (the result read
# at the terminating node).  Routes of two tokens or fewer are skipped.
paths = [tokens[:-1] for tokens in df[0].str.split(" ") if len(tokens) > 2]
Remove any duplicates (removing last result creates duplicates):
# Drop repeated routes, keeping the first occurrence of each.  ``paths`` is
# updated in place so every reference to the list sees the result.
seen = set()
unique_paths = []
for path in paths:
    key = tuple(path)  # lists are unhashable; a tuple can be stored in a set
    if key not in seen:
        seen.add(key)
        unique_paths.append(path)
paths[:] = unique_paths
Then I remove paths which are subsets of other paths to simplify the problem:
# Drop every route that is only the beginning of another route.  The test is
# positional (a prefix comparison): comparing the tokens as sets would also
# discard ["A", "single", "F"] because of ["A", "double", "B", "single", "F"],
# although the two routes leave A through different results.
# Of several identical routes only the last one is kept.
redundant = [
    any(
        other[:len(path)] == path and (len(other) > len(path) or j > i)
        for j, other in enumerate(paths)
        if j != i
    )
    for i, path in enumerate(paths)
]
paths[:] = [path for path, drop in zip(paths, redundant) if not drop]
Then it is here I am struggling to achieve the end result. At this point, I have the following list
simplified list
I made a recursive function which outputs each of the paths, but I can't think how to account for when the path splits into two (or more) paths for different results i.e. at A double and A single
def thing(arr, string="", count=-1):
    """Render one path as nested ``node``/result text and return it.

    ``arr`` alternates node names and scan results and ends on a node, e.g.
    ``["A", "double", "B"]``.  ``string`` is the text produced so far and
    ``count`` is one less than the number of braces opened so far; both are
    used by the recursive calls and normally keep their defaults.

    The finished text is printed once and returned.
    """
    if len(arr) == 1:
        # Last node: close exactly the braces that were opened on the way down
        # (none at all when the path consists of a single node).
        string = string + 'node: "' + arr[0] + '"' + (count + 1) * "}" + "\n"
        print(string)
        return string
    string = string + 'node: "' + arr[0] + '",\n' + arr[1] + ': {\n'
    # Pass the finished text back up instead of returning the partial prefix.
    return thing(arr[2:], string, count + 1)
# Render every path and collect the result in ``routes``.
for path in paths:
    routes.append(thing(path))

Related

Find all possible combinations with replacement in a string

Here's the code I have so far:
from itertools import combinations, product
string = "abcd012345"
char = "01268abc"
# Pick every pair of positions i < j and place every ordered pair of
# replacement characters there.
for i, j in combinations(range(len(string)), 2):
    for char1, char2 in product(char, repeat=2):
        print(string[:i] + char1 + string[i+1:j] + char2 + string[j+1:])
So, the string is abcd012345, and I change two characters in order to find all possible combinations. The characters are 01268abc. In this example, we got 2880 combinations.
The goal is to set what character(s) will be in a specified place of the string. Example below:
from itertools import combinations, product
string = "abcd012345"
# place   0123456789
# One set of candidate characters per position of ``string``; an empty value
# means that position is never changed.
char_to_change_for_place0 = "ab02"
char_to_change_for_place1 = "14ah"
char_to_change_for_place2 = "94nf"
char_to_change_for_place3 = "a"
char_to_change_for_place4 = "9347592"
char_to_change_for_place5 = "93478nvg"
char_to_change_for_place6 = "b"
char_to_change_for_place7 = ""
char_to_change_for_place8 = ""
char_to_change_for_place9 = "84n"
# NOTE: ``char`` is not defined in this snippet, and the per-place strings
# above are not used by the loop yet.
for i, j in combinations(range(len(string)), 2):
    for char1, char2 in product(char, char):
        print(string[:i] + char1 + string[i+1:j] + char2 + string[j+1:])
Note:
some places can be empty and stays the same as in the place 7 and 8.
the number of places will be 64.
the number of characters to change will be 4, not 2 as in the example.
I will enjoy to learn from your solutions and ideas, thank you.
This boils down to adding the current letter of each position inside string to your current replacements for that position and then creating all possible combinations of those options:
from itertools import combinations, product
string = "abcd012345"
# must be of the same length as (string); each entry corresponds to the same index in string
p = ["ab02", "14ah", "94nf", "a", "9347592", "93478nvg", "b", "", "", "84n"]
errmsg = f"Keep them equal lengths: '{string}' ({len(string)}) vs {p} ({len(p)})"
# Raise explicitly: an ``assert`` is stripped when Python runs with -O.
if len(p) != len(string):
    raise ValueError(errmsg)
# frozenset() eliminates duplicates among the letter in string + its replacements
d = {idx: frozenset(v + string[idx]) for idx, v in enumerate(p)}
# creating this list takes memory
all_of_em = [''.join(whatever) for whatever in product(*d.values())]
# if you hit a MemoryError creating the list, write to a file instead;
# this uses a generator, which limits memory usage, but the file is going
# to get BIG
# with open("words.txt","w") as f:
#     for w in (''.join(whatever) for whatever in product(*d.values())):
#         f.write(w+"\n")
print(*all_of_em, f"\n{len(all_of_em)}", sep="\t")
Output:
2and2g234n 2and2g2348 2and2g2344 2and2g2345 2and27b34n
[...snipp...]
249d99234n 249d992348 249d992344 249d992345
100800
If you value the order of letters in your replacements, use
# Keep the replacement characters in their written order; the original letter
# is put in front only when it is not already among them.  Duplicates inside
# a replacement string are kept.
d = {idx: (v if string[idx] in v else string[idx]+v) for idx, v in enumerate(p)}
instead:
abcd012345 abcd012348 [...] 2hfa2gb344 2hfa2gb34n 115200
The difference in the counts is due to the duplicate 9 in "9347592", which is removed when using frozensets.
To get only those that changed fewer than 5 things:
# use a generator expression to reduce memory usage
all_of_em = (''.join(whatever) for whatever in product(*d.values()))
# build the list of results that differ from string in fewer than 5 positions
fewer = [w for w in all_of_em if sum(a != b for a, b in zip(w, string)) < 5]

Replace all occurrences of the substring in string using string slicing

I want to replace all substring occurrences in a string, but I wish not to use the replace method. At the moment, experiments have led me to this:
def count_substrings_and_replace(string, substring, rpl=None):
    """Count occurrences of ``substring`` and preview each single replacement.

    Every start position of ``string`` is tested, so overlapping occurrences
    are all counted.  When ``rpl`` is truthy, each match prints ``string``
    with only that one occurrence replaced.

    Returns ``(count, string)``; the returned string is the unchanged input.
    """
    string_size = len(string)
    substring_size = len(substring)
    count = 0
    _o = string
    for i in range(0, string_size - substring_size + 1):
        if string[i:i + substring_size] == substring:
            if rpl:
                # Printed only: _o is never reassigned, so each line shows a
                # single replacement applied to the original text.
                print(_o[:i] + rpl + _o[i + substring_size:])
            count += 1
    return count, _o


count_substrings_and_replace("aaabaaa", "aaa", "ddd")
but I have output like this:
dddbaaa
aaabddd
not dddbddd.
Update 1:
I figured out that I can only replace correctly with a string of the same length of substring. For example for count_substrings_and_replace("aaabaaa", "aaa", "d") I got output: (2, 'dbaad') not dbd
Update 2:
Issue described in update 1 did appear because of string comparing relative to the original string (line 8) that does not change throughout the process.
Fixed:
def count_substrings_and_replace(string, substring, rpl=None):
    """Count occurrences of ``substring`` and optionally replace them.

    The input is scanned from left to right.  When ``rpl`` is truthy every
    match is replaced and the scan continues after the replaced text, so
    matches never overlap and inserted text is never re-examined.  When
    ``rpl`` is ``None`` or empty nothing is replaced and every (possibly
    overlapping) occurrence is counted.

    Returns ``(count, result)``.

    Raises ``ValueError`` when ``substring`` is empty.
    """
    if not substring:
        raise ValueError("substring must not be empty")
    substring_size = len(substring)
    pieces = []
    count = 0
    copied_up_to = 0  # start of the input text not yet copied into pieces
    i = 0
    # Positions refer to the untouched input, so a replacement of a different
    # length cannot shift the matches that follow.
    while i <= len(string) - substring_size:
        if string.startswith(substring, i):
            count += 1
            if rpl:
                pieces.append(string[copied_up_to:i])
                pieces.append(rpl)
                i += substring_size
                copied_up_to = i
                continue
        i += 1
    pieces.append(string[copied_up_to:])
    return count, "".join(pieces)


count_substrings_and_replace("aaabaaa", "aaa", "d")
Output: (2, dbd)
You never update the value of _o when a match is found, you're only printing out what it'd look like if it was to be replaced. Instead, inside that innermost if statement should be two lines like:
_o = _o[:i] + rpl + _o[i + substring_size:]
print(_o)
That would print the string every time a match is found and replaced, moving the print statement to run after the for loop would make it only run once the entire string was parsed and replaced appropriately.
Just my mistake. I had to pass the value to the variable on each iteration not print:
_o = _o[:i] + rpl + _o[i + substring_size:]

Is there any way to convert the string back to previous one?

The Question Description:
You write all your passwords in a diary so that you don't forget them. But clearly, this is too risky, so you came up with a simple plan: you will write each password by shifting all the letters by a certain step. For example, if you decide your step to be 3, then 'a' will become 'd', 'k' will become 'n', and so on for all letters. The last letters simply circle back to 'a'; in this case, 'y' will become 'b', and so on. Now you just have to remember the step size, and then you can check the password any time you want.
Input:
A list of two elements.
The first element will be a string consisting of only alphabets that are taken from the diary and the second element will be the step size.
Output:
A string denoting the password
Sample input: ['ytLvei', 4]
Sample output: upHrae
Explanation:
The password was 'upHrae'. Upon changing it by step of 4,
u became y,
p became t,
H became L,
r became v,
a became e,
e became i,
and thus what was written in the diary was ytLvei
Sample input: ['banana', 7]
Sample output: utgtgt
To get the above output I have written the code is given below.
import ast

# Read a literal such as ['ytLvei', 4] from standard input: the text copied
# from the diary followed by the step size.
n = ast.literal_eval(input())
n1 = n[0]
step = n[1]
def enc_dec(string,step):
    """Shift every ASCII letter of ``string`` back by ``step`` places.

    Upper- and lower-case letters wrap around inside their own alphabet.
    A negative ``step`` shifts forward instead.  Any other character is
    copied unchanged.

    Returns the shifted string.
    """
    shifted = []
    for ch in string:
        if "A" <= ch <= "Z":
            base = 65  # ord("A")
        elif "a" <= ch <= "z":
            base = 97  # ord("a")
        else:
            # Not a letter: leave it alone instead of forcing it into a-z.
            shifted.append(ch)
            continue
        shifted.append(chr((ord(ch) - step - base) % 26 + base))
    return "".join(shifted)
# Shift the text read above by ``step`` and print the result.
print(enc_dec(n1,step))
My sample test cases i.e.
Sample input1: ['banana', 7]
Sample output1: utgtgt
Sample input2: ['ytLvei', 4]
Sample output2: upHrae
Both of the above sample inputs pass, but when I try to convert 'upHrae' to 'ytLvei' I get a different output, i.e. 'qlCnwz'.
Can anyone tell me why this is happening?
This is because you are changing it up by step of 4. You need to change it down by 4. Your code doesn't work either-- you can't add and subtract ints by strs in python!
Here is a new code that should work:
letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'


def convert_str(string, step):
    """Shift every letter of ``string`` forward by ``step`` places.

    Lower- and upper-case letters wrap around inside their own alphabet, so
    the case of every letter is preserved for any positive or negative
    ``step``.

    Raises ``ValueError`` for a character that is not in ``letters``.
    """
    converted = []
    for char in string:
        idx = letters.index(char)
        # ``letters`` holds a-z at 0-25 and A-Z at 26-51; rotate within the
        # half the character belongs to instead of running into the other.
        start = 0 if idx < 26 else 26
        converted.append(letters[start + (idx - start + step) % 26])
    return "".join(converted)
# Ask for the text and print it shifted by a fixed step of 4.
in_str = input("string: ")
print(convert_str(in_str, 4))
This will make upHare to ytLevi.
To convert down, use negative numbers:
print(convert_str(in_str, -4))
The cipher as described rotates the alphabet in one direction to encode, and another to decode; they aren't interchangeable operations. If you aren't allowed to specify a negative step when testing your input, then define different functions for encoding/decoding:
def encode(message: str, step: int) -> str:
    """Return ``enc_dec(message, step)``: ``message`` shifted with ``step`` as given."""
    return enc_dec(message, step)
def decode(message: str, step: int) -> str:
    """Return ``enc_dec(message, -step)``: the shift of ``encode`` reversed."""
    return enc_dec(message, -step)
Now you can do:
>>> print(decode('upHrae', 4))
ytLvei
This should work even when the cases are different lower or higher
import ast
# Read a literal such as ['ytLvei', 4] from standard input.
n = ast.literal_eval(input())
string = n[0]  # first element: the text to convert
step = n[1]  # second element: the step size
letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
l1='abcdefghijklmnopqrstuvwxyz'
l2='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
converted = ""


def convert_str(string, step):
    """Shift every letter of ``string`` forward by ``step`` places.

    Lower-case letters rotate inside ``l1`` and upper-case letters inside
    ``l2``; the index is taken modulo 26, so any positive or negative
    ``step`` wraps around.

    Raises ``ValueError`` for a character that is in neither alphabet.
    """
    converted = []
    for char in string:
        alphabet = l1 if char in l1 else l2
        # index() raises ValueError when char is not a letter of either case.
        converted.append(alphabet[(alphabet.index(char) + step) % 26])
    return "".join(converted)
# Negate the step so the text is shifted back by ``step`` places.
print(convert_str(string, -step))

Is there a way to replace the first and last three characters in a list of sequences using Python?

I am attempting to use Python to replace certain characters in a list of sequences that will be sent out for synthesis. The characters in question are the first and last three of each sequence. I am also attempting to add a * between each character.
The tricky part is that the first and last character need to be different from the other two.
For example: the DNA sequence TGTACGTTGCTCCGAC would need to be changed to /52MOErT/*/i2MOErG/*/i2MOErT/*A*C*G*T*T*G*C*T*C*C*/i2MOErG/*/i2MOErA/*/32MOErC/
The first character needs to be /52MOEr_/ and the last needs to be /32MOEr_/, where the _ is the character at that index. For the example above it would be T for the first and C for the last. The other two, the GT and GA would need to be /i2MOEr_/ modifications.
So far I have converted the sequences into a list using the .split() function. The end result was ['AAGTCTGGTTAACCAT', 'AATACTAGGTAACTAC', 'TGTACGTTGCTCCGTC', 'TGTAGTTAGCTCCGTC']. I have been playing around for a bit but I feel I need some guidance.
Is this not as easy to do as I thought it would be?
You can just use the divide and conquer algorithm. Here's my solution to achieve your goal.
dna = "TGTACGTTGCTCCGAC"
dnaFirst3Chars = '/52MOEr' + dna[0] + '/*/i2MOEr' + dna[1] + '/*/i2MOEr' + dna[2] + '/*'
dnaMiddle = '*'.join(dna[3:-3])
# Every modification is wrapped in slashes, so the second-to-last base needs
# '/*/i2MOEr' (not '/*i2MOEr') in front of it.
dnaLast3Chars = '*/i2MOEr' + dna[-3] + '/*/i2MOEr' + dna[-2] + '/*/32MOEr' + dna[-1] + '/'
dnaTransformed = dnaFirst3Chars + dnaMiddle + dnaLast3Chars
print(dnaTransformed)
# Output:
# /52MOErT/*/i2MOErG/*/i2MOErT/*A*C*G*T*T*G*C*T*C*C*/i2MOErG/*/i2MOErA/*/32MOErC/
UPDATE:
For simplicity, you can transform the above code in a function like this:
def dna_transformation(dna):
    """Take a DNA string and return the transformed DNA.

    The first base becomes ``/52MOEr_/``, the last base ``/32MOEr_/`` and the
    two bases next to each of them ``/i2MOEr_/`` (``_`` is the base itself).
    All elements, including the untouched middle bases, are joined with
    ``*``.

    The string is expected to hold at least six bases: with three to five
    the two ends overlap and bases are repeated, and with fewer than three
    an ``IndexError`` is raised.
    """
    elements = [
        '/52MOEr' + dna[0] + '/',
        '/i2MOEr' + dna[1] + '/',
        '/i2MOEr' + dna[2] + '/',
        *dna[3:-3],
        '/i2MOEr' + dna[-3] + '/',
        '/i2MOEr' + dna[-2] + '/',
        '/32MOEr' + dna[-1] + '/',
    ]
    # Joining a single list puts exactly one '*' between neighbours, also
    # when there are no middle bases.
    return '*'.join(elements)


print(dna_transformation("TGTACGTTGCTCCGAC"))  # call the function
# Output: /52MOErT/*/i2MOErG/*/i2MOErT/*A*C*G*T*T*G*C*T*C*C*/i2MOErG/*/i2MOErA/*/32MOErC/
Assuming there's a typo in your expected result and it should actually be
/52MOErT/*/i2MOErG/*/i2MOErT/*A*C*G*T*T*G*C*T*C*C*/i2MOErG/*/i2MOErA/*/32MOErC/ the code below will work:
# python3
def encode_sequence(seq):
    """Return ``seq`` with its first and last three bases wrapped in modifications.

    The first base is formatted as ``/52MOEr_/``, the last as ``/32MOEr_/``
    and the two bases next to each of them as ``/i2MOEr_/``.  The bases in
    between are kept as they are and everything is joined with ``*``.
    """
    front_ix = ["/52MOEr{}/", "/i2MOEr{}/", "/i2MOEr{}/"]
    back_ix = ["/i2MOEr{}/", "/i2MOEr{}/", "/32MOEr{}/"]
    # Pair each base at either end with the template for its position.
    encoded = [template.format(base) for base, template in zip(seq[:3], front_ix)]
    encoded.extend(seq[3:-3])
    encoded.extend(template.format(base) for base, template in zip(seq[-3:], back_ix))
    return "*".join(encoded)
Read through the code and make sure you understand it. Essentially we're just slicing the original string and inserting the bases into the format you need. Each element of the final output is added to a list and joined by the * character at the end.
If you need to dynamically specify the number and name of the bases you extract from the front and back of the sequence you can use this version. Note that the {} braces tell the string.format function where to insert the base.
def encode_sequence_2(seq, front_ix, back_ix):
    """Encode ``seq`` using caller-supplied templates for both ends.

    ``front_ix`` and ``back_ix`` are lists of format strings containing
    ``{}``.  The first ``len(front_ix)`` bases are formatted with
    ``front_ix``, the last ``len(back_ix)`` bases with ``back_ix``, and the
    bases in between are kept unchanged.  All elements are joined with ``*``.
    Either list may be empty.
    """
    front_len = len(front_ix)
    # Use an explicit start index: the slice seq[-0:] would be the whole
    # sequence and seq[front_len:-0] would be empty when back_ix is empty.
    back_start = max(len(seq) - len(back_ix), 0)
    encoded = [template.format(base) for base, template in zip(seq[:front_len], front_ix)]
    encoded.extend(seq[front_len:back_start])
    encoded.extend(template.format(base) for base, template in zip(seq[back_start:], back_ix))
    return "*".join(encoded)
And here's the output:
> seq = "TGTACGTTGCTCCGAC"
> encode_sequence(seq)
/52MOErT/*/i2MOErG/*/i2MOErT/*A*C*G*T*T*G*C*T*C*C*/i2MOErG/*/i2MOErA/*/32MOErC/
If you have a list of sequences to encode you can iterate over the list and encode each:
# Encode every sequence of dna_list, keeping the order of the list.
encoded_list = []
for seq in dna_list:
    encoded_list.append(encode_sequence(seq))
Or with a list comprehension:
encoded_list = [encode_sequence(seq) for seq in dna_list]

Remove consecutive duplicate characters of even count from the given string

I'm trying to solve a problem where I get the string as input and then delete the duplicate characters of even count.
Input:azxxzyyyddddyzzz
Output: azzz
can you help me with this.
My Attempt is working fine for removing duplicate characters but I'm stuck at how to remove duplicate characters of even count
# Utility function to convert string to list
def toMutable(string):
    """Return the characters of ``string`` as a new list."""
    return list(string)
# Utility function to convert list to string
def toString(List):
    """Concatenate the strings in ``List`` into a single string."""
    return ''.join(List)
# Function to remove duplicates in a sorted array
def removeDupsSorted(List):
    """Return the distinct items of the sorted ``List`` joined into a string.

    The distinct items are compacted to the front of ``List`` in place; the
    items behind them are left over from the original content.  An empty
    list gives an empty string.
    """
    if not List:
        return ''
    res_ind = 1
    # In place removal of duplicate characters
    for ip_ind in range(1, len(List)):
        if List[ip_ind] != List[ip_ind-1]:
            List[res_ind] = List[ip_ind]
            res_ind += 1
    # For sorted "geeksforgeeks" the list now reads efgkorskkorss;
    # only the first res_ind items (efgkors) belong to the result.
    return ''.join(List[0:res_ind])
# Function removes duplicate characters from the string
def removeDups(string):
    """Return the distinct characters of ``string`` in sorted order.

    ``string`` itself is not modified; an empty string gives an empty string.
    """
    # Sorting the characters and dropping adjacent repeats yields exactly
    # the sorted set of characters.
    return ''.join(sorted(set(string)))
# Driver program to test the above functions
string = "geeksforgeeks"
print(removeDups(string))
Here's an attempt with itertools.groupby. I'm not sure if it can be done with better time complexity.
from itertools import groupby
def rm_even(s):
    """Repeatedly delete every run of equal characters that has even length.

    One pass removes all even-length runs at once; passes are repeated until
    a pass removes nothing.  The passes run in a loop, so deeply nested
    input does not hit the recursion limit.

    Returns the remaining characters as a string.
    """
    chars = list(s)
    while True:
        kept = []
        for _, group in groupby(chars):
            run = list(group)
            if len(run) % 2:
                kept.extend(run)
        if len(kept) == len(chars):
            # Nothing was removed in this pass: the result is stable.
            return ''.join(kept)
        chars = kept
Demo:
>>> rm_even('azxxzyyyddddyzzz')
'azzz'
>>> rm_even('xAAAAx')
''
Count the letters with Counter and remove the ones that have even count:
from collections import Counter
word = 'azxxzyyyddddyzzz'
count = Counter(word) # Counter({'z': 5, 'y': 4, 'd': 4, 'x': 2, 'a': 1})
# Delete every letter whose total number of occurrences in the word is even,
# wherever it occurs (consecutive or not).
for key, value in count.items():
    if value%2 == 0:
        word = word.replace(key, "")
print(word) # 'azzzzz'
def remove_even_dup(string):
    """Remove runs of equal letters that have even length.

    The string is split into runs of equal letters, which are processed from
    left to right.  An odd run is kept.  An even run is dropped; when the
    kept run before it and the run after it share a letter they are merged
    into one run, which is examined again.

    Returns the letters of all kept runs in their original order.
    """
    # Group the string into runs: (letter, {indexes of the run}).
    spans = []
    for idx, letter in enumerate(string):
        if spans and spans[-1][0] == letter:
            spans[-1][1].add(idx)
        else:
            spans.append((letter, {idx}))
    # reverse the spans so we can use them as a stack
    spans.reverse()
    visited = []
    while spans:
        letter, indexes = spans.pop()
        if len(indexes) % 2:
            visited.append((letter, indexes))
            continue
        # The even run is dropped.  A merge needs a neighbour on both sides;
        # an even run at either end of the string has only one.
        if visited and spans:
            prev_letter, prev_indexes = visited[-1]
            next_letter, next_indexes = spans[-1]
            # if the previous one and the next one have the same letter, merge them
            if prev_letter == next_letter:
                visited.pop()
                spans.pop()
                # The merged run carries the neighbours' letter (not the
                # dropped run's) so later comparisons see the right letter.
                spans.append((prev_letter, prev_indexes | next_indexes))
    to_keep = {idx for _, indexes in visited for idx in indexes}
    return ''.join(letter for idx, letter in enumerate(string) if idx in to_keep)
I used Collection because it is easy to delete and we have to convert into the string.
import java.util.*;
public class RemoveEvenCount {
public static void main(String[] args) {
//String a="azxxzyyyddddyzzz";
String a="xAAAAx";
ArrayList a2=new ArrayList<>();
for(int i=0;i<a.length();i++)
{
a2.add(a.charAt(i));
}
a2=removeData(a2);
System.out.print(a2);
}
public static ArrayList removeData(ArrayList a2)
{
if(a2.size()==2)
{
if(a2.get(0)==a2.get(1))
return null;
}
for(int i=0;i<a2.size();i++)
{
int count =1;
for(int j=0;j<a2.size()-1;j++)
{
if(a2.get(j)==a2.get(j+1))
{
count++;
}else if(count%2==0)
{
for(int k=0;k<count;k++)
{
a2.remove(j-k);
}
return removeData(a2);
}
Count=1
}
}
return a2;
}
}

Categories