Python, find all the possible letter combinations in given morse code

Python, find all the possible letter combinations in given morse code - python

I had to find all the possible letter combinations in a given morse code. The length of the decoded word can be maximum 10 letters. The given file with the letters and the morse code to it looks like this:
A .-
B -...
C -.-.
D -..
E .
F ..-.
G --.
H ....
I ..
J .---
K -.-
L .-..
M --
N -.
O ---
P .--.
Q --.-
R .-.
S ...
T -
U ..-
V ...-
W .--
X -..-
Y -.--
Z --..
The given morse code is this:
morse = '-.----.-.-...----.-.-.-.----.-'
My code looks like this:
def morse_file_to_dict(filename):
    """Load a letter-to-Morse table from a whitespace-separated text file.

    Each non-blank line is expected to hold a letter followed by its code,
    e.g. ``A .-``.  Blank lines (such as an extra newline at the end of the
    file) are skipped instead of aborting the load.

    Args:
        filename: path of the table file.

    Returns:
        dict mapping each letter to its Morse string.

    Raises:
        ValueError: if a non-blank line does not consist of exactly two
            whitespace-separated fields (raised by ``dict``).
    """
    with open(filename) as file:
        # str.split() with no argument already ignores surrounding
        # whitespace, so no separate strip() is needed for the fields.
        return dict(line.split() for line in file if line.strip())
def word_to_morse(s, my_dict):
    """Encode ``s`` as one unbroken Morse string.

    Args:
        s: iterable of symbols (normally a string of letters).
        my_dict: mapping from each symbol to its Morse code.

    Returns:
        The codes of all symbols concatenated without separators.

    Raises:
        KeyError: if a symbol of ``s`` is missing from ``my_dict``.
    """
    # str.join consumes a generator directly; no intermediate list is needed.
    return ''.join(my_dict[w] for w in s)
# Recursively tries every letter whose code matches given_morse at position
# `start`, and adds to my_set each word whose re-encoding equals given_morse.
# Words are limited to 10 letters by the `len(word) < 10` test.
# (Indentation was lost in this listing, so the nesting is presumed.)
def adding_to_set(given_morse, my_set, my_dict, word='', start=0):
for char in my_dict:
# A letter is a candidate when its code equals the slice starting at `start`.
if my_dict[char] == given_morse[start:start + len(my_dict[char])] and len(word) < 10:
# NOTE(review): `start` and `word` are rebound here, so the remaining
# iterations of this same loop continue from the advanced position and the
# longer word instead of the values this call received. Presumably this is
# why only 5 of the expected words are found -- confirm against the fix.
start = start + len(my_dict[char])
word = word + char
adding_to_set(given_morse, my_set, my_dict, word, start)
# A word counts as a solution only if encoding it reproduces the whole input.
if word_to_morse(word, my_dict) == given_morse:
my_set.add(word)
# Decode the puzzle signal with the table read from disk, then report how
# many words were found and what they are.
morse = '-.----.-.-...----.-.-.-.----.-'
pairs = morse_file_to_dict('morse_alphabet.txt')
words = set()
adding_to_set(morse, words, pairs)
print(len(words))
print(words)
My output is:
5
{'KMCBMQRKMK', 'KMCBMGKRMQ', 'KMCBMGCKMK', 'KMNCEJCCMQ', 'KMCDAMCCMQ'}
BUT, the answer should be: 10571 words, not 5
What should I change to get all of them?
Thank you for your time and answer!

I would suggest using recursion and a dictionary to map morse code to letters (not letters to morse code):
# Letter/code table, one "LETTER CODE" pair per line.
morseFile="""A .-
B -...
C -.-.
D -..
E .
F ..-.
G --.
H ....
I ..
J .---
K -.-
L .-..
M --
N -.
O ---
P .--.
Q --.-
R .-.
S ...
T -
U ..-
V ...-
W .--
X -..-
Y -.--
Z --.."""
# Inverted table (code -> letter): decoding looks letters up by their code.
# Each line splits into [letter, code]; reversing it gives the (key, value) pair.
morse = dict(entry.split()[::-1] for entry in morseFile.splitlines())
The function can be built as a generator to avoid storing all the possibilities in a big list:
def decode(coded,maxLen=10):
    """Lazily yield every word that encodes to ``coded``.

    Args:
        coded: Morse string without separators between letters.
        maxLen: number of letters still allowed; ``0`` ends the search.

    Yields:
        Each decoding of ``coded`` made of at most ``maxLen`` letters, using
        the module-level ``morse`` table (code -> letter).
    """
    if maxLen:
        # Only prefixes of up to four symbols are tried as a single letter.
        longest = min(4, len(coded))
        for cut in range(1, longest + 1):
            head, tail = coded[:cut], coded[cut:]
            if head in morse:
                letter = morse[head]
                if tail:
                    # Decode what is left with one letter fewer available.
                    for suffix in decode(tail, maxLen - 1):
                        yield letter + suffix
                else:
                    # The prefix consumed the whole input: a complete word.
                    yield letter
output:
print(sum(1 for _ in decode("-.----.-.-...----.-.-.-.----.-")))
10571
for string in decode("-.----.-.-...----.-.-.-.----.-"):
if len(string)<9: print(string)
YQLWGCYQ
YQLWQRYQ
YQLJNCYQ
YQLJKRYQ
YQLJCNYQ
YQLJCKWQ
YQLJCKJK
YQLJCCMQ
YQLJCCOK

Here is a working solution. I made changes from codes and suggestions in comments and answers. (The Morse to translate is different too)
def word_to_morse(s, my_dict):
    """Encode ``s`` as one unbroken Morse string.

    Args:
        s: iterable of symbols (normally a string of letters).
        my_dict: mapping from each symbol to its Morse code.

    Returns:
        The codes of all symbols concatenated without separators.

    Raises:
        KeyError: if a symbol of ``s`` is missing from ``my_dict``.
    """
    # str.join consumes a generator directly; no intermediate list is needed.
    return ''.join(my_dict[w] for w in s)
def adding_to_set(given_morse, my_set, my_dict, word='', start=0, max_len=10):
    """Collect every decoding of ``given_morse`` into ``my_set``.

    Args:
        given_morse: Morse string without separators between letters.
        my_set: set that receives each complete decoding (mutated in place).
        my_dict: mapping letter -> Morse code.
        word: letters chosen so far (used by the recursion).
        start: index in ``given_morse`` where the next letter must begin
            (used by the recursion).
        max_len: maximum number of letters in a decoded word.

    Returns:
        None; results are added to ``my_set``.
    """
    # The length limit does not depend on the candidate letter, so it is
    # checked once per call rather than once per letter.
    if len(word) >= max_len:
        return
    for char, code in my_dict.items():
        if not given_morse.startswith(code, start):
            continue
        # Fresh names per candidate: sibling candidates must all start from
        # the same `word` and `start` this call received.
        new_word = word + char
        new_start = start + len(code)
        adding_to_set(given_morse, my_set, my_dict, new_word, new_start, max_len)
        # Keep the word only if encoding it reproduces the whole input.
        if ''.join(my_dict[c] for c in new_word) == given_morse:
            my_set.add(new_word)
# Morse alphabet, letter -> code.
pairs = {
    'A': '.-', 'B': '-...', 'C': '-.-.', 'D': '-..',
    'E': '.', 'F': '..-.', 'G': '--.', 'H': '....',
    'I': '..', 'J': '.---', 'K': '-.-', 'L': '.-..',
    'M': '--', 'N': '-.', 'O': '---', 'P': '.--.',
    'Q': '--.-', 'R': '.-.', 'S': '...', 'T': '-',
    'U': '..-', 'V': '...-', 'W': '.--', 'X': '-..-',
    'Y': '-.--', 'Z': '--..',
}
# Signal to decode (letters are not separated).
morse = '.-.--...-....-.'
# Gather all decodings, then print their count followed by the words.
words = set()
adding_to_set(morse, words, pairs)
print(len(words))
print(words)

c++ solution:
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>
char buffer[26];
int l=0;
char *Morse[26];
//initializing Morse Code array
/*
 * Fill the global Morse[] table. Entry i holds the code of the letter
 * 'A' + i. Dashes are written as '_' (not '-') in this table.
 */
void initMorse(){
    static char *const codes[26] = {
        "._",   "_...", "_._.", "_..",  /* A B C D */
        ".",    ".._.", "__.",  "....", /* E F G H */
        "..",   ".___", "_._",  "._..", /* I J K L */
        "__",   "_.",   "___",  ".__.", /* M N O P */
        "__._", "._.",  "...",  "_",    /* Q R S T */
        ".._",  "..._", ".__",  "_.._", /* U V W X */
        "_.__", "__.."                  /* Y Z */
    };
    int i;

    for (i = 0; i < 26; i++)
        Morse[i] = codes[i];
}
/*
 * Print every way of splitting s[strt..len) into letter codes.
 *
 * s     - input signal, using the same symbols as the Morse table
 * strt  - index of the first symbol not yet decoded
 * Morse - table of 26 codes, entry i belonging to letter 'A' + i
 * len   - length of s
 *
 * The letters chosen so far live in the global `buffer`, with the global `l`
 * as the index of the next free slot. Each complete decoding is printed on
 * its own line. Always returns 1.
 */
int solution(char *s,int strt,char **Morse,int len){
    int i;

    if (strt == len) {
        /* Nothing left to decode: just keep the buffer terminated. */
        buffer[l] = 0;
        return 1;
    }
    /* One more letter plus its terminator must fit into the buffer;
       decodings longer than that are skipped instead of overflowing it. */
    if (l + 1 >= (int)sizeof buffer)
        return 1;

    for (i = 0; i < 26; i++) {
        int mlen = (int)strlen(Morse[i]);

        /* The code must fit into the rest of the input and match it. */
        if (strt + mlen > len || strncmp(Morse[i], s + strt, (size_t)mlen) != 0)
            continue;

        buffer[l] = (char)('A' + i);
        if (strt + mlen == len) {
            /* Whole input consumed: print the finished word. */
            buffer[l + 1] = 0;
            printf("%s\n", buffer);
        } else {
            l++;
            solution(s, strt + mlen, Morse, len);
            l--; /* backtrack to try the next letter at this position */
        }
        buffer[l] = 0; /* clear the slot used by this candidate */
    }
    return 1;
}
/*
 * Read one Morse string from standard input and print all its decodings.
 * The input must use the same symbols as the table built by initMorse(),
 * i.e. '.' for dots and '_' for dashes.
 */
int main() {
    char s[100];

    printf("Enter the input string of Morse code:\n");
    /* The field width keeps scanf from writing past the end of s. */
    if (scanf("%99s", s) != 1) {
        fprintf(stderr, "No input read.\n");
        return 1;
    }
    initMorse();
    printf("Possible translations are:\n");
    solution(s, 0, Morse, (int)strlen(s));
    return 0;
}

Related

Converting unicode to gsm encoding in golang

I am working on migrating my project in python to golang and I have a use case for converting utf-8 encoding to corresponding gsm ones if possible. I am very new to go, it will be really helpful to get some documentation or examples around it.
For example: Python snippet
ằ as unicode -> a after gsm encoding
# Python 2 fragment: builds transliterated_text from text one character at a
# time. Indentation was lost in this listing; the helpers (is_gsm,
# is_nonascii_utf8) and the initial values of transliterated_text, gsm and rc
# are not shown here.
for character in text:
# Characters accepted by is_gsm() are appended unchanged, as UTF-8 bytes.
if is_gsm(character):
transliterated_text += character.encode('utf-8')
continue
if is_nonascii_utf8(character):
# Ask unidecode for a replacement (the example above turns "ằ" into "a").
transliterated_char = unidecode.unidecode(character)
# '?' or an empty result is treated as "no replacement": GSM is given up.
if transliterated_char == '?' or transliterated_char == '':
gsm = False
break
# NOTE(review): rc is defined outside this fragment -- its meaning cannot be
# told from here.
if transliterated_char != rc:
character = transliterated_char
transliterated_text += character
else:
transliterated_text += character.encode('utf-8')
# The transliterated text replaces `text` only if no character failed and the
# result as a whole passes is_gsm().
if gsm and is_gsm(transliterated_text.decode('utf-8')):
text = transliterated_text.decode('utf-8')
Thanks

You can do it in this way:
package main
import (
"fmt"
"regexp"
"strings"
)
// utf8GsmChars maps each character whose GSM 03.38 code differs from its
// ASCII/UTF-8 encoding to the byte sequence used in the GSM default alphabet.
// Characters of the extension table map to two bytes starting with ESC (0x1B).
// Characters that keep their ASCII value (letters, digits, '#', ...) are
// deliberately absent.
var utf8GsmChars = map[string]string{
	// 0x00 is '@' in GSM 03.38; '#' keeps its ASCII value 0x23.
	`@`: "\x00", `£`: "\x01", `$`: "\x02",
	`¥`: "\x03", `è`: "\x04", `é`: "\x05",
	`ù`: "\x06", `ì`: "\x07", `ò`: "\x08",
	`Ç`: "\x09", `Ø`: "\x0B", `ø`: "\x0C",
	`Å`: "\x0E", `å`: "\x0F",
	`Δ`: "\x10", `_`: "\x11",
	`Φ`: "\x12", `Γ`: "\x13", `Λ`: "\x14",
	`Ω`: "\x15", `Π`: "\x16", `Ψ`: "\x17",
	`Σ`: "\x18", `Θ`: "\x19", `Ξ`: "\x1A",
	`Æ`: "\x1C", `æ`: "\x1D", `ß`: "\x1E",
	`É`: "\x1F", `Ä`: "\x5B", `Ö`: "\x5C",
	`Ñ`: "\x5D", `Ü`: "\x5E", `§`: "\x5F",
	`¿`: "\x60", `ä`: "\x7B", `ö`: "\x7C",
	`ñ`: "\x7D", `ü`: "\x7E", `à`: "\x7F",
	// Extension table: ESC followed by a second byte.
	`^`: "\x1B\x14", `{`: "\x1B\x28",
	`}`: "\x1B\x29", `\`: "\x1B\x2F",
	`[`: "\x1B\x3C", `~`: "\x1B\x3D",
	`]`: "\x1B\x3E", `|`: "\x1B\x40",
	`€`: "\x1B\x65",
}
// gsmUtf8Chars maps each byte of the GSM 03.38 default alphabet whose meaning
// differs from ASCII to the UTF-8 encoding of the character it stands for.
var gsmUtf8Chars = map[string]string{
	"\x00": "\x40",     // @
	"\x01": "\xC2\xA3", // £
	"\x02": "\x24",     // $
	"\x03": "\xC2\xA5", // ¥
	"\x04": "\xC3\xA8", // è
	"\x05": "\xC3\xA9", // é
	"\x06": "\xC3\xB9", // ù
	"\x07": "\xC3\xAC", // ì
	"\x08": "\xC3\xB2", // ò
	"\x09": "\xC3\x87", // Ç
	"\x0B": "\xC3\x98", // Ø
	"\x0C": "\xC3\xB8", // ø
	"\x0E": "\xC3\x85", // Å (was the bytes of ø)
	"\x0F": "\xC3\xA5", // å
	"\x10": "\xCE\x94", // Δ
	"\x11": "\x5F",     // _
	"\x12": "\xCE\xA6", // Φ
	"\x13": "\xCE\x93", // Γ
	"\x14": "\xCE\x9B", // Λ (was the bytes of Π)
	"\x15": "\xCE\xA9", // Ω
	"\x16": "\xCE\xA0", // Π
	"\x17": "\xCE\xA8", // Ψ
	"\x18": "\xCE\xA3", // Σ
	"\x19": "\xCE\x98", // Θ
	"\x1A": "\xCE\x9E", // Ξ
	"\x1C": "\xC3\x86", // Æ
	"\x1D": "\xC3\xA6", // æ
	"\x1E": "\xC3\x9F", // ß
	"\x1F": "\xC3\x89", // É
	"\x20": "\x20",     // space
	"\x24": "\xC2\xA4", // ¤
	"\x40": "\xC2\xA1", // ¡
	"\x5B": "\xC3\x84", // Ä
	"\x5C": "\xC3\x96", // Ö
	"\x5D": "\xC3\x91", // Ñ
	"\x5E": "\xC3\x9C", // Ü
	"\x5F": "\xC2\xA7", // §
	"\x60": "\xC2\xBF", // ¿
	"\x7B": "\xC3\xA4", // ä (was the bytes of è)
	"\x7C": "\xC3\xB6", // ö
	"\x7D": "\xC3\xB1", // ñ
	"\x7E": "\xC3\xBC", // ü
	"\x7F": "\xC3\xA0", // à
}
// nonASCIIRune matches every code point from U+0080 upwards. It is compiled
// once at start-up instead of on every call.
var nonASCIIRune = regexp.MustCompile("[\\x{0080}-\\x{10FFFF}]")

// UTF8ToGsm0338 converts UTF-8 text to GSM 03.38 bytes using utf8GsmChars.
// Any character that is still outside ASCII after the translation is
// replaced by "?".
func UTF8ToGsm0338(text string) string {
	// All substitutions are done in a single pass over the input. Replacing
	// one key after another would re-translate earlier output, because some
	// table values are themselves keys (Ä becomes "[", and "[" has its own
	// entry), and Go's map iteration order would make the result vary
	// between runs.
	pairs := make([]string, 0, 2*len(utf8GsmChars))
	for char, code := range utf8GsmChars {
		pairs = append(pairs, char, code)
	}
	s := strings.NewReplacer(pairs...).Replace(text)
	return nonASCIIRune.ReplaceAllString(s, "?")
}
// GSM0338ToUTF8 converts GSM 03.38 bytes back to UTF-8 text. Single bytes are
// decoded with gsmUtf8Chars; two-byte escape sequences (ESC + byte) are
// decoded by inverting the matching entries of utf8GsmChars. Bytes without a
// table entry are copied unchanged.
func GSM0338ToUTF8(text string) string {
	// All substitutions are done in a single pass over the input. Replacing
	// one key after another would re-translate earlier output (0x00 becomes
	// "@", i.e. byte 0x40, which has its own entry), and Go's map iteration
	// order would make the result vary between runs.
	pairs := make([]string, 0, 2*(len(gsmUtf8Chars)+len(utf8GsmChars)))
	for code, char := range gsmUtf8Chars {
		pairs = append(pairs, code, char)
	}
	// No single-byte key equals ESC, so an escape sequence is always matched
	// as a whole and its second byte is not decoded on its own.
	for char, code := range utf8GsmChars {
		if len(code) == 2 && code[0] == 0x1B {
			pairs = append(pairs, code, char)
		}
	}
	return strings.NewReplacer(pairs...).Replace(text)
}
// main converts a sample string to GSM 03.38 and back, then prints the
// original, the encoded and the decoded text.
func main() {
	original := "Hello World"
	encoded := UTF8ToGsm0338(original)
	decoded := GSM0338ToUTF8(encoded)
	fmt.Printf("word before: %s\nword after gsm: %s\nword after utf8: %s\n", original, encoded, decoded)
}

why are the spaces in between the words not showing up

I have a morse program but the spaces in between the words are not showing does any one have any ideas? Prefer the simplest way to do so
sample input:
APRIL FOOLS DAY
output for encode_Morse function:
' .- .--. .-. .. .-.. ..-. --- --- .-.. ... -.. .- -.-- '
output for the decode_Morse function:
APRILFOOLSDAY
# Letter -> Morse code. Every code is stored with a space before and after
# it, so codes that are simply concatenated remain separated.
MORSE_CODES = {
    'A': ' .- ', 'B': ' -... ', 'C': ' -.-. ', 'D': ' -.. ',
    'E': ' . ', 'F': ' ..-. ', 'G': ' --. ', 'H': ' .... ',
    'I': ' .. ', 'J': ' .--- ', 'K': ' -.- ', 'L': ' .-.. ',
    'M': ' -- ', 'N': ' -. ', 'O': ' --- ', 'P': ' .--. ',
    'Q': ' --.- ', 'R': ' .-. ', 'S': ' ... ', 'T': ' - ',
    'U': ' ..- ', 'V': ' ...- ', 'W': ' .-- ', 'X': ' -..- ',
    'Y': ' -.-- ', 'Z': ' --.. ',
}
##Define functions here
def encode_Morse(my_msg):
    """Translate ``my_msg`` to Morse, character by character.

    A space stays a space, a character found in ``MORSE_CODES`` becomes its
    code, and any other character becomes ``"*"``.  The message is used as
    given (it is not upper-cased), so lowercase letters also become ``"*"``.

    Returns:
        The concatenation of all translated characters.
    """
    pieces = []
    for letter in my_msg:
        if letter == " ":
            pieces.append(" ")
        elif letter in MORSE_CODES:
            pieces.append(MORSE_CODES[letter])
        else:
            pieces.append("*")
    return "".join(pieces)
# Translates a Morse message back to letters by searching MORSE_CODES for
# each code. (Indentation was lost in this listing; nesting is presumed.)
def decode_Morse(my_msg):
string=""
for word in my_msg.split(" "):
# split() without an argument discards all whitespace, so `ch` is never " ".
for ch in word.split():
if ch!=" " and ch!="*":
# Reverse lookup: find the position of the padded code among the values and
# take the key at the same position.
string=string + list(MORSE_CODES.keys())[list(MORSE_CODES.values()).index(" "+ch+" ")]
# NOTE(review): because `ch` comes from split(), this branch cannot run, so
# no space is ever added between words.
elif ch==" ":
string+=" "
string=string+""
return string

The split function absorbs your delimiter.
I propose:
def decode_Morse(my_msg, codes=None):
    """Decode a message produced by ``encode_Morse`` back into text.

    Args:
        my_msg: Morse text in which every code carries the padding of the
            table and words are separated by one additional space.
        codes: letter -> padded code table; defaults to ``MORSE_CODES``.

    Returns:
        The decoded text with single spaces between words.  Tokens that are
        not in the table (for example the ``"*"`` placeholder) are copied
        through unchanged.
    """
    if codes is None:
        codes = MORSE_CODES
    # Reverse table built once, instead of scanning the values for every code.
    letters = {code.strip(): letter for letter, code in codes.items()}
    # Between two words the encoder emits the trailing padding of one code,
    # a space, and the leading padding of the next code. The separator width
    # is therefore derived from the table's padding rather than hard-coded.
    sample = next(iter(codes.values()), "")
    word_gap = " " * (len(sample) - len(sample.strip()) + 1)
    words = []
    for word in my_msg.split(word_gap):
        words.append("".join(letters.get(ch, ch) for ch in word.split()))
    return " ".join(words)

I propose this solution:
# Letter -> Morse code, stored without any padding.
MORSE_CODES = {
    'A': '.-', 'B': '-...', 'C': '-.-.',
    'D': '-..', 'E': '.', 'F': '..-.', 'G': '--.',
    'H': '....', 'I': '..', 'J': '.---', 'K': '-.-',
    'L': '.-..', 'M': '--', 'N': '-.', 'O': '---',
    'P': '.--.', 'Q': '--.-', 'R': '.-.',
    'S': '...', 'T': '-', 'U': '..-', 'V': '...-',
    'W': '.--', 'X': '-..-', 'Y': '-.--', 'Z': '--..',
}
# Code -> letter, for decoding.
R_MORSE_CODES = {v: k for k, v in MORSE_CODES.items()}

# One space separates the codes of a word; two spaces separate words.
# The word gap is spelled as a repetition so its width cannot be lost when
# the source is reformatted.
_LETTER_GAP = " "
_WORD_GAP = " " * 2


def encode_morse(msg):
    """Encode ``msg`` as Morse.

    Codes within a word are separated by one space and words by two spaces.
    Characters without a code are written as ``'*'``.
    """
    return _WORD_GAP.join(
        _LETTER_GAP.join(MORSE_CODES.get(c, '*') for c in w)
        for w in msg.split()
    )


def decode_morse(msg):
    """Decode the output of ``encode_morse``.

    Words are split on two spaces and codes on whitespace.  Codes that are
    not in the table are written as ``'?'``.
    """
    return " ".join(
        "".join(R_MORSE_CODES.get(c, '?') for c in w.split())
        for w in msg.split(_WORD_GAP)
    )


# Original message
msg = "APRIL FOOLS DAY"
enc_msg = encode_morse(msg)
print(enc_msg)
# prints the three words' codes, with two spaces between words
dec_msg = decode_morse(enc_msg)
print(dec_msg)
# APRIL FOOLS DAY
Deviating from your solution, I:
- do not use spaces in the translation table between characters and Morse codes;
- use one space character to separate single Morse codes and two spaces to mark word separation.
For back-translation I reverse the dictionary's keys and values into another translation table called R_MORSE_CODES, for better readability.
Using one and two spaces is sufficient to decode a Morse code back to its original message, as long as no unknown characters appear.

How would I replace text while preserving the original spacing in Python?

I want to use Python to find-and-replace certain key terms while also preserving the original spacing in a text file.
So, I have a (Fortran) text file:
c mat card 4063
m4063 40000.66c 1.296214e+25 $ 1963.563456 g
1001.80c 2.041536e+25 $ 34.170479 g
mt4063 h/zr.10t zr/h.10t
c
c mat card 4064
m4064 40000.66c 1.292081e+25 $ 1957.303433 g
1001.80c 2.035028e+25 $ 34.061540 g
mt4064 h/zr.10t zr/h.10t
c
c mat card 4065
m4065 40000.66c 1.283016e+25 $ 1943.571491 g
1001.80c 2.020751e+25 $ 33.822573 g
mt4065 h/zr.10t zr/h.10t
c
and here my attempted Python script:
# Card numbers whose entries are to be rewritten.
FE_ID = [4064, 4065]
# For each table: temperature key -> token to write for that temperature.
C12_TEMP_DICT = {'10': '40000.66c', '20': '40000.67c', '30': '40000.68c'}
H1_TEMP_DICT = {'10': ' 1001.80c', '20': ' 1001.81c', '30': ' 1001.82c'}
ZRH_TEMP_DICT = {'10': 'zr/h.10t', '20': 'zr/h.20t', '30': 'zr/h.30t'}
HZR_TEMP_DICT = {'10': 'h/zr.10t', '20': 'h/zr.20t', '30': 'h/zr.30t'}
temp = input("Temperature = ")
new_file = open('new_text.i', 'w')
# (Indentation was lost in this listing; nesting below is presumed.)
for line in open('matcards.i','r'):
entries = line.split()
# Lines whose first field is 'c' are passed over.
if entries[0] != 'c':
# Material cards: first field is "m<id>".
if entries[0] in [f'm{f}' for f in FE_ID]:
for entry in entries:
if entry in list(C12_TEMP_DICT.values()):
# NOTE(review): `==` compares and discards the result; nothing is replaced.
entry == C12_TEMP_DICT[temp]
elif entry in list(H1_TEMP_DICT.values()):
entry == H1_TEMP_DICT[temp]
# Re-joining the fields with single spaces is what discards the original
# spacing (and the line's trailing newline).
line = ' '.join(entries)
# Thermal cards: first field is "mt<id>".
elif entries[0] in [f'mt{f}' for f in FE_ID]:
for entry in entries:
if entry in list(ZRH_TEMP_DICT.values()):
# NOTE(review): this rebinds the loop variable only; `entries` is unchanged.
entry = ZRH_TEMP_DICT[temp]
if entry in list(HZR_TEMP_DICT.values()):
entry = HZR_TEMP_DICT[temp]
line = ' '.join(entries)
new_file.write(line)
How would I preserve the original file's spacing in my Python script, as if I was manually "find-and-replacing" it?

You can use python's str.ljust function for what you want. It basically pads the string with spaces or other characters you specify.
# Card numbers whose entries are to be rewritten.
FE_ID = [4064, 4065]

# Temperature key -> token to use for that temperature, one table per kind
# of token.
C12_TEMP_DICT = {
    '10': '40000.66c',
    '20': '40000.67c',
    '30': '40000.68c',
}
H1_TEMP_DICT = {
    '10': ' 1001.80c',
    '20': ' 1001.81c',
    '30': ' 1001.82c',
}
ZRH_TEMP_DICT = {
    '10': 'zr/h.10t',
    '20': 'zr/h.20t',
    '30': 'zr/h.30t',
}
HZR_TEMP_DICT = {
    '10': 'h/zr.10t',
    '20': 'h/zr.20t',
    '30': 'h/zr.30t',
}
def adjust_spaces(data: dict):
    """Pad every key of ``data`` with spaces to the length of its value.

    The dict is modified in place: each key is replaced by
    ``key.ljust(len(value))``; the values are untouched.

    Args:
        data: mapping from string keys to string values.

    Returns:
        None.
    """
    # Iterate over a snapshot: deleting and inserting keys while iterating
    # over the live dict raises RuntimeError.
    for k, v in list(data.items()):
        modified_k = k.ljust(len(v))
        del data[k]
        data[modified_k] = v
import re

# The lookup tables are used with their original keys here: padding the keys
# would make the `[temp]` lookups below fail.
temp = input("Temperature = ")


def _replacements(table):
    """Map every token of ``table`` to the token for the chosen temperature."""
    # Keys and values are stripped so that padding inside the tables cannot
    # prevent a match with the whitespace-free fields of a line.
    target = {k.strip(): v.strip() for k, v in table.items()}[temp.strip()]
    return {v.strip(): target for v in table.values()}


material_swaps = {**_replacements(C12_TEMP_DICT), **_replacements(H1_TEMP_DICT)}
thermal_swaps = {**_replacements(ZRH_TEMP_DICT), **_replacements(HZR_TEMP_DICT)}

# First field of a line -> replacements that apply to that line.
swaps_by_card = {}
for f in FE_ID:
    swaps_by_card[f'm{f}'] = material_swaps
    swaps_by_card[f'mt{f}'] = thermal_swaps

# NOTE: as before, only lines whose first field names a selected card are
# rewritten; every other line (comment lines, continuation lines such as the
# "1001.80c" line of the sample) is copied unchanged.
with open('matcards.i', 'r') as source, open('new_text.i', 'w') as new_file:
    for line in source:
        entries = line.split()
        swaps = swaps_by_card.get(entries[0]) if entries else None
        if swaps:
            # Splitting on a captured group keeps the whitespace runs, so
            # joining the pieces again reproduces the original spacing.
            pieces = re.split(r'(\s+)', line)
            # ljust keeps the following columns in place should a
            # replacement be shorter than the token it replaces.
            line = ''.join(swaps.get(p, p).ljust(len(p)) for p in pieces)
        new_file.write(line)

replace trademark symbol (™) when alone

I'm trying to remove the trademark symbol (™), but only when it stands alone. For instance, I might have â€™, which is a badly encoded quotation mark ('). In that case I don't want to remove the trademark symbol (™), because doing so would break the pattern that I'm using to replace xx™ with a quotation mark.
# Per-sequence replacement counters.
# NOTE(review): this name shadows the built-in `dict` type.
dict = {};
# Byte sequence to look for -> replacement text.
chars = {
'\xe2\x84\xa2': '', # ™
'\xe2\x80\x99': "'", # â€™
}
# Adds `number` to the counter kept for `char`, creating it when missing.
def stats_change(char, number):
# NOTE(review): dict.has_key() exists only in Python 2.
if dict.has_key(char):
dict[char] = dict[char]+number
else:
dict[char] = number # Add new entry
# Callback for re.subn: counts the matched sequence and returns its replacement.
def replace_chars(match):
char = match.group(0)
stats_change(char,1)
return chars[char]
# Builds one alternation of all keys (a backslash is put in front of each
# key) and substitutes every match in `i`; `re`, `i` and `count_matches` are
# defined outside this fragment.
i, nmatches = re.subn("(\\" + '|\\'.join(chars.keys()) + ")", replace_chars, i)
count_matches += nmatches
Input: foo™ oof
Output: foo oof
Input: oâ€™f oof
Output: o'f oof
Any suggestions ?

How to decrypt in Python?

I am trying to decrypt a file in Python that I encrypted with another program. Some letters are correctly decrypted while others are not. I am not sure what is going on. All I essentially did was reverse the code for the decryption files. I think it has to do with the way it is iterating through the text, but I am not sure how to fix it.
Here is my decryption code:
# Cipher symbol -> plain letter.
# NOTE(review): many keys occur more than once in this literal ('#', '0',
# '$', '%', '5', '+', '7', '~', ')', '2', '*', ']', '8'). A dict keeps only
# the last value given for a key, so the earlier letters are lost.
# NOTE(review): the key '\-' is two characters long and therefore can never
# equal a single character read from the file.
decryption_library = {'%':'A','9':'a','#':'B','#':'b','1':'C','2':'c','3':'D','4':'d',
'5':'E','6':'e','7':'F','8':'f','0':'G','}':'g','{':'H',']':'h','[':'I',',':'i',
'.':'J','>':'j','<':'K','/':'k','0':'L','\-':'l','\"':'M',':':'m',';':'N',
'+':'n','$':'O','-':'o','$':'Q','%':'q','^':'R','&':'r','*':'S',
'(':'s',')':'T','~':'t','`':'U','5':'u','\\':'V','+':'v','=':'W','7':'w',
'~':'X',')':'x','2':'Y','*':'y',']':'Z','8':'z'}
# Read the whole encrypted file into memory.
orig_file = open('ENCRYPTED_Plain_Text_File.txt','r')
file_read = orig_file.read()
orig_file.close()
# Write the translation: characters with a table entry are replaced, all
# others are copied unchanged. (Indentation was lost in this listing.)
encrypt_file = open('DECRYPTED_Plain_Text_File.txt','w')
for ch in file_read:
if ch in decryption_library:
encrypt_file.write(decryption_library[ch])
else:
encrypt_file.write(ch)
encrypt_file.close()
# Second pass over the same encrypted file, printing to the console.
encrypt_file = open('ENCRYPTED_Plain_Text_File.txt','r')
file_read = encrypt_file.read()
encrypt_file.close()
codes_items = decryption_library.items()
for ch in file_read:
# NOTE(review): here the character is compared with the table's values
# (plain letters) rather than its keys -- confirm that this is intended.
if not ch in decryption_library.values() or ch == '.' or ch == ',' or ch == '!':
print(ch)
else:
for k,v in codes_items:
if ch == v and ch != '.':
print(k,end='')
Here is the encrypted text:
)]6 ^-94 ;-~ )9/6+
#2 ^$#5^) 7^$*)
)7- &-94( 4,+6&}64 ,+ 9 *6\-\--7 7--4,
%+4 (-&&* [ 2-5\-4 +-~ ~&9+6\- #-~]
%+4 #6 -+6 ~&9+6\-6&, \--+} [ (~--4
%+4 \---/64 4-7+ -+6 9( 89& 9( [ 2-5\-4
)- 7]6&6 ,~ #6+~ ,+ ~]6 5+46&}&-7~];
Here is what it should be:
The Road Not Taken
BY ROBERT FROST
Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;
Here is what it decrypts to:
xZe Road NoX xakev
BY RQBuRx wRQyx
xwo roads diverged iv a yeVoVoow woodi
qvd sorry I YouVod voX XraveVo boXZ
qvd be ove XraveVoeri Voovg I sXood
qvd Voooked dowv ove as zar as I YouVod
xo wZere iX bevX iv XZe uvdergrowXZN

Your decryption_library is not correct. For example, for the key ')' you have both the value 'T' and the value 'x'.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python, find all the possible letter combinations in given morse code - python

Related

Converting unicode to gsm encoding in golang

why are the spaces in between the words not showing up

How would I replace text while preserving the original spacing in Python?

replace trademark symbol (™) when alone

How to decrypt in Python?

Categories

Resources