Generate alphanumeric strings sequentially - python

I'm trying to create a loop to generate and print strings as follows:
Alphanumeric characters only:
0-9 are before A-Z, which are before a-z,
Length goes up to 4 characters.
So, it would print:
all strings from 0-z
then from 00-zz
then from 000-zzz
then from 0000-zzzz
then it stops.

from string import digits, ascii_uppercase, ascii_lowercase
from itertools import product
# Alphabet in the required order: digits, then upper-case, then lower-case.
chars = digits + ascii_uppercase + ascii_lowercase
# For each length 1..4, product() hands back one tuple at a time in the order
# of `chars`, so every length is printed from "0..." up to "z...".
for n in range(1, 4 + 1):
    for comb in product(chars, repeat=n):
        # print() as a function: the statement form is a syntax error on Python 3.
        print(''.join(comb))
This first makes a string of all the numbers, uppercase letters, and lowercase letters.
Then, for each length from 1-4, it prints every possible combination of those numbers and letters.
Keep in mind this is A LOT of combinations -- 62^4 + 62^3 + 62^2 + 62.

I dislike the answer given before me using product since, looking at its implementation in the Python documentation, it seems to expand the entire thing into a list in memory before starting to yield the results.
This is very bad for your case since, as agf himself said, the number of permutations here is huge (well over a million). The yield statement was created for exactly this case - so that huge lists can be generated dynamically rather than held in memory (I also disliked the wasteful range where xrange is perfectly applicable).
I'd go for a solution like this:
def generate(chars, length, prefix=None):
    """Lazily yield every string made of `length` symbols taken from `chars`.

    Strings come out in the order of `chars`, each one starting with
    `prefix` (treated as '' when falsy). Nothing is yielded when
    `length` is below 1.
    """
    if length < 1:
        return
    base = prefix if prefix else ''
    for symbol in chars:
        candidate = base + symbol
        if length == 1:
            yield candidate
            continue
        # Longer strings: fix this symbol and recurse for the remaining ones.
        yield from generate(chars, length - 1, prefix=candidate)
This way, all that spans in memory is a recursive stack "n" deep, where "n" is the length of your permutations (4 in this case) and only a single element is returned each time.
chars is the set of chars to choose from, length is 4 and the use is rather similar to products, except that it doesn't span the whole list in memory during run time.

I coded this today. It does exactly what you want and more. It's extendable as well
def lastCase(lst):
    """Return True when every item of `lst` is '_' (also for an empty `lst`)."""
    return all(item == '_' for item in lst)
# Counter over the symbols '0'-'9', 'A'-'Z' and '_' kept in a fixed-size list;
# every generated value is printed.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below has to be restored before it can run.
# Slots that have not been used yet hold ''.
l = [''] * 4 #change size here if needed. I used 4
l[0] = '0'
# Position in `l` that is currently being incremented.
index = 0
# Keep going until every slot holds '_' (see lastCase).
while ( not lastCase(l) ):
# The slot went past '_' (the last symbol): reset it to '0' and move on to
# the next slot.
if ( ord(l[index]) > ord('_') ):
l[index] = '0'
index += 1
# Following slots already at '_' are reset too (presumably the carry
# propagating - confirm once the nesting is restored).
while( l[index] == '_' ):
l[index] = '0'
index += 1
# First time this slot is used: start it at '0'.
if (l[index] == ''):
l[index] = '0'
#print or process generated string
print(''.join(l))
# Advance the current slot to the next character code...
l[index] = chr(ord(l[index]) +1)
# ...skipping the characters between '9' and 'A'...
if ( ord(l[index]) > ord('9') and ord(l[index]) < ord('A') ):
l[index] = 'A'
# ...and those between 'Z' and '_', so that 'Z' is followed by '_'.
elif ( ord(l[index]) > ord('Z') and ord(l[index]) < ord('_') ):
l[index] = '_'
index = 0
print (''.join(l))

from string import digits, ascii_uppercase, ascii_lowercase
from itertools import product
# Alphabet in counting order: '0'-'9', then 'A'-'Z', then 'a'-'z'.
chars = digits + ascii_uppercase + ascii_lowercase


def give_me_next(lst):
    """Return the string that follows `lst` when counting over `chars`.

    The right-most character is the least significant one. The length never
    changes: a string made only of the last symbol ('z') wraps around to all
    '0'. A character that is not in `chars` becomes '0' without a carry,
    because str.find() reports it as -1.

    The previous version incremented a position twice after a carry
    (e.g. 'az' produced 'c0' instead of 'b0') and hid the overflow test
    behind bare `except:` clauses.
    """
    symbols = list(lst)
    # Walk from the least-significant (right-most) position leftwards.
    for i in range(len(symbols) - 1, -1, -1):
        nxt = chars.find(symbols[i]) + 1
        if nxt < len(chars):
            symbols[i] = chars[nxt]
            break
        # Last symbol overflowed: reset this position and carry leftwards.
        symbols[i] = chars[0]
    return "".join(symbols)
# Start from the value that follows 'zzzzz'.
a = give_me_next('zzzzz')
# NOTE: this loop has no exit condition; it prints successive values until
# the process is interrupted.
while True:
    a = give_me_next(a)
    # print() as a function: the statement form is a syntax error on Python 3.
    print(a)

This seems like the simplest solution to me:
from string import digits, ascii_uppercase, ascii_lowercase
chars = digits + ascii_uppercase + ascii_lowercase
# Every string of length 1, 2, 3 and then 4 over `chars`, in that order.
# NOTE: the whole list (62 + 62**2 + 62**3 + 62**4 strings) is held in memory.
all_str = list(chars)
all_str += [a + b for a in chars for b in chars]
all_str += [a + b + c for a in chars for b in chars for c in chars]
all_str += [a + b + c + d for a in chars for b in chars for c in chars for d in chars]
print(all_str)
print("Number of strings:", len(all_str))
Example for strings with maximum 2 characters.
Of course, there may be a way to generalize to any max number of characters per string, but since you have a specific need for strings up to 4 characters, it's fine.

Related

Extracting number from alphanumeric string and adding them

Given string str containing alphanumeric characters. The task is to calculate the sum of all the numbers present in the string.
Example 1:
Input:
str = 1abc23
Output: 24
Explanation: 1 and 23 are the numbers in the
string, which are added to get the sum
24.
Example 2:
Input:
str = geeks4geeks
Output: 4
Explanation: 4 is the only number, so
the sum is 4.
I broke down the problem into smaller parts, for first I just want to extract the numbers.
# Attempt to collect the runs of digits found in `s` into `number`.
# NOTE(review): indentation was lost in this listing; the nesting of the
# statements below has to be restored before it can run.
s = "a12bc3d"
number = ""
for i in range(0, len(s)):
if s[i].isdigit():
# n counts how far past position i the current run of digits extends.
n=0
number = number + s[i]
while s[i].isdigit():
n = n+1
# NOTE(review): s[i + n] is read without a bounds check; this looks like it
# can run past the end when the string ends in a digit - confirm.
if s[i + n].isdigit():
number = number + s[i+n] + " "
else:
break
# NOTE: rebinding i here does not skip ahead - the for loop assigns the next
# value from range() on the following iteration, so digits that were already
# consumed are visited again.
i = i + n + 1
else:
continue
print(number)
my output from the above code is 12 23 but it should be 12 3, as the for loop is starting from the initial point making 2 coming twice, I have tried to move the for loop forward by updating i = i + n + 1 but it's not working out like that.
It will be great if someone gives me a direction, any help is really appreciated.
A slightly simpler approach with regex:
import re
numbers_sum = sum(int(match) for match in re.findall(r'(\d+)', s))
Use itertools.groupby to break the string into groups of digits and not-digits; then convert the digit groups to int and sum them:
>>> from itertools import groupby
>>> def sum_numbers(s: str) -> int:
... return sum(int(''.join(g)) for d, g in groupby(s, str.isdigit) if d)
...
>>> sum_numbers("1abc23")
24
>>> sum_numbers("geeks4geeks")
4
you can use regex.
import re
s = 'a12bc3d'
# Raw string so that \d reaches the regex engine untouched; in a normal string
# literal "\d" is an invalid escape sequence and triggers a warning.
# The capturing group makes re.split() keep the digit runs in the result.
sections = re.split(r'(\d+)', s)
# Only the digit runs are converted; the text in between is dropped.
numeric_sections = [int(x) for x in sections if x.isdigit()]
sum_ = sum(numeric_sections)
print(sum_)
I appreciate the solutions with regex and group-by. And I got the solution using logic as well.
# (The stray leading backtick, a markdown leftover, was removed: it made the
# first line a syntax error.)
s = "4a7312cfh86"
slist = [i for i in s]
# Completed numbers, still as strings, in order of appearance.
nlist = []
for i in range(len(slist)):
    if not slist[i].isdigit():
        continue
    if i != (len(slist) - 1) and slist[i + 1].isdigit():
        # The number continues: carry the digits seen so far into the next
        # slot so that the last slot of a run holds the whole number.
        slist[i + 1] = slist[i] + slist[i + 1]
    else:
        # End of a run (next char is not a digit, or end of string).
        nlist.append(slist[i])
def addingElement(arr):
    """Return the sum of int(item) for every item of `arr` (0 when empty).

    Implemented iteratively: the earlier recursive form sliced the list on
    every call and exceeded the recursion limit on lists of about a thousand
    items.
    """
    return sum(int(item) for item in arr)
print(addingElement(nlist))
Output - 7402

count characters occurences in string

I want to find out how often does "reindeer" (in any order) come in a random string and what is the left over string after "reindeer" is removed. I need to preserve order of the left over string
So for example
"erindAeer" -> A (reindeer comes 1 time)
"ierndeBeCrerindAeer" -> ( 2 reindeers, left over is BCA)
I thought of sorting and removing "reindeer", but i need to preserve the order . What's a good way to do this?
We can replace those letters after knowing how many times they repeat, and Counter is convenient for counting elements.
from collections import Counter
def leftover(letter_set, string):
    """Report how many copies of `letter_set` can be taken out of `string`.

    The letters of each complete copy are removed from the left (the first
    occurrences go first); the remaining characters keep their order.
    Returns a sentence with the number of copies and the left-over text.
    """
    needed = Counter(letter_set)
    available = Counter(string)
    # The scarcest letter limits the number of complete copies.
    repeat = min(available[ch] // qty for ch, qty in needed.items())
    for ch, qty in needed.items():
        string = string.replace(ch, "", qty * repeat)
    return f"{repeat} {letter_set}, left over is {string}"
print(leftover("reindeer", "ierndeBeCrerindAeer"))
print(leftover("reindeer", "ierndeBeCrerindAeere"))
print(leftover("reindeer", "ierndeBeCrerindAee"))
Output:
2 reindeer, left over is BCA
2 reindeer, left over is BCAe
1 reindeer, left over is BCerindAee
Here is a rather simple approach using collections.Counter:
from collections import Counter
def purge(pattern, string):
    """Return (copies, rest) after taking whole copies of `pattern` out of `string`.

    `copies` is how many times the letters of `pattern` fit into `string`.
    `rest` keeps, in their original order, the first surplus occurrences of
    each character.
    """
    remaining = Counter(string)
    per_copy = Counter(pattern)
    cnt = min(remaining[ch] // per_copy[ch] for ch in per_copy)
    # What is left in `remaining` is how many of each character survive.
    remaining.subtract(pattern * cnt)
    kept = []
    for ch in string:
        if remaining[ch]:
            remaining.subtract(ch)
            kept.append(ch)
    return cnt, "".join(kept)
>>> purge("reindeer", "ierndeBeCrerindAeer")
(2, 'BCA')
Here is the code in Python:
def find_reindeers(s):
    """Return (count, left_over) for the word "reindeer" inside `s`.

    `count` is the number of complete sets of the letters of "reindeer"
    found in `s` (in any order). `left_over` is `s` with exactly those
    letters removed, first occurrences first, order preserved.

    The earlier version kept *every* occurrence of a letter that had any
    surplus (one extra 'e' brought all the 'e's back) and printed its
    internal tables on each call.
    """
    word = "reindeer"
    # Letters needed for one copy of the word.
    need = {}
    for ch in word:
        need[ch] = need.get(ch, 0) + 1
    # Letters of the word that are available in s.
    have = {ch: 0 for ch in need}
    for ch in s:
        if ch in have:
            have[ch] += 1
    total_occ = min(have[ch] // need[ch] for ch in need)
    # Budget of each letter that still has to be removed from s.
    to_remove = {ch: total_occ * need[ch] for ch in need}
    left_over = []
    for ch in s:
        if to_remove.get(ch, 0) > 0:
            to_remove[ch] -= 1
        else:
            left_over.append(ch)
    return total_occ, "".join(left_over)
print(find_reindeers("ierndeBeCrerindAeer"))
Output for ierndeBeCrerindAeer:
(2, "BCA")
You can do it by using count and replace string function:
import queue
# Count how many times the letters of `word` can be assembled from
# `given_string`, and collect the characters that are not part of `word`.
# NOTE(review): indentation was lost in this listing; the nesting of the
# statements below has to be restored before it can run.
word = "reindeer"
given_string = "ierndeBeCrerindAeer"
new_string = ""
counter = 0
tmp = ""
letters = queue.Queue()
# Characters that do not occur in `word` go straight to the left-over string;
# the others are queued for matching.
for i in given_string:
if not i in word:
new_string += i
else:
letters.put(i)
# x is the index of the letter of `word` that is currently being looked for.
x = 0
while x < len(word):
# Take queued letters one at a time; a letter that does not match is put back
# at the end of the queue.
# NOTE(review): if word[x] is not in the queue while the queue is non-empty,
# this inner loop appears never to exit - confirm and guard before using it
# on other inputs.
while not letters.empty():
j = letters.get()
if j == word[x]:
tmp += j
# print(tmp)
break
else:
letters.put(j)
x = x +1
# A full copy of `word` has been assembled: count it and start over.
if tmp == word:
counter += 1
tmp = ""
x = 0
print(f"The word {word} occurs {counter} times in the string {given_string}.")
print("The left over word is",new_string)
Output will be:
The word reindeer occurs 2 times in the string ierndeBeCrerindAeer.
The left over word is BCA
It's easy to use queue here so that we don't repeat the elements that are already present or found.
Hope this answers your question, Thank you!

Creating a function that translates number to letter

I have written this function which is supposed to go through a user-provided string like 1-3-5, and output a corresponding series of letters, where A is assigned to 1, B is assigned to 2, C is assigned to 3, etc. So in the case of 1-3-5 the output would be ACE. For 2-3-4, it should print BCD. For ?-3-4 or --3-4 it should still print BCD. Here is the code I have written so far:
# Meant to decode a dash-separated string of numbers (1 -> 'A' ... 26 -> 'Z')
# into letters.
# NOTE(review): indentation was lost in this listing; the nesting of the
# statements below has to be restored before it can run.
def number_to_letter(encoded):
result = ""
# Index where the number currently being read begins.
start = 0
for char in range(len(encoded)):
if encoded[char] == '-':
# NOTE: index("-") always returns the position of the FIRST '-', so after the
# first number the slice encoded[start:i] no longer covers the current number.
# The position `char`, or encoded.index("-", start), is what is needed here.
i = encoded.index("-")
sub_str = encoded[start:i]
if not sub_str.isdigit():
result += ""
else:
# chr(64 + n) gives 'A' for 1 ... 'Z' for 26; other values are rejected below.
letter = chr(64 + int(sub_str))
if 0 < int(sub_str) < 27:
result += letter
else:
result += ""
start += len(sub_str) + 1
# NOTE: a number is only decoded when a '-' is reached, so the text after the
# last '-' is never decoded.
return result
print(num_to_let('4-3-25'))
My output is D, when it should be DCY. I am trying to do this without using any lists or using the split function, just by finding the - character in the sub-string and converting the numbers before it into a letter. What can I do?
You can try doing something like this:
def number_to_letter(encoded):
    """Decode a '-'-separated string of numbers into letters (1 -> 'A' ... 26 -> 'Z').

    Characters that are neither digits nor '-' are ignored; numbers outside
    1..26 and empty fields contribute nothing.
    """
    def flush(pending):
        # Only 1..26 map onto 'A'..'Z'; anything else yields no letter.
        if pending and 0 < int(pending) < 27:
            return chr(64 + int(pending))
        return ""

    pieces = []
    pending = ""
    for ch in encoded:
        if ch == '-':
            pieces.append(flush(pending))
            pending = ""
        elif ch.isdigit():
            pending += ch
    # The last number is not followed by a '-', so flush it explicitly.
    pieces.append(flush(pending))
    return "".join(pieces)
print(number_to_letter('1-3-5'))
output:
ACE
Explanation:
we loop for each character and add it to some buffer. when we encounter - (delimiter) we try to parse the buffer and reset it. And we do the same parsing at the end one more time and return the result.
The way the validation works is that, whenever we populate the buffer we check for number validity (using .isdigit()) and when we parse the buffer we check for the range constraints.
import string
alphabet = list(string.ascii_lowercase)
combination = "1-2-3"
def seperate(s, sep='-'):
    """Split `s` on every occurrence of `sep` and return the list of pieces.

    Empty pieces are kept, so "1--2" gives ['1', '', '2'] and "" gives [''].
    The earlier recursive version did not pass `sep` on to the recursive
    call, so only the first custom separator was honoured.
    """
    parts = []
    head, found, rest = s.partition(sep)
    # `found` is '' once no further separator is present.
    while found:
        parts.append(head)
        head, found, rest = rest.partition(sep)
    parts.append(head)
    return parts
combination = seperate(combination)
print("".join([alphabet[int(i)-1] for i in combination]))
the approach of this code is to find the first '-' and then store where it is so next time we can look for the first '-' after the last one
when the comments in my code talk about a cycle means going through the loop (While looping:) once
def number_to_letter(encoded):
    """Decode a '-'-separated string of numbers into letters (1 -> 'A' ... 26 -> 'Z').

    Fields are located with str.find(), starting each search just after the
    previous '-'. Fields that are not plain numbers, or that lie outside
    1..26, are skipped.

    The earlier version guessed "last field" from `len(encoded) - 4`, which
    failed on short inputs such as "1-2", and it mapped 0 to '@'.
    """
    letterString = ""
    startSubStr = 0
    while startSubStr <= len(encoded):
        endSubStr = encoded.find('-', startSubStr)
        if endSubStr == -1:
            # No further '-': the field runs to the end of the string.
            endSubStr = len(encoded)
        field = encoded[startSubStr:endSubStr]
        if field.isdecimal() and 0 < int(field) < 27:
            letterString += chr(64 + int(field))
        # Continue just after this '-' so it is not found again.
        startSubStr = endSubStr + 1
    return letterString
print(number_to_letter("23-1-1-2")) #>>> WAAB
result:
WAAB
I see you don't want to use split, how about filter? ;)
import itertools
s = '1-2-3'
# groupby() splits s into alternating runs of numeric and non-numeric
# characters; every run except a lone '-' is kept and joined back into a string.
values = [
    ''.join(run)
    for run in (list(g) for _, g in itertools.groupby(s, str.isnumeric))
    if run != ['-']
]
That will essentially do what .split('-') does on s. Also list(s) will behave the same as [*s] if you wanna use that instead.
Now you can just use ord and chr to construct the string you require-
start_pivot = ord('A') - 1
res = ''.join([chr(int(i) + start_pivot) for i in values])
Output
>>> s = '2-3-4'
>>> values = [''.join(e) for e in filter(
...: lambda l: l != ['-'],
...: [list(g) for k, g in itertools.groupby(
...: [*s], lambda s: s.isnumeric()
...: )
...: ]
...: )
...: ]
>>> start_pivot = ord('A') - 1
>>> res = ''.join([chr(int(i) + start_pivot) for i in values])
>>> res
'BCD'
No lists, no dicts. What about RegExp?
import re
def get_letter(n):
    """Return the letter for `n` (1 -> 'A' ... 26 -> 'Z'), or None outside 1..26."""
    value = int(n)
    if 1 <= value <= 26:
        return chr(value + 64)
    return None


def number_to_letter(s):
    """Replace each run of digits in `s` using get_letter(), then drop every '-'."""
    def to_letter(match):
        return get_letter(match.group())

    translated = re.sub(r'\d+', to_letter, s)
    return translated.replace('-', '')
print(number_to_letter('1-2-26')) # Output: ABZ
No lists, okay. But what about dicts?
def abc(nums):
    """Translate `nums` character by character: digits to letters, '-' to nothing.

    Any other character raises KeyError.
    """
    d = {'-':'','1':'A','2':'B','3':'C','4':'D','5':'E','6':'F','7':'G','8':'H','9':'I','0':'J'}
    return ''.join(d[n] for n in nums)
print(abc('1-2-3-9-0')) # Output: ABCIJ
Here is a corrected version:
def abc(nums):
    """Translate `nums` character by character: digits to letters, '-' to nothing.

    Characters without a mapping are skipped.
    """
    d = {'-':'','1':'A','2':'B','3':'C','4':'D','5':'E','6':'F','7':'G','8':'H','9':'I','0':'J'}
    return ''.join(d.get(n, '') for n in nums)
print(abc('?-2-3-9-0')) # Output: BCIJ

Python Optimization : Find the most occured sequence of 4 letters inside a 1000 letters string randomly generated

I'm here to ask help about my program.
I wrote a program whose raison d'être is to find the most frequently occurring four-letter string inside a larger x-letter string that has been generated randomly.
As example, if you would know the most occured sequence of four letters in 'abcdeabcdef' it's pretty easy to understand that is 'abcd' so the program will return this.
Unfortunately, my program runs very slowly: it takes 119.7 seconds to analyze all the possibilities and display the results for a string of only 1000 letters.
This is my program, right now :
import random
# Brute-force search for the most frequent four-letter sequence in a random
# string.
# NOTE(review): indentation was lost in this listing; the nesting of the
# statements below has to be restored before it can run.
chars = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
# Build a random string of 1000 lowercase letters.
string = ''
for _ in range(1000):
string += str(chars[random.randint(0, 25)])
print(string)
# number[i] is the occurrence count of the i-th candidate, in generation order.
number = []
# Try each of the 26**4 four-letter candidates and count it by scanning the
# whole string; this exhaustive scan is what makes the script slow.
for ____ in range(0,26):
print(____)
for ___ in range(0,26):
for __ in range(0, 26):
for _ in range(0, 26):
test = chars[____] + chars[___] + chars[__] + chars[_]
print('trying :',test, end = ' ')
number.append(0)
for i in range(len(string) -3):
if string[i: i+4] == test:
number[len(number) -1] += 1
print('>> finished')
# Highest occurrence count over all candidates.
_max = max(number)
# NOTE(review): range(len(number)-1) leaves out the last candidate - likely
# an off-by-one.
for i in range(len(number)-1):
if number[i] == _max :
# Turn the flat index back into four base-26 digits (m is the first letter,
# j the last).
j, k, l, m = i, 0, 0, 0
while j > 25:
j -= 26
k += 1
while k > 25:
k -= 26
l += 1
while l > 25:
l -= 26
m += 1
Result = chars[m] + chars[l] + chars[k] + chars[j]
print(str(Result),'occured',_max, 'times' )
I think there is ways to optimize it but at my level, I really don't know. Maybe the structure itself is not the best. Hope you'll gonna help me :D
You only need to loop through your list once to count the 4-letter sequences. You are currently looping n*n*n*n. You can use zip to make a four letter sequence that collects the 997 substrings, then use Counter to count them:
from collections import Counter
import random
chars = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
# A random string of 1000 lowercase letters.
s = "".join(chars[random.randint(0, 25)] for _ in range(1000))
# Every window of four consecutive letters as a 4-tuple; zip() stops with the
# shortest slice, which gives len(s) - 3 windows.
it = zip(s, s[1:], s[2:], s[3:])
counts = Counter()
counts.update(it)
counts.most_common(1)
Edit:
.most_common(x) returns a list of the x most common strings. counts.most_common(1) returns a single item list with the tuple of letters and number of times it occurred like; [(('a', 'b', 'c', 'd'), 2)]. So to get a string, just index into it and join():
''.join(counts.most_common(1)[0][0])
Even with your current approach of iterating through every possible 4-letter combination, you can speed up a lot by keeping a dictionary instead of a list, and testing whether the sequence occurs at all first before trying to count the occurrences:
# Occurrence count of every four-letter sequence that appears in s.
counts = {}
for a in chars:
    for b in chars:
        for c in chars:
            for d in chars:
                test = a + b + c + d
                print('trying :',test, end = ' ')
                if test in s: # if it occurs at all
                    # then record how often it occurs (overlaps included).
                    # The last window starts at len(s) - 4, hence
                    # range(len(s) - 3); range(len(s) - 4) skipped it.
                    counts[test] = sum(1 for i in range(len(s) - 3)
                                       if test == s[i:i+4])
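The multiple loops can be replaced with a single itertools loop, though this improves readability rather than performance (note that only itertools.product(chars, repeat=length) covers sequences with repeated letters such as 'aabb'; itertools.permutations skips them):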
length = 4
# product(), not permutations(): permutations never repeats an element, so
# sequences with a repeated letter (e.g. 'aabb') would never be tried.
for sequence in itertools.product(chars, repeat=length):
    test = "".join(sequence)
    if test in s:
        # The last window starts at len(s) - length, hence the "+ 1".
        counts[test] = sum(1 for i in range(len(s) - length + 1) if test == s[i:i+length])
You can then display the results like this:
_max = max(counts.values())
for k, v in counts.items():
if v == _max:
print(k, "occurred", _max, "times")
Provided that the string is shorter or around the same length as 26**4 characters, then it is much faster still to iterate through the string rather than through every combination:
length = 4
# Occurrence count of every `length`-letter window of s, built in one pass.
counts = {}
# The last window starts at len(s) - length, hence the "+ 1"; without it the
# final window was never counted.
for i in range(len(s) - length + 1):
    sequence = s[i:i+length]
    counts[sequence] = counts.get(sequence, 0) + 1
This is equivalent to the Counter approach already suggested.

Addition of chars adding one character in front

what I'm trying to implement is a function that increments a string by one character, for example:
'AAA' + 1 = 'AAB'
'AAZ' + 1 = 'ABA'
'ZZZ' + 1 = 'AAAA'
I've implemented function for the first two cases, however I can't think of any solution for the third case.
Here's my code :
# Increment an upper-case string like a counter, working from the last
# character towards the first.
# NOTE(review): indentation was lost in this listing; the nesting of the
# statements below has to be restored before it can run.
def new_sku(s):
s = s[::-1]
# Becomes True once a character below 'Z' has been bumped (no further carry).
already_added = False
new_sku = str()
for i in s:
if not already_added:
if (i < 'Z'):
already_added = True
# (ord(i)+1)%65%26 + 65 maps 'A'..'Y' to the next letter and wraps 'Z' to 'A'.
new_sku += chr((ord(i)+1)%65%26 + 65)
else:
new_sku += i
# NOTE: when every character is 'Z' the carry is dropped here, so 'ZZZ'
# becomes 'AAA' rather than 'AAAA'.
return new_sku[::-1]
Any suggestions ?
If you're dealing with bijective numeration, then you probably have (or should have) functions to convert to/from bijective representation anyway; it'll be a lot easier just to convert to an integer, increment it, then convert back:
def from_bijective(s, digits=string.ascii_uppercase):
    """Return the integer written as `s` in bijective numeration over `digits`.

    The first symbol of `digits` stands for 1, so there is no zero digit and
    the empty string is 0. A symbol missing from `digits` raises ValueError.
    """
    base = len(digits)
    total = 0
    # Most significant symbol first: shift what we have, then add the new one.
    for symbol in s:
        total = total * base + digits.index(symbol) + 1
    return total
def to_bijective(n, digits=string.ascii_uppercase):
    """Return `n` written in bijective numeration over `digits` ('' for n <= 0)."""
    base = len(digits)
    pieces = []
    while n > 0:
        # No zero digit exists, so shift by one before dividing.
        n, remainder = divmod(n - 1, base)
        pieces.extend(digits[remainder])
    # Symbols were produced least significant first.
    return ''.join(pieces[::-1])
def new_sku(s):
    """Return the string that follows `s`: convert to an integer, add one, convert back."""
    value = from_bijective(s)
    return to_bijective(value + 1)
How about ?
def new_sku(s):
    """Return the upper-case string that follows `s` ('AAZ' -> 'ABA', 'ZZZ' -> 'AAAA')."""
    out = []
    carry = True
    # Work from the last character towards the first.
    for ch in reversed(s):
        if carry:
            # Maps 'A'..'Y' to the next letter and wraps 'Z' to 'A'.
            out.append(chr((ord(ch) + 1) % 65 % 26 + 65))
            # The carry stops at the first character below 'Z'.
            carry = not (ch < 'Z')
        else:
            out.append(ch)
    if carry:  # carry still left?
        out.append('A')
    return ''.join(reversed(out))
Sample run :-
$ python sku.py Z
AA
$ python sku.py ZZZ
AAAA
$ python sku.py AAA
AAB
$ python sku.py AAZ
ABA
You have to think of 'AAA', 'ZZZ', ... as representation of the value you manipulate.
First, parse the value:
val = sum(pow(26, i) * (ord(v) - ord('A') + 1) for i, v in enumerate(value[::-1]))
Then, add value to it:
val = val + 1
Edit
The final value is given by:
res = ""
while val > 0:
val, n = divmod(val - 1, 26)
res = chr(n+ord('A')) + res
The lack of representation for zero requires the value passed to divmod to be decremented at each turn, which i have not found a way of doing with a list comprehension.
Edit
Rather than ord() and chr(), it is possible to use string.ascii_uppercase.index() and string.ascii_uppercase[]
You can make use of some recursion here:
def new_sku(s):
    """Return the string that follows `s`, upper-cased.

    expand() works on the reversed string (last character first), so the
    input is reversed on the way in and the result on the way out.
    """
    backwards = s[::-1].upper()
    return expand(backwards, '')[::-1]
import string
chars = string.ascii_uppercase


def expand(s, new_s, carry_forward=True):
    """Append to `new_s` the incremented form of `s` and return the result.

    `s` holds the characters least-significant first. While `carry_forward`
    is true each character is bumped by one ('Z' wraps to 'A'); the carry
    ends at the first character that is not 'Z'. If a carry is still pending
    at the end, an extra 'A' is appended.
    """
    carry = carry_forward
    for ch in s:
        new_s += chars[(ord(ch) - ord('A') + carry) % 26]
        # Only a 'Z' lets the current carry state pass on unchanged.
        if ch != 'Z':
            carry = False
    if carry:
        new_s += 'A'
    return new_s
# Demo calls; print() as a function (the statement form is a syntax error on
# Python 3). Lower-case input is upper-cased by new_sku before incrementing.
print(new_sku('AAB'))
print(new_sku('AAZ'))
print(new_sku('ZZZ'))
print(new_sku('aab'))
print(new_sku('aaz'))
print(new_sku('zzz'))
Output:
AAC
ABA
AAAA
AAC
ABA
AAAA
I would implement this like a base-26 addition with carry.
So start from the right of the string, add 1. If it reaches Z, wrap to A and bump the next left most character up one. If the left most character reaches Z, add an A to the left of the string.
# Increment the letters in `s` by one, treating them as a base-26 counter.
s = ["Z","Z","Z"]
done = 0
# Start at the right-most (least significant) letter.
index = len(s) - 1
while done == 0:
    if s[index] < "Z":
        # No overflow: bump this letter and stop.
        s[index] = chr(ord(s[index]) + 1)
        done = 1
    else:
        # 'Z' wraps to 'A' and the carry moves one position to the left.
        s[index] = "A"
        if index == 0:
            # Carry out of the left-most letter: the value grows by one 'A'.
            s = ["A"] + s
            done = 1
        else:
            index = index - 1
# print() as a function: the statement form is a syntax error on Python 3.
print(s)
Just check if the string is all Zs, and if it is, replace it by a string with length len(s) + 1, consisting of just As:
if s == "Z" * len(s):
return "A" * (len(s) + 1)
# The alphabet with a trailing 'A', so that the lookup "next letter" wraps.
alp='ABCDEFGHIJKLMNOPQRSTUVWXYZA'


def rec(s):
    """Return the upper-case string that follows `s` ('AAZ' -> 'ABA', 'ZZZ' -> 'AAAA')."""
    # Every trailing 'Z' rolls over to 'A'.
    trailing = len(s) - len(s.rstrip('Z'))
    head = s[:len(s) - trailing]
    if not head:
        # Nothing left to absorb the carry: the result grows by one 'A'.
        return 'A' + 'A' * trailing
    bumped = alp[alp.find(head[-1]) + 1]
    return head[:-1] + bumped + 'A' * trailing
result
>>> rec('AAA')
'AAB'
>>> rec('AAZ')
'ABA'
>>> rec('ZZZ')
'AAAA'
>>> rec('AZA')
'AZB'
How about this? As a simple way to handle the string getting longer you can prepend a leading '#' and strip it if it wasn't incremented:
>>> def new_sku(s):
def increment(s):
if s.endswith('Z'):
return increment(s[:-1])+'A'
else:
return s[:-1]+chr(ord(s[-1])+1)
t = increment('#'+s)
return t.lstrip('#')
>>> new_sku('AAA')
'AAB'
>>> new_sku('AAZ')
'ABA'
>>> new_sku('ZZZ')
'AAAA'
If the recursion worries you then you can flatten it the way you already did but still use the '#' character added and stripped.
You can use a for-else loop:
from string import ascii_uppercase as au
def solve(strs):
    """Return the upper-case string that follows `strs` ('AAZ' -> 'ABA', 'ZZZ' -> 'AAAA').

    An empty string yields 'A'; previously it raised UnboundLocalError
    because the loop variable used in the final slice was never bound.
    A character outside A-Z raises ValueError.
    """
    lis = []
    # Number of trailing characters that have been rewritten.
    consumed = 0
    for consumed, c in enumerate(reversed(strs), 1):
        ind = au.index(c) + 2
        # 'A'..'Y' give the next letter; 'Z' (ind == 27) wraps to 'A'.
        lis.append(au[(ind % 26) - 1])
        if ind <= 26:
            break
    else:
        # This runs only if the loop did not break: the carry went past the
        # first character, so the result grows by one 'A'.
        lis.append('A')
    return strs[:len(strs) - consumed] + "".join(reversed(lis))
# Demo calls; print() as a function (the statement form is a syntax error on
# Python 3).
print(solve('AAA'))
print(solve('AAZ'))
print(solve('ZZZ'))
print(solve('AZZZ'))
print(solve('ZYZZ'))
print(solve('ZYYZZ'))
output:
AAB
ABA
AAAA
BAAA
ZZAA
ZYZAA
We can see there are 3 conditions totally, you can iterate the string and process one of the conditions.
You can use the string.ascii_uppercase instead of chr and ord
import string
def add(s):
    """Return, as a list of characters, the upper-case string that follows `s`.

    'AAZ' gives ['A', 'B', 'A'] and 'ZZZ' gives ['A', 'A', 'A', 'A']. An empty
    input gives ['A']; previously it fell off the end and returned None.
    A character outside A-Z raises ValueError.
    """
    letters = string.ascii_uppercase
    # Least-significant character first.
    digits = list(s)[::-1]
    for index, char in enumerate(digits):
        if char != "Z":
            # No overflow: bump this letter and stop.
            digits[index] = letters[letters.index(char) + 1]
            return digits[::-1]
        # 'Z' wraps to 'A' and the carry moves on to the next letter.
        digits[index] = "A"
    # The carry went past the first letter: the result grows by one 'A'.
    return ["A"] + digits[::-1]

Categories