All strings with list of characters? - python

I'm making a specialized utility similar to John the Ripper, and I'd like to use a loop that returns all strings up to x characters that can be formed from the string. For example, if the "seed" string is abcd, it should return:
a
b
c
d
aa
ab
ac
and so on. If the character limit is 10, it would generate aaaaaaaaaa, abcddcbaaa, and so on. Is there a simple for loop to do this, or is it more complicated than that?

I'll self-plagiarize from this answer and add a maximum length:
from itertools import product
def multiletters(seq, max_length):
for n in range(1, max_length+1):
for s in product(seq, repeat=n):
yield ''.join(s)
which gives
>>> list(multiletters("abc", 2))
['a', 'b', 'c', 'aa', 'ab', 'ac', 'ba', 'bb', 'bc', 'ca', 'cb', 'cc']
>>> list(multiletters("abcd", 4))[:8]
['a', 'b', 'c', 'd', 'aa', 'ab', 'ac', 'ad']
and so on.

def all_strings(alphabet, length_limit=None):
n_letters = len(alphabet)
length = 0
n_strings = 1
buf = []
while True:
for i in xrange(0, n_strings):
k = i
for j in xrange(length - 1, -1, -1):
buf[j] = alphabet[k % n_letters]
k /= n_letters
yield ''.join(buf)
length += 1
if length == length_limit:
break
n_strings *= n_letters
buf.append(alphabet[0])
for s in all_strings('abcd', length_limit=4):
print s

As pointed out in the comment's use itertools.premutations or even better take a look #DSM's answer, as this one misses the doubles:
In [194]: from itertools import chain, permutations
In [195]: s = 'abcd'
In [196]: map(''.join,chain.from_iterable(permutations(s,x)
for x in range(1,len(s)+1)))
Out[196]:
['a',
'b',
'c',
'd',
'ab',
'ac',
'ad',
'ba',
'bc',
'bd',
...
'dbca',
'dcab',
'dcba']
Anyway, here's a version of #DSM's answer that returns a list:
from itertools import product
def ms(seq, max_length):
return [''.join(s) for n in range(1, max_length+1)
for s in product(seq,repeat=n)]

Use itertools.permuataions.
for i in range(2,4):
tuples = itertools.permutations('abca' , i)
print( list(tuples))
The example code sequence generates:
[('a', 'b'), ('a', 'c'), ('a', 'a'), ('b', 'a'), ('b', 'c'), ('b', 'a'), ('c', 'a'), ('c', 'b'), ('c', 'a'), ('a', 'a'), ('a', 'b'), ('a', 'c')]
[('a', 'b', 'c'), ('a', 'b', 'a'), ('a', 'c', 'b'), ('a', 'c', 'a'), ('a', 'a', 'b'), ('a', 'a', 'c'), ('b', 'a', 'c'), ('b', 'a', 'a'), ('b', 'c', 'a'), ('b', 'c', 'a'), ('b', 'a', 'a'), ('b', 'a', 'c'), ('c', 'a', 'b'), ('c', 'a', 'a'), ('c', 'b', 'a'), ('c', 'b', 'a'), ('c', 'a', 'a'), ('c', 'a', 'b'), ('a', 'a', 'b'), ('a', 'a', 'c'), ('a', 'b', 'a'), ('a', 'b', 'c'), ('a', 'c', 'a'), ('a', 'c', 'b')]

Related

Cartesian Product with optional lists

I am creating a program, in python, that allows me to generate NFT Art based on the given assets.
Obviously the number of arts that can be generated varies according to the assets (layers and layer images) and this is precisely the problem, how can I calculate the possible combinations also counting the optional layers?
To be clearer:
for example I have 4 layers:
l1 = ["A","B"]
l2 = ["C"]
l3 = ["D","E"] #optional
l4 = ["F","G"] #optional
where l3 and l4 are optional. So the combinations I expect are:
1. ["A","C"]
2. ["B","C"]
3. ["A","C","D"]
4. ["A","C","E"]
5. ["B","C","D"]
6. ["B","C","E"]
7. ["A","C","F"]
8. ["A","C","G"]
9. ["B","C","F"]
10. ["B","C","G"]
11. ["A","C","D","F"]
12. ["A","C","D","G"]
13. ["A","C","E","F"]
14. ["A","C","E","G"]
15. ["B","C","D","F"]
16. ["B","C","D","G"]
17. ["B","C","E","F"]
18. ["B","C","E","G"]
How can I get to that? I tried with itertools.product but obviusly it take into account all lists
One way to do this is with the powerset recipe from the itertools docs. Chaining together the products of 'required lists' with every subset of the 'optional-list-set' gives a generator that produces each possibility once:
def powerset(iterable):
"""powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"""
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
def product_with_optional(required_sequences, optional_sequences):
return chain.from_iterable(product(*required_sequences, *optionals)
for optionals in powerset(optional_sequences))
optional_combinations = product_with_optional(required_sequences=[l1, l2],
optional_sequences=[l3, l4])
which gives:
1 ('A', 'C')
2 ('B', 'C')
3 ('A', 'C', 'D')
4 ('A', 'C', 'E')
5 ('B', 'C', 'D')
6 ('B', 'C', 'E')
7 ('A', 'C', 'F')
8 ('A', 'C', 'G')
9 ('B', 'C', 'F')
10 ('B', 'C', 'G')
11 ('A', 'C', 'D', 'F')
12 ('A', 'C', 'D', 'G')
13 ('A', 'C', 'E', 'F')
14 ('A', 'C', 'E', 'G')
15 ('B', 'C', 'D', 'F')
16 ('B', 'C', 'D', 'G')
17 ('B', 'C', 'E', 'F')
18 ('B', 'C', 'E', 'G')
I'm assuming the ordering of the optional layers matter, so you can just iteratively create all combinations of the optional layers, then use itertools.product on the layers + optional_layers to generate the lists.
import itertools
from pprint import pprint
l1 = ["A","B"]
l2 = ["C"]
l3 = ["D","E"] #optional
l4 = ["F","G"] #optional
layers = [l1, l2]
optional_layers = [l3, l4]
results = []
results += itertools.product(*layers)
for i in range(len(optional_layers) + 1):
comb = itertools.combinations(optional_layers, r=i)
for c in comb:
results += itertools.product(*layers, *c)
pprint(results)
Output
[('A', 'C'),
('B', 'C'),
('A', 'C'),
('B', 'C'),
('A', 'C', 'D'),
('A', 'C', 'E'),
('B', 'C', 'D'),
('B', 'C', 'E'),
('A', 'C', 'F'),
('A', 'C', 'G'),
('B', 'C', 'F'),
('B', 'C', 'G'),
('A', 'C', 'D', 'F'),
('A', 'C', 'D', 'G'),
('A', 'C', 'E', 'F'),
('A', 'C', 'E', 'G'),
('B', 'C', 'D', 'F'),
('B', 'C', 'D', 'G'),
('B', 'C', 'E', 'F'),
('B', 'C', 'E', 'G')]

How can I rearrange a set of values into new pattern on python and print results

so I will do my best to explain what I'm looking for,
at the moment I have a 100 item list that I want to repetitively shuffle using a set pattern to first check if the pattern will eventually bring me back to where I began
and 2 to print the result of each loop to a text file.
so using a 3 item list as my example
[a,b,c]
and the shuffle pattern [3 1 2]
where the 3rd item becomes the first.
the first item becomes the second
and the second item becomes the 3rd
on a loop would generate the following patterns
[a,b,c]
[3,1,2]
[c,a,b]
[b,c,a]
[a,b,c]
but I have a list at the moment of 100 items that I need to find every single arrangement for a few different patterns I would like to test out.
does anyone know of a way to do this in python please.
You can define function and call this function multi times like below:
>>> def func(lst, ptr):
... return [lst[idx-1] for idx in ptr]
>>> lst = ['a','b','c']
>>> ptr = [3,1,2]
>>> for _ in range(5):
... lst = func(lst, ptr)
... print(lst)
['c', 'a', 'b']
['b', 'c', 'a']
['a', 'b', 'c']
['c', 'a', 'b']
['b', 'c', 'a']
You could use numpy advanced integer indexing if your list contains a numeric type:
import numpy as np
original_list=[1,2,3]
numpy_array = np.array(original_list)
pattern = [2,1,0]
print(numpy_array[pattern])
>>> array([3, 2, 1])
def rearrange(pattern : list,L:list):
new_list = []
for i in pattern :
new_list.append(L[i-1])
return new_list
print(rearrange([3,1,2],['a','b','c']))
output :
['c', 'a', 'b']
Itertools could be what you need.
import itertools
p = itertools.permutations(['a','b','c', 'd'])
list(p)
Output:
[('a', 'b', 'c', 'd'),
('a', 'b', 'd', 'c'),
('a', 'c', 'b', 'd'),
('a', 'c', 'd', 'b'),
('a', 'd', 'b', 'c'),
('a', 'd', 'c', 'b'),
('b', 'a', 'c', 'd'),
('b', 'a', 'd', 'c'),
('b', 'c', 'a', 'd'),
('b', 'c', 'd', 'a'),
('b', 'd', 'a', 'c'),
('b', 'd', 'c', 'a'),
('c', 'a', 'b', 'd'),
('c', 'a', 'd', 'b'),
('c', 'b', 'a', 'd'),
('c', 'b', 'd', 'a'),
('c', 'd', 'a', 'b'),
('c', 'd', 'b', 'a'),
('d', 'a', 'b', 'c'),
('d', 'a', 'c', 'b'),
('d', 'b', 'a', 'c'),
('d', 'b', 'c', 'a'),
('d', 'c', 'a', 'b'),
('d', 'c', 'b', 'a')]
​

How to simplify this code about iteration combination in python

So i have this code below that i created. This code will list every possible combination of a certain value, in this example is "a" "b" "c" "d". If i use this code, the result would be like this: a, aa, ab, ac, ad, aaa, aab, aac, aad, aba, abb, abc, etc. How to simplify this for loop code, so that i can input more values without createing more for loop?
n = "abcd"
for c in n:
print(c)
for c1 in n:
print(c+c1)
for c2 in n:
print(c+c1+c2)
for c3 in n:
print(c+c1+c2+c3)
https://docs.python.org/2/library/itertools.html
Itertools permutations and combinations is what you're after.
itertools.permutations([1, 2, 3])
from itertools import combinations
n = "abcd"
print ([''.join(l) for i in range(len(n)) for l in combinations(n, i+1)])
output:
['a', 'b', 'c', 'd', 'ab', 'ac', 'ad', 'bc', 'bd', 'cd', 'abc', 'abd', 'acd', 'bcd', 'abcd']
edit:
from itertools import combinations_with_replacement
n = "abcd"
comb = []
for i in range(1, len(n)+1):
comb += list(combinations_with_replacement(n, i))
print([''.join(c) for c in comb])
output:
['a', 'b', 'c', 'd', 'aa', 'ab', 'ac', 'ad', 'bb', 'bc', 'bd', 'cc', 'cd', 'dd', 'aaa', 'aab', 'aac', 'aad', 'abb', 'abc', 'abd', 'acc', 'acd', 'add', 'bbb', 'bbc', 'bbd', 'bcc', 'bcd', 'bdd', 'ccc', 'ccd', 'cdd', 'ddd', 'aaaa', 'aaab', 'aaac', 'aaad', 'aabb', 'aabc', 'aabd', 'aacc', 'aacd', 'aadd', 'abbb', 'abbc', 'abbd', 'abcc', 'abcd', 'abdd', 'accc', 'accd', 'acdd', 'addd', 'bbbb', 'bbbc', 'bbbd', 'bbcc', 'bbcd', 'bbdd', 'bccc', 'bccd', 'bcdd', 'bddd', 'cccc', 'cccd', 'ccdd', 'cddd', 'dddd']
To get the same result of 340 elements you can use itertools.product:
product(n, repeat=1)
product(n, repeat=2)
...
product(n, repeat=4)
To print the result you can use the following loop:
from itertools import product
n = "abcd"
for i in range(1, 5):
for prod in product(n, repeat=i):
print(''.join(prod))
To get an extra level you can easily increase the 5 in range. Note: the order of printing is slightly different than in your code.
You can use itertools module to solve your problem. You need combinations_with_replacement function with all possible lengths:
import itertools as it
n = "abcd"
result = []
for l in range(len(n)):
result += list(it.combinations_with_replacement(n, l+1))
print(result)
[('a',), ('b',), ('c',), ('d',), ('a', 'a'), ('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'b'), ('b', 'c'), ('b', 'd'), ('c', 'c'), ('c', 'd'), ('d', 'd'), ('a', 'a', 'a'), ('a', 'a', 'b'), ('a', 'a', 'c'), ('a', 'a', 'd'), ('a', 'b', 'b'), ('a', 'b', 'c'), ('a', 'b', 'd'), ('a', 'c', 'c'), ('a', 'c', 'd'), ('a', 'd', 'd'), ('b', 'b', 'b'), ('b', 'b', 'c'), ('b', 'b', 'd'), ('b', 'c', 'c'), ('b', 'c', 'd'), ('b', 'd', 'd'), ('c', 'c', 'c'), ('c', 'c', 'd'), ('c', 'd', 'd'), ('d', 'd', 'd')]

Set of HUGE permutation object (in Python or R)

Aim: I'd like to get (or be able to work with) a set of all possible permutations obtained from a list of strings.
Example in Python:
import pandas as pd
import itertools
list1 = ['A', 'A', 'B', 'B']
# Get all permutations
list1_perm = list(itertools.permutations(list1))
len(list1_perm)
24
list1_perm
[('A', 'A', 'B', 'B'),
('A', 'A', 'B', 'B'),
('A', 'B', 'A', 'B'),
('A', 'B', 'B', 'A'),
('A', 'B', 'A', 'B'),
('A', 'B', 'B', 'A'),
('A', 'A', 'B', 'B'),
('A', 'A', 'B', 'B'),
('A', 'B', 'A', 'B'),
('A', 'B', 'B', 'A'),
('A', 'B', 'A', 'B'),
('A', 'B', 'B', 'A'),
('B', 'A', 'A', 'B'),
('B', 'A', 'B', 'A'),
('B', 'A', 'A', 'B'),
('B', 'A', 'B', 'A'),
('B', 'B', 'A', 'A'),
('B', 'B', 'A', 'A'),
('B', 'A', 'A', 'B'),
('B', 'A', 'B', 'A'),
('B', 'A', 'A', 'B'),
('B', 'A', 'B', 'A'),
('B', 'B', 'A', 'A'),
('B', 'B', 'A', 'A')]
Since for my analysis ('A', 'A', 'B', 'B') is the same as ('A', 'A', 'B', 'B'), (although the 'A' may have changed the position), I do:
# Get set of permutations
set1_perm = set(itertools.permutations(list1))
len(set1_perm)
6
set1_perm
{('A', 'A', 'B', 'B'),
('A', 'B', 'A', 'B'),
('A', 'B', 'B', 'A'),
('B', 'A', 'A', 'B'),
('B', 'A', 'B', 'A'),
('B', 'B', 'A', 'A')}
Now this is great, but the list I want to work with has 481 strings, with 5 unique strings with different frequencies:
len(real_list)
481
len(set(real_list))
5
# Count number of times each unique value appears
pd.Series(real_list).value_counts()
A 141
B 116
C 80
D 78
E 66
This is not a problem for itertools.permutations(real_list), but when I want to get the set, it takes ages. This is because the number of permutations is 9.044272819E+1082.
What I want to do is:
First I want to know the number of unique elements in that permutation space, i.e. the length of the set. To get the number of unique elements it might be possible to do it analytically, however since the frequency of each unique element is different I don't how to do that.
Second I would like to be able to get a sample of those unique elements in the set of permutations.
I'd appreciate any help provided.
Best,
Alejandro
Calculating the number of unique permutations is simply a matter of applying a formula - we know that were we to have n distinct elements, we would have n! permutations. Then to account for repeated permutations we must divide by each count of permutations of repeated letters. This is a multinomial coefficient.
So a simple implementation to generate the unique count may look something like
from math import factorial
from functools import reduce
from collections import Counter
def perm_cnt(l):
denom = reduce(lambda x,y: x*factorial(y), Counter(l).values())
return factorial(len(l)) // denom
Then sampling from the unique permutations is probably most simply achieved by just ensuring your sample values remain unique, as opposed to trying to generate all of the unique values and then sampling. There is a recipe in the itertools module, random_permutation, which could be useful for this.
def random_permutation(iterable, r=None):
"Random selection from itertools.permutations(iterable, r)"
pool = tuple(iterable)
r = len(pool) if r is None else r
return tuple(random.sample(pool, r))
So creating a unique sample might look something like
def uniq_sample(l, size):
s = set()
perm_size = perm_cnt(l)
cnt = 0
while cnt < min(perm_size, size):
samp = random_permutation(l)
if samp not in s:
s.add(samp)
cnt += 1
return s
Demo
>>> perm_cnt(list1)
6
>>> perm_cnt(['a']*3 + ['b']*5 + ['d']*2)
2520
>>> perm_cnt(np.random.randint(10, size=20))
105594705216000
>>> uniq_sample(list1, 4)
{('A', 'A', 'B', 'B'),
('B', 'A', 'A', 'B'),
('B', 'A', 'B', 'A'),
('B', 'B', 'A', 'A')}

Transforming nested list in Python

Assuming I have a structure like this:
a = [
('A',
['D',
'E',
'F',
'G']),
('B',
['H']),
('C',
['I'])
]
How can I transform it into:
a = [
('A', 'D'),
('A', 'E'),
('A', 'F'),
('A', 'G'),
('B', 'H'),
('C', 'I'),
]
Thanks for your time!
Try:
>>> a = [('A', ['D', 'E', 'F', 'G']), ('B', ['H']), ('C', ['I'])]
>>> [(k,j) for k, more in a for j in more]
[('A', 'D'), ('A', 'E'), ('A', 'F'), ('A', 'G'), ('B', 'H'), ('C', 'I')]
This handles only one level of nesting of course.
Here's a simple solution:
data = [
('A',
['D',
'E',
'F',
'G']),
('B',
['H']),
('C',
['I'])
]
result = []
for x in data:
for y in x[1]:
result.append((x[0], y))
(Side comment) Why on earth did you indent like that? Isn't the following more readable?
a = [
('A', ['D', 'E', 'F', 'G']),
('B', ['H']),
('C', ['I'])
]

Categories