Wordlist Generator. Split file Size. How?

Wordlist Generator. Split file Size. How? - python

I am trying to get this python script to create a new file and continue generating word combinations once a certain file size is reached.
f=open('wordlist', 'w')
def xselections(items, n):
if n==0: yield []
else:
for i in xrange(len(items)):
for ss in xselections(items, n-1):
yield [items[i]]+ss
# Numbers = 48 - 57
# Capital = 65 - 90
# Lower = 97 - 122
numb = range(48,58)
cap = range(65,91)
low = range(97,123)
choice = 0
while int(choice) not in range(1,8):
choice = raw_input('''
1) Numbers
2) Capital Letters
3) Lowercase Letters
4) Numbers + Capital Letters
5) Numbers + Lowercase Letters
6) Numbers + Capital Letters + Lowercase Letters
7) Capital Letters + Lowercase Letters
: ''')
choice = int(choice)
poss = []
if choice == 1:
poss += numb
elif choice == 2:
poss += cap
elif choice == 3:
poss += low
elif choice == 4:
poss += numb
poss += cap
elif choice == 5:
poss += numb
poss += low
elif choice == 6:
poss += numb
poss += cap
poss += low
elif choice == 7:
poss += cap
poss += low
bigList = []
for i in poss:
bigList.append(str(chr(i)))
MIN = raw_input("What is the min size of the word? ")
MIN = int(MIN)
MAX = raw_input("What is the max size of the word? ")
MAX = int(MAX)
for i in range(MIN,MAX+1):
for s in xselections(bigList,i): f.write(''.join(s) + '\n')

You can encapsulate the file rotation behavior in a class. When you are writing some data, the write method will first check if the write would exceed the file size limit; then it calls the rotate method which closes the current file and opens a new one, incrementing the sequence number on the filename:
import os
class LimitWriter(object):
def __init__(self, basepath, bytelimit):
self._basepath = basepath
self._bytelimit = bytelimit
self._sequence = 0
self._output = None
self._bytes = 0
self._rotate()
def _rotate(self):
if self._output:
self._output.close()
path = '%s.%06d' % (self._basepath, self._sequence)
self._output = open(path, 'wb')
self._bytes = 0
self._sequence += 1
def write(self, data):
size = len(data)
if (self._bytes + size) > self._bytelimit:
self._rotate()
self._bytes += size
self._output.write(data)
out = LimitWriter('wordlist', 1024 * 1024 * 1)
for i in range(MIN,MAX+1):
for s in xselections(bigList,i):
out.write(''.join(s) + '\n')
Would output a series of files which are smaller than 1MB:
1.0M wordlist.000000
1.0M wordlist.000001
252K wordlist.000002
Update - A few more tips on using some of the built-in power of Python to help make your code a bit shorter and easier to follow. I've included comments explaining each part.
Here are the docs on the modules I use below: itertools, string.
import itertools
import os
from string import digits, lowercase, uppercase
# PUT LimitWriter CLASS DEFINITION HERE
LIMIT = 1024 * 1024 * 1
choice = 0
while int(choice) not in range(1,8):
choice = raw_input('''
1) Numbers
2) Capital Letters
3) Lowercase Letters
4) Numbers + Capital Letters
5) Numbers + Lowercase Letters
6) Numbers + Capital Letters + Lowercase Letters
7) Capital Letters + Lowercase Letters
: ''')
MIN = int(raw_input("What is the min size of the word? "))
MAX = int(raw_input("What is the max size of the word? "))
# replace your ranges and large if/else with this
choices = {
1: digits,
2: uppercase,
3: lowercase,
4: uppercase + lowercase,
5: digits + lowercase,
6: digits + uppercase + lowercase,
7: uppercase + lowercase
}
# pick one of the sets with the user's choice
chars = choices[int(choice)]
out = LimitWriter('wordlist', LIMIT)
# generate all permutations of the characters from min to max
for length in range(MIN, MAX+1):
for tmp in itertools.permutations(chars, length):
out.write(''.join(tmp) + '\n')

Here's the final working code. Change the variable mbXY inside function generate_wordlist to establish the size cap of each file provided it gets bigger than this size. This file has been updated to run under Python 3.2
import itertools
import subprocess
import os
from string import digits, ascii_lowercase, ascii_uppercase, punctuation
if os.name == 'nt':
def clear_console():
subprocess.call("cls", shell=True)
return
else:
def clear_console():
subprocess.call("clear", shell=True)
return
def generate_phone_numbers(area_code):
f = open('phones.txt', 'w')
for i in range(2010000, 9999999):
f.write(area_code + str(i) + '\n')
def generate_wordlist(lst_chars, min_digit, max_digit, lst_name):
mb1 = 1024000
mb10 = 1024000 * 10
mb100 = 1024000 * 100
mb250 = 1024000 * 250
mb500 = 1024000 * 500
gb1 = 1024000 * 1000
file_size_limit = mb10
out = file_writer(lst_name, file_size_limit)
for curr_length in range(min_digit, max_digit + 1):
for curr_digit in itertools.product(lst_chars, repeat=curr_length):
out.write(''.join(curr_digit) + '\n')
class file_writer(object):
def __init__(self, basepath, bytelimit):
self._basepath = basepath
self._bytelimit = bytelimit
self._sequence = 0
self._output = None
self._bytes = 0
self._rotate()
def _rotate(self):
if self._output:
self._output.close()
path = '%s.%06d' % (self._basepath, self._sequence)
self._output = open(path, 'wb')
self._bytes = 0
self._sequence += 1
def write(self, data):
size = len(data)
if (self._bytes + size) > self._bytelimit:
self._rotate()
self._bytes += size
self._output.write(bytes(data, "utf-8"))
choice = 0
while int(choice) not in range(1,6):
clear_console()
print ('')
print (' wgen - Menu')
choice = input('''
1. Phone numbers.
2. Numbers.
3. Numbers + Lowercase.
4. Numbers + Lowercase + Uppercase.
5. Numbers + Lowercase + Uppercase + Punctuation.
Enter Option: ''')
print ('')
choice = int(choice)
if choice == 1:
area_code = input('''
Please enter Area Code: ''')
area_code = str(area_code)
area_code = area_code.strip()
if len(area_code) == 3:
print ('')
print (' Generating phone numbers for area code ' + area_code + '.')
print (' Please wait...')
generate_phone_numbers(area_code)
if choice == 2:
min_digit = input(' Minimum digit? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' Maximum digit? ')
max_digit = int(max_digit)
lst_chars = digits
lst_name = 'num'
print ('')
print (' Generating numbers between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(lst_chars, min_digit, max_digit, lst_name)
if choice == 3:
min_digit = input(' Minimum digit? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' Maximum digit? ')
max_digit = int(max_digit)
lst_chars = digits + ascii_lowercase
lst_name = 'num_low'
print ('')
print (' Generating numbers & lowercase between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(lst_chars, min_digit, max_digit, lst_name)
if choice == 4:
min_digit = input(' Minimum digit? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' Maximum digit? ')
max_digit = int(max_digit)
lst_chars = digits + ascii_lowercase + ascii_uppercase
lst_name = 'num_low_upp'
print ('')
print (' Generating numbers, lowercase & uppercase between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(lst_chars, min_digit, max_digit, lst_name)
if choice == 5:
min_digit = input(' Minimum digit? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' Maximum digit? ')
max_digit = int(max_digit)
lst_chars = digits + ascii_lowercase + ascii_uppercase + punctuation
lst_name = 'num_low_upp_pun'
print ('')
print (' Generating numbers, lowercase, uppercase & punctuation between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(lst_chars, min_digit, max_digit, lst_name)

Related

Swap upper and lower case with its ASCII value

Im trying to swap letter type with its ASCII value however I am only getting the last word of the string as an output. it also will not accept any string with number values
def get_sentence():
sentence = input("Please input the sentence:")
words = sentence.split(' ')
sentence = ' '.join(reversed(words))
return sentence
ans = ''
def main():
sentence = get_sentence()
ans =''
for s in sentence:
if ord(s) >= 97 and ord(s) <= 122:
ans = ans + chr(ord(s) - 32)
elif ord(s) >= 65 and ord(s) <= 90 :
ans = ans + chr(ord(s) + 32)
else :
ans += ' '
print(ans)
if __name__ == "__main__":
main()

I am not sure if this is the result you want (adding expected output would be helpful next time) but removing the print statement outside the for loop seems to fix it for me.
def get_sentence():
sentence = input("Please input the sentence:")
words = sentence.split(' ')
sentence = ' '.join(reversed(words))
return sentence
ans = ''
def main():
sentence = get_sentence()
ans =''
for s in sentence:
if ord(s) >= 97 and ord(s) <= 122:
ans = ans + chr(ord(s) - 32)
elif ord(s) >= 65 and ord(s) <= 90 :
ans = ans + chr(ord(s) + 32)
else :
ans += ' '
print(ans) # this should be outside!
if __name__ == "__main__":
main()

There's a simpler way to do this, using built-in methods isupper() and islower(). Then you don't need to handle sentences (or punctuation) separately.
def swap_case(sentence: str) -> str:
letters = (
letter.upper() if letter.islower() else letter.lower()
for letter in sentence
)
return "".join(letters)
print(swap_case(get_sentence()))
Notice my function also returns the result rather than printing it. And it takes input of the sentence, so you can use it in other cases, which makes it more reusable. Not sure why you want the words of the sentence reversed... but ¯\_(ツ)_/¯

reversing this encoding algorithm in Python

I need help reversing this encoding algorithm into a decoding algorithm. I understand the swap function but I'm having trouble with rest of the code.
from string import ascii_letters, digits
def shift(text, shift: int = 0):
SPACE = ' '
letters = ascii_letters + digits + SPACE
letters_length = len(letters)
shifted_chars = []
for char in text:
if char in letters:
shifted_chars.append(letters[(letters.index(char) + shift) % letters_length])
else:
shifted_chars.append(char)
return ''.join(shifted_chars)
def swap(text):
middle = len(text) // 2
return text[middle:] + text[:middle]
def encode(text):
if len(text) < 2:
return shift(text, 7)
for i in range(31):
text = swap(shift(text, i))
return text

Fixed owing to Parisa.H.R's useful comment(s).
The deswap(text) function equalizes asymmetric result of swap(text) in case of odd len(text).
from string import ascii_letters, digits
def shift(text, shift: int = 0):
SPACE = ' '
letters = ascii_letters + digits + SPACE
letters_length = len(letters)
shifted_chars = []
for char in text:
if char in letters:
shifted_chars.append(letters[(letters.index(char) + shift) % letters_length])
else:
shifted_chars.append(char)
return ''.join(shifted_chars)
def swap(text):
middle = len(text) // 2
return text[middle:] + text[:middle]
def deswap(text):
middle = (len(text) + (len(text) % 2)) // 2
return text[middle:] + text[:middle]
def encode(text):
if len(text) < 2:
return shift(text, 7)
for i in range(31):
text = swap(shift(text, i))
return text
def decode(text):
if len(text) < 2:
return shift(text, -7)
for i in range(30, -1, -1):
text = deswap(shift(text, -i))
# text = shift(deswap(text), -i)
return text
Output:
decode(encode('opening'))
# 'opening'
decode(encode('openin'))
# 'openin'
decode(encode('Quora'))
# 'Quora'
decode(encode('Quoran'))
# 'Quoran'
decode(encode(ascii_letters + digits)) == ascii_letters + digits
# True
decode(encode(ascii_letters + digits + ' ')) == ascii_letters + digits + ' '
# True

Cant get my code to generate enough fields for bingo

I need my program to generate as many playing fields as the entered number of players playing the game. Currently it only generates 1 field. Cant wrap my head around this one.
s = int(input("Sisesta mängijate arv: "))
b = int(input("Sisesta väjaku suurus: "))
b = b + 1
bb = b
c = int(input("Sisesta korrutustabeli suurus: "))
for s in range (1 , s+1):
numberolemas = ['0']
t.write(str(s) + ". väljak \n\n")
print(str(s) + ". väljak \n")
for bb in range (1 , bb):
for b in range (1 , b):
import random
number = random.randint(1, c) * random.randint(1, c)
olemas = True
while olemas:
number = random.randint(1, c) * random.randint(1, c)
if number not in numberolemas:
olemas = False
t.write(str(number))
print(str(number), end = ' ')
if number <= 9:
t.write(" ")
print(" ", end = '')
elif number >= 100:
t.write(" ")
print(" ", end = '')
else: t.write(" ") and print(" ", end = '')
numberolemas.append(number)
b = b + 1
t.write("\n\n")
print(" ")
bb = bb + 1
print(" ")
t.close() ```

New Hangman Python

I am working on a Hangman game, but I am having trouble replacing the dashes with the guessed letter. The new string just adds on new dashes instead of replacing the dashes with the guessed letter.
I would really appreciate it if anyone could help.
import random
import math
import os
game = 0
points = 4
original = ["++12345","+*2222","*+33333","**444"]
plusortimes = ["+","*"]
numbers = ["1","2","3"]
#FUNCTIONS
def firstPart():
print "Welcome to the Numeric-Hangman game!"
def example():
result = ""
ori = random.choice(original)
for i in range(2,len(ori)):
if i % 2 == 0:
result = result + ori[i] + ori[0]
else:
result = result + ori[i] + ori[1]
return ori
# def actualGame(length):
#TOP LEVEL
firstPart()
play = raw_input("Do you want to play ? Y - yes, N - no: ")
while (play == "Y" and (points >= 2)):
game = game + 1
points = points
print "Playing game #: ",game
print "Your points so far are: ",points
limit = input("Maximum wrong guesses you want to have allowed? ")
length = input("Maximum length you want for the formulas (including symbols) (must be >= 5)? ")
result = "" #TRACE
ori = random.choice(original)
for i in range(2,len(ori)):
if i % 2 == 0:
result = result + ori[i] + ori[0]
else:
result = result + ori[i] + ori[1]
test = eval(result[:-1])
v = random.choice(plusortimes) #start of randomly generated formula
va = random.choice(plusortimes)
formula = ""
while (len(formula) <= (length - 3)):
formula = formula + random.choice(numbers)
formula2 = str(v + va + formula)
kind = ""
for i in range(2,len(formula2)):
if i % 2 == 0:
kind = kind + formula2[i] + formula2[0]
else:
kind = kind + formula2[i] + formula2[1]
formula3 = eval(kind[:-1])
partial_fmla = "------"
print " (JUST TO TRACE, the program invented the formula: )" ,ori
print " (JUST TO TRACE, the program evaluated the formula: )",test
print "The formula you will have to guess has",length,"symbols: ",partial_fmla
print "You can use digits 1 to 3 and symbols + *"
guess = raw_input("Please enter an operation symbol or digit: ")
a = 0
new = ""
while a<limit:
for i in range(len(formula2)):
if (formula2[i] == partial_fmla[i]):
new = new + partial_fmla[i]
elif (formula2[i] == guess):
new[i] = guess
else:
new[i] =new + "-"
a = a+1
print new
guess = raw_input("Please enter an operation symbol or digit: ")
play = raw_input("Do you want to play ? Y - yes, N - no: ")

The following block seems problematic:
elif (formula2[i] == guess):
new[i] = guess
else:
new[i] =new + "-"
Python does not allow modification of characters within strings, as they are immutable (cannot be changed). Try appending the desired character to your new string instead. For example:
elif formula2[i] == guess:
new += guess
else:
new += '-'
Finally, you should put the definition of new inside the loop directly under, as you want to regenerate it after each guess.

Python 3.2 File I/O. Generating & Spliting a file based on preset size limit. How?

I'm working on some code that generates all permutations of a given set of characters. Although my code works in Python 2.7 it no longer does in Python 3.x due to many changes in Strings. I'd like to adapt and since I'm new to Python, I was hoping you could give me a little push. =)
My question is, as Python generates the word list of your choice, the output file size grows accordingly. I'd like you to show me how can I make this script check for a preset file size and if reached, open a new file and continue writing permutations.
Example:
numeric_lowercase.000001
numeric_lowercase.000002
numeric_lowercase.000003
Remember, I have looked at most examples on the site but they do not work with Python 3.2.
Here's my Python 3.2 working code so far:
import itertools
import subprocess
import os
from string import digits, ascii_lowercase, ascii_uppercase, punctuation
if os.name == 'nt':
def clear_console():
subprocess.call("cls", shell=True)
return
else:
def clear_console():
subprocess.call("clear", shell=True)
return
def generate_phone_numbers(area_code):
f = open('phones.txt', 'w')
for i in range(2010000, 9999999):
f.write(area_code + str(i) + '\n')
def generate_wordlist(lst_chars, min_digit, max_digit, lst_name):
f = open(lst_name, 'w')
for curr_length in range(min_digit, max_digit + 1):
for curr_digit in itertools.product(lst_chars, repeat=curr_length):
f.write(''.join(curr_digit) + '\n')
print ('')
print (' wgen - Menu')
choice = 0
while int(choice) not in range(1,6):
clear_console()
choice = input('''
1. Phone numbers for a given area code.
2. Numbers.
3. Numbers + Lowercase.
4. Numbers + Lowercase + Uppercase.
5. Numbers + Lowercase + Uppercase + Punctuation.
Enter Option: ''')
print ('')
choice = int(choice)
if choice == 1:
area_code = input('''
Please enter Area Code: ''')
area_code = str(area_code)
area_code = area_code.strip()
if len(area_code) == 3:
print ('')
print (' Generating phone numbers for area code ' + area_code + '.')
print (' Please wait...')
generate_phone_numbers(area_code)
if choice == 2:
min_digit = input(' What is the minimum size of the word? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' What is the maximum size of the word? ')
max_digit = int(max_digit)
chars = digits
lst_name = 'numeric.txt'
print ('')
print (' Generating numbers between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(chars, min_digit, max_digit, lst_name)
if choice == 3:
min_digit = input(' What is the minimum size of the word? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' What is the maximum size of the word? ')
max_digit = int(max_digit)
chars = digits + ascii_lowercase
lst_name = 'numeric_lowercase.txt'
print ('')
print (' Generating numbers & lowercase between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(chars, min_digit, max_digit, lst_name)
if choice == 4:
min_digit = input(' What is the minimum size of the word? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' What is the maximum size of the word? ')
max_digit = int(max_digit)
chars = digits + ascii_lowercase + ascii_uppercase
lst_name = 'numeric_lowercase_uppercase.txt'
print ('')
print (' Generating numbers, lowercase & uppercase between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(chars, min_digit, max_digit, lst_name)
if choice == 5:
min_digit = input(' What is the minimum size of the word? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' What is the maximum size of the word? ')
max_digit = int(max_digit)
chars = punctuation
lst_name = 'numeric_lowercase_uppercase_punctuation.txt'
print ('')
print (' Generating numbers, lowercase, uppercase & punctuation between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(chars, min_digit, max_digit, lst_name)

With a little bit of up front configuration you can use the RotatingFileHandler built into the logging library in stdlib.
import logging
from logging.handlers import RotatingFileHandler
log = logging.getLogger('myprog.stufflogger')
log.propagate = False #ensure that we don't mess with other logging
#configure RotatingFileHandler
handler = RotatingFileHandler('base_file_name.txt', maxBytes=1024*1024*20)
handler.setFormatter(logging.Formatter('%(message)s')
handler.terminator = '' # default is new line
log.addHandler(handler)
# you can now use any of the log methods to add the values
log.info('stuff')

I think the best solution would be to write a class that acts like a file, but provides the chunking capability. Your program just writes to this object as though it were a regular file.
The implementation below won't split strings (if you call f.write("this is a test") the entire message is guaranteed to go in one file) and it only starts a new one when the limit is exceeded, so files will be somewhat larger than the chunk size. This behavior is all in the write() method and could be changed if desired.
class chunkyfile(object):
def __init__(self, filename, chunksize=1000000, mode="w", encoding=None,
extension="", start=0, digits=6):
self.filename = filename
self.chunksize = chunksize
self.chunkno = start
self.file = None
self.mode = mode
self.encoding = encoding
self.digits = digits
self.extension = ("." * bool(extension) * (not extension.startswith(".")) +
extension)
self.softspace = 0 # for use with print
def _nextfile(self):
self.file and self.file.close()
self.file = open(self.filename + str(self.chunkno).rjust(self.digits, "0") +
self.extension, mode=self.mode, encoding=self.encoding)
self.chunkno += 1
def write(self, text):
self.file and self.file.tell() > self.chunksize and self.close()
self.file or self._nextfile()
self.file.write(text)
# convenience method, equivalent to print(... file=f)
# requires Python 3.x or from __future__ import print in Py2
def print(*objects, sep=" ", end="\n", flush=False):
print(*objects, sep=sep, end=end, flush=flush, file=self)
def writelines(self, lines):
# do it a line at a time in case we need to split
for line in lines: self.write(line)
def flush(self):
self.file and self.file.flush()
def close(self):
self.file = self.file and self.file.close()
# support "with" statement
def __enter__(self):
return self
def __exit__(self, e, value, tb):
self.close()
# now use the file
with chunkyfile(r"C:\test", 10, extension="txt", encoding="utf8") as f:
f.write("FINALLY ROBOTIC BEINGS RULE THE WORLD")
f.write("The humans are dead")
f.write("The humans are dead")
f.write("We used poisonous gasses")
f.write("And we poisoned their asses")

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Wordlist Generator. Split file Size. How? - python

Related

Swap upper and lower case with its ASCII value

reversing this encoding algorithm in Python

Cant get my code to generate enough fields for bingo

New Hangman Python

Python 3.2 File I/O. Generating & Spliting a file based on preset size limit. How?

Categories

Resources