I have to write a function that takes a list as a parameter, such as this one:
['music', ' extension=mp3', 'reports/INFOB131', ' extension=doc,docx,pdf', ' name_contains=INFOB131', ' max_size=100000', 'reports/INFOB132', ' extension=doc,docx,pdf', ' name_contains=INFOB132', ' max_size=100000', 'games', ' name_contains=SC2,Wesnoth', 'pictures/Namur', ' extension=jpeg', ' min_size=5000000', ' name_contains=cercle', 'pictures/autres', ' extension=jpeg', ' min_size=5000000']
and returns a list similar to this:
data_config = [{'music' : {'extension':'mp3'}}, {'reports/INFOB131': {'extension': ['doc', 'docx','pdf'], 'name_contains':'INFOB131', 'max_size':100000}}, {'reports/INFOB132': {'extension': ['doc', 'docx','pdf'], 'name_contains':'INFOB132', 'max_size':100000}}]
So I wrote this function:
def my_function(list_in_question, my_config_list=[], prev_list=[]):
    """ """
    enumerated_list = list(enumerate(list_in_question))
    if not '=' in enumerated_list[0][1]:
        main_key = enumerated_list[0][1]  # referenced before assignment
        pre_dict = {main_key: {}}
        for i in enumerated_list[1:]:
            if '=' in i[1]:
                splitted = i[1].split('=')
                prev_list.append({splitted[0]: splitted[1]})
            elif not '=' in i[1] and i[1] != main_key:
                for j in prev_list:
                    pre_dict[main_key].update(j)
                my_config_list.append(pre_dict)
                return my_function(list_in_question[i[0]:])
            elif not '=' in i[1] and i[1] == main_key and main_key != enumerated_list[0][1]:
                return my_config_list
    else:
        print("The format of the file containing the data is not adequate!")
But I don't understand why, when I execute it this way:
new_lines = ['music', ' extension=mp3', '', 'reports/INFOB131', ' extension=doc,docx,pdf', ' name_contains=INFOB131', ' max_size=100000', '', 'reports/INFOB132', ' extension=doc,docx,pdf', ' name_contains=INFOB132', ' max_size=100000', '', 'games', ' name_contains=SC2,Wesnoth', '', 'pictures/Namur', ' extension=jpeg', ' min_size=5000000', ' name_contains=cercle', '', 'pictures/autres', ' extension=jpeg', ' min_size=5000000']
my_function(new_lines)
I end up with this output...
None
I would be very grateful if someone could help me.
Thank you!
PS: If anyone has an idea of how I could do this without a loop, in a purely recursive way, that would be awesome!
Everyone... Thank you!!! You really helped me; all your answers are awesome. I have some trouble understanding some parts, so I'll be annoying just a little longer with some questions about your code. Anyway, thank you for the time you took to help me, you were all more than great help!!!
Try the following code:
def foo(my_list):
    # Create an iterator
    list_iter = iter(my_list)
    # zip the iterator with itself; this will group two items in the list at a time
    key_val_tuple = zip(list_iter, list_iter)
    output_list = []
    for i in key_val_tuple:
        value_dict = {}
        value = i[1].split('=')
        value_dict[value[0]] = value[1].split(",") if len(value[1].split(",")) > 1 else value[1]
        element_dict = {}
        element_dict[i[0]] = value_dict
        output_list.append(element_dict)
    return output_list
input_list = ['music', ' extension=mp3', 'reports/INFOB131', ' extension=doc,docx,pdf', ' name_contains=INFOB131', ' max_size=100000', 'reports/INFOB132', ' extension=doc,docx,pdf', ' name_contains=INFOB132', ' max_size=100000', 'games', ' name_contains=SC2,Wesnoth', 'pictures/Namur', ' extension=jpeg', ' min_size=5000000', ' name_contains=cercle', 'pictures/autres', ' extension=jpeg', ' min_size=5000000']
# Call the function foo
output = foo(input_list)
print(output) # python3
I got the following output:
[{'music': {' extension': 'mp3'}}, {'reports/INFOB131': {' extension': ['doc', 'docx', 'pdf']}}, {' name_contains=INFOB131': {' max_size': '100000'}}, {'reports/INFOB132': {' extension': ['doc', 'docx', 'pdf']}}, {' name_contains=INFOB132': {' max_size': '100000'}}, {'games': {' name_contains': ['SC2', 'Wesnoth']}}, {'pictures/Namur': {' extension': 'jpeg'}}, {' min_size=5000000': {' name_contains': 'cercle'}}, {'pictures/autres': {' extension': 'jpeg'}}]
zip(list_iter, list_iter): This will group two items in the list at a time.
Output: [('music', ' extension=mp3'), ('reports/INFOB131', ' extension=doc,docx,pdf'), ...]
Reference:
python zip()
What exactly are Python's iterator, iterable, and iteration protocols?
Convert List to a list of tuples python
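As a quick standalone illustration of that pairing idiom (a minimal sketch, separate from the answer above):

# zip(it, it) consumes two items from the same iterator per output tuple
items = ['a', '1', 'b', '2']
it = iter(items)
print(list(zip(it, it)))  # [('a', '1'), ('b', '2')]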
You need to traverse the list only once. The pattern is this:
Start with an empty list (let's call it new_list).
For each element of the original list (original_list):
If it does not contain '=', create a new dictionary in new_list.
If it contains '=', split the element into k and v (before and after the '='), and in the last entry of new_list, under its only key, add that key-value pair.
def parse_list(original_list):
    new_list = []
    for element in original_list:
        if not '=' in element:
            new_list.append({element: {}})
        else:
            k, w = element.split('=')
            new_list[-1][new_list[-1].keys()[0]][k] = w
    return new_list
new_lines = ['music', ' extension=mp3', '', 'reports/INFOB131', ' extension=doc,docx,pdf', ' name_contains=INFOB131', ' max_size=100000', '', 'reports/INFOB132', ' extension=doc,docx,pdf', ' name_contains=INFOB132', ' max_size=100000', '', 'games', ' name_contains=SC2,Wesnoth', '', 'pictures/Namur', ' extension=jpeg', ' min_size=5000000', ' name_contains=cercle', '', 'pictures/autres', ' extension=jpeg', ' min_size=5000000']
parse_list(new_lines)
Now I should explain the line before the return statement:
new_list[-1] is the dictionary corresponding to the last entry without an equal sign that was found in the original_list. After the first pass through the loop,
new_list=[{'music': {}}]
during the second pass
new_list[-1]={'music': {}}
new_list[-1].keys()=['music']
new_list[-1].keys()[0]='music'
new_list[-1][new_list[-1].keys()[0]]={}
Now you just update this dictionary with the parsed k, w pair.
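One caveat about that line: dict.keys() returns a list only in Python 2; in Python 3 it returns a view, and keys()[0] raises a TypeError. A version-independent sketch of the same update:

k, w = element.split('=')
inner_key = next(iter(new_list[-1]))  # the only key of the last dict; works in Python 2 and 3
new_list[-1][inner_key][k] = w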
One more way of doing it:
import re

def my_function(list_in_question):
    result = {}
    main_key = ''
    for element in list_in_question:
        if element == '':
            # a blank entry marks the end of the current group
            main_key = ''
            continue
        if re.search('=', element):
            key, value = element.split('=')
            print("key, value = ", key, value)
            if re.search(',', value):
                value_list = value.split(',')
                print("value list =", value_list)
                result[main_key][key] = value_list
            else:
                result[main_key][key] = value
        else:
            main_key = element
            result[main_key] = {}
    return result
new_lines = ['music', ' extension=mp3', '', 'reports/INFOB131', ' extension=doc,docx,pdf', ' name_contains=INFOB131',
' max_size=100000', '', 'reports/INFOB132', ' extension=doc,docx,pdf', ' name_contains=INFOB132',
' max_size=100000', '', 'games', ' name_contains=SC2,Wesnoth', '', 'pictures/Namur', ' extension=jpeg',
' min_size=5000000', ' name_contains=cercle', '', 'pictures/autres', ' extension=jpeg',
' min_size=5000000']
print(my_function(new_lines))
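Note that this builds a single dict keyed by the group names rather than the list of one-key dicts shown in the question. If you need that exact shape, a minimal conversion sketch:

result = my_function(new_lines)
data_config = [{key: value} for key, value in result.items()]
print(data_config)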
Yet another try with only lists and dicts:
def make(lst):
    data_config = []
    for st in lst:
        if '=' not in st:  # new entry
            dd = dict()
            dds = dd[st] = dict()
            data_config.append(dd)
        else:  # fill entry
            k, v = st.split('=')
            if ',' in v:
                v = v.split(',')
            dds[k] = v
    return data_config
For the input list l from the question:
In [564]: make(l)
Out[564]:
[{'music': {' extension': 'mp3'}},
{'reports/INFOB131': {' extension': ['doc', 'docx', 'pdf'],
' max_size': '100000',
' name_contains': 'INFOB131'}},
{'reports/INFOB132': {' extension': ['doc', 'docx', 'pdf'],
' max_size': '100000',
' name_contains': 'INFOB132'}},
{'games': {' name_contains': ['SC2', 'Wesnoth']}},
{'pictures/Namur': {' extension': 'jpeg',
' min_size': '5000000',
' name_contains': 'cercle'}},
{'pictures/autres': {' extension': 'jpeg', ' min_size': '5000000'}}]
I have a text file that stores order info in the following format. I am trying to search for an order by the first line of its block, which represents the ID, and then print the next 7 lines. But my code either checks just the first line or prints every line that contains the input number. Could somebody help me?
4735
['Total price: ', 1425.0]
['Type of menu: ', 'BBQ']
['Type of service: ', ' ']
['Amount of customers: ', 25.0]
['Discount: ', '5%', '= RM', 75.0]
['Time: ', '2017-01-08 21:39:19']
3647
['Total price: ', 2000.0]
['Type of menu: ', ' ']
['Type of service: ', 'Tent ']
['Amount of customers: ', 0]
.......
I use the following code to search the text file.
try:
    f = open('Bills.txt', 'r')
    f.close()
except IOError:
    absent_input = raw_input("|----File was not found----|\n|----Press 'Enter' to continue...----|\n")
    report_module = ReportModule()
    report_module.show_report()

Id_input = raw_input("Enter ID of order\n")
with open("Bills.txt", "r") as f:
    searchlines = f.readlines()
j = len(searchlines) - 1
for i, line in enumerate(searchlines):
    if Id_input in str(line):  # I also tried checking with (Id_input == str(line)), but it didn't work
        k = min(i + 7, j)
        for l in searchlines[i:k]:
            print l,
        print
else:
    absent_input = raw_input("|----Order was not found----|\n|----Press 'Enter' to continue...----|\n")
    report_module = ReportModule()
    report_module.show_report()
Check the following code:
Id_input = raw_input("Enter ID of order\n").strip()
try:
    f = open("Bills.txt", "r")
    print_rows = False
    for idline in f:
        if idline.strip() == Id_input:
            print_rows = True
            continue
        if print_rows:
            if idline.startswith("["):
                print idline
            else:
                break
    if not print_rows:
        absent_input = raw_input("|----Order was not found----|\n|----Press 'Enter' to continue...----|\n")
        report_module = ReportModule()
        report_module.show_report()
except IOError:
    absent_input = raw_input("|----File was not found----|\n|----Press 'Enter' to continue...----|\n")
    report_module = ReportModule()
    report_module.show_report()
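If the goal is literally "print the next 7 lines after the matching ID", whatever their format, here is a minimal sketch using itertools.islice (print_order is a hypothetical helper name; it assumes the same Bills.txt layout):

from itertools import islice

def print_order(path, order_id):
    with open(path) as f:
        for line in f:
            if line.strip() == order_id:
                # islice pulls the next 7 lines from the same file iterator
                for row in islice(f, 7):
                    print row,
                return True
    return False

if not print_order("Bills.txt", Id_input):
    print "Order was not found"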
I know how to remove duplicates from a list using set() or two lists, but how do I keep the same list and add a number at the end of each duplicate? I could do it using if, but it's not pythonic. Thanks, guys!!
nome_a = ['Anthony', 'Rudolph', 'Chuck', 'Chuck', 'Chuck', 'Rudolph', 'Bob']
nomes = []
for item in nome_a:
    if item in nomes:
        if (str(item) + ' 5') in nomes:
            novoitem = str(item) + ' 6'
            nomes.append(novoitem)
        if (str(item) + ' 4') in nomes:
            novoitem = str(item) + ' 5'
            nomes.append(novoitem)
        if (str(item) + ' 3') in nomes:
            novoitem = str(item) + ' 4'
            nomes.append(novoitem)
        if (str(item) + ' 2') in nomes:
            novoitem = str(item) + ' 3'
            nomes.append(novoitem)
        else:
            novoitem = str(item) + ' 2'
            nomes.append(novoitem)
    if item not in nomes:
        nomes.append(item)
print(nomes)
Edit(1): Sorry. I edited for clarification.
You could use the following:
names = ['Anthony', 'Rudolph', 'Chuck', 'Chuck', 'Chuck', 'Rudolph', 'Bob']
answer = []
name_dict = {}
for name in names:
    if name_dict.get(name):
        name_dict[name] += 1
        answer.append('{}_{}'.format(name, name_dict[name]))
    else:
        name_dict[name] = 1
        answer.append(name)
print(answer)
Output
['Anthony', 'Rudolph', 'Chuck', 'Chuck_2', 'Chuck_3', 'Rudolph_2', 'Bob']
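The same counting idea can also be written with collections.defaultdict; this is just an equivalent sketch producing the same output:

from collections import defaultdict

names = ['Anthony', 'Rudolph', 'Chuck', 'Chuck', 'Chuck', 'Rudolph', 'Bob']
counts = defaultdict(int)
answer = []
for name in names:
    counts[name] += 1
    # the first occurrence keeps the bare name; repeats get a numeric suffix
    answer.append(name if counts[name] == 1 else '{}_{}'.format(name, counts[name]))
print(answer)  # ['Anthony', 'Rudolph', 'Chuck', 'Chuck_2', 'Chuck_3', 'Rudolph_2', 'Bob']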
I got this tf-idf script from yebrahim, and somehow my output documents yield all 0 for the result. Is there any problem with this?
An example of the output is:
hippo 0.0
hipper 0.0
hip 0.0
hint 0.0
hindsight 0.0
hill 0.0
hilarious 0.0
Thanks for the help.
# per-document term frequency maps: {document: {term: frequency}}
global_terms_in_doc = {}
# document frequency of each term across the corpus
global_term_freq = {}
num_docs = 0
lang = 'english'
lang_dictionary = {}
top_k = -1
supported_langs = ('english', 'french')
from django.utils.encoding import smart_str, smart_unicode
# support for custom language if needed
def loadLanguageLemmas(filePath):
    print('loading language from file: ' + filePath)
    f = open(filePath)
    for line in f:
        words = line.split()
        if words[1] == '=' or words[0] == words[1]:
            continue
        lang_dictionary[words[0]] = words[1]

def remove_diacritic(words):
    for i in range(len(words)):
        w = unicode(words[i], 'ISO-8859-1')
        w = unicodedata.normalize('NFKD', w).encode('ASCII', 'ignore')
        words[i] = w.lower()
    return words

# function to tokenize text, and put words back to their roots
def tokenize(text):
    text = ' '.join(text)
    tokens = PunktWordTokenizer().tokenize(text)
    # lemmatize words. try both noun and verb lemmatizations
    lmtzr = WordNetLemmatizer()
    for i in range(0, len(tokens)):
        #tokens[i] = tokens[i].strip("'")
        if lang != 'english':
            if tokens[i] in lang_dictionary:
                tokens[i] = lang_dictionary[tokens[i]]
        else:
            res = lmtzr.lemmatize(tokens[i])
            if res == tokens[i]:
                tokens[i] = lmtzr.lemmatize(tokens[i], 'v')
            else:
                tokens[i] = res
    # don't return any single letters
    tokens = [t for t in tokens if len(t) > 1 and not t.isdigit()]
    return tokens

def remove_stopwords(text):
    # remove punctuation
    chars = ['.', '/', "'", '"', '?', '!', '#', '$', '%', '^', '&',
             '*', '(', ')', ' - ', '_', '+', '=', '#', ':', '\\', ',',
             ';', '~', '`', '<', '>', '|', '[', ']', '{', '}']
    for c in chars:
        text = smart_str(text.replace(c, ' '))
    text = text.split()
    import nltk
    if lang == 'english':
        stopwords = nltk.corpus.stopwords.words('english')
    else:
        stopwords = open(lang + '_stopwords.txt', 'r').read().split()
    content = [w for w in text if w.lower().strip() not in stopwords]
    return content
# __main__ execution
import sys, re, math, unicodedata
from optparse import OptionParser

parser = OptionParser(usage='usage: %prog [options] input_file')
parser.add_option('-l', '--language', dest='language',
                  help='language to use in tokenizing and lemmatizing. '
                       'supported languages: {english, french}',
                  metavar='LANGUAGE')
parser.add_option('-k', '--top-k', dest='top_k',
                  help='output only terms with score no less than k')
parser.add_option('-m', '--mode', dest='mode',
                  help='display mode. can be either "both" or "term"')
(options, args) = parser.parse_args()

if options.language:
    if options.language not in supported_langs:
        print 'only ', supported_langs, ' are supported in this version.'
        quit()
    if options.language != 'english':
        lang = options.language
        loadLanguageLemmas(options.language + '_lemmas.txt')
if options.top_k:
    top_k = int(options.top_k)
display_mode = 'both'
if options.mode:
    if options.mode == 'both' or options.mode == 'term':
        display_mode = options.mode
    else:
        parser.print_help()
if not args:
    parser.print_help()
    quit()

reader = open(args[0])
all_files = reader.read().splitlines()
num_docs = len(all_files)
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize.punkt import PunktWordTokenizer

print('initializing..')
for f in all_files:
    # local term frequency map
    terms_in_doc = {}
    doc_words = open(f).read().lower()
    #print 'words:\n', doc_words
    doc_words = remove_stopwords(doc_words)
    #print 'after stopwords:\n', doc_words
    doc_words = tokenize(doc_words)
    #print 'after tokenize:\n', doc_words
    #quit()
    # increment local count
    for word in doc_words:
        if word in terms_in_doc:
            terms_in_doc[word] += 1
        else:
            terms_in_doc[word] = 1
    # increment global frequency
    for (word, freq) in terms_in_doc.items():
        if word in global_term_freq:
            global_term_freq[word] += 1
        else:
            global_term_freq[word] = 1
    global_terms_in_doc[f] = terms_in_doc

print('working through documents.. ')
for f in all_files:
    writer = open(f + '_final', 'w')
    result = []
    # iterate over terms in f, calculate their tf-idf, put in new list
    max_freq = 0
    for (term, freq) in global_terms_in_doc[f].items():
        if freq > max_freq:
            max_freq = freq
    for (term, freq) in global_terms_in_doc[f].items():
        idf = math.log(float(1 + num_docs) / float(1 + global_term_freq[term]))
        tfidf = float(freq) / float(max_freq) * float(idf)
        result.append([tfidf, term])
    # sort result on tfidf and write them in descending order
    result = sorted(result, reverse=True)
    for (tfidf, term) in result[:top_k]:
        if display_mode == 'both':
            writer.write(term + '\t' + str(tfidf) + '\n')
        else:
            writer.write(term + '\n')
print('success, with ' + str(num_docs) + ' documents.')
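One property of the smoothed formula above that is worth checking when every score comes out 0: idf = log((1 + num_docs) / (1 + doc_freq)) is exactly 0 for any term that appears in all documents, so running the script over a single input document makes every tf-idf 0. A quick arithmetic check:

import math

def idf(num_docs, doc_freq):
    # same smoothing as in the script above
    return math.log(float(1 + num_docs) / float(1 + doc_freq))

print(idf(1, 1))   # 0.0   -> with one document, every term's idf (and tf-idf) is 0
print(idf(10, 1))  # ~1.70 -> non-zero once the corpus has several documents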