Related
I have a large text file that contains numbers. I want to loop through the file and append the numbers to list_of_numbers. The problem is that the loop appends to the list, but not in the way I want, so the list looks like this after the iteration:
['\n', '+', '1', '6', '1', '0', '8', '5', '0', '7', '7', '6', '4', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '6', '0', '2', '9', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '6', '8', '4', '6', '\n', '+', '1', '6', '1', '0', '8', '5', '0', '5', '9', '3', '4', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '0', '7', '8', '3', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '9', '2', '8', '2', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '0', '0', '4', '9', '\n']
This is just part of the output. I want the list to look like this instead: [123455334,492023232,32322323]
I tried the following, but it does not work and raises errors:
print(list([int(x) for x in ''.join(list_of_numbers).split('\n')]))
here is my full code
from tkinter import *
from tkinter import filedialog
import selenium
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium import webdriver
from selenium import webdriver
list_of_numbers=[]
full_list_of_numbers=[]
# Ask the user for a .txt file, show the chosen path in `pathh`, show the
# file's text in `txtarea`, and append the file's contents to the global
# `list_of_numbers`.
def openFile():
# Returns the selected path as a string.
tf = filedialog.askopenfilename(
initialdir="C:/Users/MainFrame/Desktop/",
title="Open Text file",
filetypes=(("Text Files", "*.txt"),)
)
pathh.insert(END, tf)
# `tf` is rebound from the path string to the opened file object.
tf = open(tf) # or tf = open(tf, 'r')
# read() returns the entire file as one str.
data = tf.read()
txtarea.insert(END, data)
tf.close()
# NOTE: iterating over a str yields one character at a time, so every digit,
# '+' and '\n' is appended as its own list element (this is what produces
# the character-per-element list shown in the question).
for i in data:
list_of_numbers.append(i)
print(list_of_numbers)
ws = Tk()
ws.title("PythonGuides")
ws.geometry("400x450")
ws['bg']='#fb0'
txtarea = Text(ws, width=40, height=20)
txtarea.pack(pady=20)
pathh = Entry(ws)
pathh.pack(side=LEFT, expand=True, fill=X, padx=20)
Button(
ws,
text="Open File",
command=openFile
).pack(side=RIGHT, expand=True, fill=X, padx=20)
ws.mainloop()
print(list_of_numbers)
while ' ' in list_of_numbers:
list_of_numbers.remove(' ')
print(list([int(x) for x in ''.join(list_of_numbers).split('\n')]))
Look at that part
tf = open(tf) # or tf = open(tf, 'r')
data = tf.read()
txtarea.insert(END, data)
tf.close()
for i in data:
list_of_numbers.append(i)
data is one big string. You then iterate over it one character at a time and append each single character (including '+' and '\n') to the list, which is why you get that output.
Replace the above snippet with following:
with open(tf) as f: # use context manager
for line in f:
txtarea.insert(END, line)
list_of_numbers.append(int(line))
Note, this assumes there are no empty lines in your file. If there are, then
with open(tf) as f: # use context manager
for line in f:
txtarea.insert(END, line)
line = line.strip()
if line:
list_of_numbers.append(int(line))
I want to train a Keras model using cross-validation, but my data is a dict of lists.
I want 10 folds, so I want a subset of 10% of the dict keys for each validation step, and a different 10% (with shuffling) in the next.
Example:
For the first validation step:
pairs_train = {'0': list1,
'1': list2,
'2': list3,
'3': list4,
'4': list5,
'5': list6,
'6': list7,
'7': list8,
'8': list9,
}
pairs_val = {'9': list10,
}
Here's my function:
# Run k-fold cross-validation: for every split, build a training and a
# validation DataGenerator, fit self.model on them, and record the
# last-epoch metrics of that fit.
def crossValidation(self, k_folds=10):
# Last-epoch metrics, one entry appended per fold.
cv_accuracy_train = []
cv_accuracy_val = []
cv_loss_train = []
cv_loss_val = []
# NOTE(review): `pairs` and `kfold` are not defined inside this block --
# presumably set up elsewhere; confirm.
s = pd.Series(pairs)
idx = 0
for train_idx, val_idx in kfold.split(s):
print("=========================================")
print("====== K Fold Validation step => %d/%d =======" % (idx, k_folds))
print("=========================================")
# NOTE: the traceback for this line shows s[train_idx] being resolved
# through self.loc[key], i.e. as index labels. train_idx holds integer
# positions, so the lookup raises KeyError when the Series index consists
# of the dict's keys; s.iloc[train_idx] would select by position instead.
train_gen = DataGenerator(pairs=s[train_idx], batch_size=self.param_grid['batch_size'],
nr_files=len(self.Data.all_files), nr_tests=len(self.Data.all_tests),
negative_ratio=self.param_grid['negative_ratio'])
val_gen = DataGenerator(pairs=s[val_idx], batch_size=self.param_grid['batch_size'],
nr_files=len(self.Data.all_files), nr_tests=len(self.Data.all_tests),
negative_ratio=self.param_grid['negative_ratio'])
# Train
h = self.model.fit(train_gen,
validation_data=val_gen,
epochs=self.param_grid['nb_epochs'],
verbose=2)
# [-1] keeps only the value from the final epoch of this fold.
cv_accuracy_train.append(np.array(h.history['mae'])[-1])
cv_accuracy_val.append(np.array(h.history['val_mae'])[-1])
cv_loss_train.append(np.array(h.history['loss'])[-1])
cv_loss_val.append(np.array(h.history['val_loss'])[-1])
idx += 1
Traceback:
File "/Users/joaolousada/Documents/5ºAno/Master-Thesis/main/Prioritizer/Prioritizer.py", line 173, in crossValidation
train_gen = DataGenerator(pairs=s[train_idx], batch_size=self.param_grid['batch_size'],
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/pandas/core/series.py", line 908, in __getitem__
return self._get_with(key)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/pandas/core/series.py", line 943, in _get_with
return self.loc[key]
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py", line 879, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py", line 1099, in _getitem_axis
return self._getitem_iterable(key, axis=axis)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py", line 1037, in _getitem_iterable
keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py", line 1254, in _get_listlike_indexer
self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py", line 1298, in _validate_read_indexer
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\n ...\n 3257, 3258, 3261, 3262, 3263, 3265, 3266, 3267, 3268, 3269],\n dtype='int64', length=2943)] are in the [index]"
Suppose you have a dict with list values, for example:
pairs = {'0': [1,2,3],
'1': [1,2,3],
'2': [4,6,8],
'3': [2,1,9],
'4': [9,7,8],
'5': [4,6,8],
'6': [9,7,8],
'7': [9,7,8],
'8': [1,2,3],
'9': [4,6,8],
}
The following function returns, for each fold, the keys to keep for training and the keys to hold out:
def kfold_split(pairs: dict, perc: float, shuffle: bool) -> list:
    """Split the keys of *pairs* into per-fold (train, held-out) key lists.

    Args:
        pairs: Mapping whose keys are to be split.
        perc: Fraction of the keys held out in each fold (e.g. ``0.2``).
        shuffle: If True, each fold's held-out keys are drawn at random,
            without repeats inside a fold (different folds may still
            overlap). If False, consecutive slices of the key order are
            used, starting from the end.

    Returns:
        A list with one ``(other_keys, fold_keys)`` tuple per fold.
        ``other_keys`` follows the dict's key order.

    Raises:
        ValueError: If ``perc`` is too small to hold out at least one key.
    """
    keys = list(pairs)
    sets = len(keys)
    cv_perc = int(sets * perc)
    if cv_perc < 1:
        raise ValueError(
            "perc=%r holds out no keys for a dict with %d keys" % (perc, sets)
        )
    folds = sets // cv_perc

    indices = []
    for fold in range(folds):
        if shuffle:
            # Sample positions rather than the keys themselves: numpy would
            # coerce the keys to a common dtype. replace=False stops the same
            # key from being drawn twice within one fold.
            picked = np.random.choice(sets, cv_perc, replace=False)
            fold_keys = [keys[i] for i in picked]
        else:
            # Walk backwards through the key order, one slice per fold.
            stop = sets - cv_perc * fold
            fold_keys = keys[stop - cv_perc:stop]
        held_out = set(fold_keys)
        other_keys = [k for k in keys if k not in held_out]
        indices.append((other_keys, fold_keys))
    return indices
With shuffle=True you get randomly chosen keys:
kfold_split(pairs, perc=.2, shuffle=True)
>>>
[(['6', '2', '1', '5', '4', '7', '0', '3'], ['9', '8']),
(['6', '1', '9', '5', '4', '7', '0', '3'], ['8', '2']),
(['2', '1', '8', '9', '5', '4', '7', '3'], ['6', '0']),
(['2', '8', '9', '5', '4', '7', '0', '3'], ['1', '6']),
(['6', '2', '8', '5', '4', '7', '0', '3'], ['9', '1'])]
or, with shuffle=False, keys taken in order:
kfold_split(pairs, perc=.2, shuffle=False)
>>>
[(['6', '2', '1', '5', '4', '7', '0', '3'], ['8', '9']),
(['2', '1', '8', '9', '5', '4', '0', '3'], ['6', '7']),
(['6', '2', '1', '8', '9', '7', '0', '3'], ['4', '5']),
(['6', '1', '8', '9', '5', '4', '7', '0'], ['2', '3']),
(['6', '2', '8', '9', '5', '4', '7', '3'], ['0', '1'])]
Then you can filter your dictionary based on these indices as follows
# `result` is presumably the list of (train_keys, test_keys) tuples produced
# by the split function; `pairs` is the dict being split.
for train_indices, test_indices in result:
    # Sets make the membership tests below O(1) per key.
    train_keys = set(train_indices)
    test_keys = set(test_indices)
    # Held-out data: entries whose key was selected for this fold
    pair_test = {k: v for k, v in pairs.items() if k in test_keys}
    # Train data: entries whose key is in the training list. (The condition
    # must be `in`, not `not in`; negating it selects the held-out entries.)
    pair_train = {k: v for k, v in pairs.items() if k in train_keys}
    # Some other stuff here
I managed to find a solution to my own problem by taking all the dict keys as an np.array and passing them to kf.split(). With the indices obtained I then look up the dict keys that I want. I'm not sure it is the most optimized/pythonic solution, but it works fine.
def crossValidation(self, k_folds=10):
    """Run k-fold cross-validation over the keys of ``self.Data.pairs``.

    For every fold the pairs dict is split by key into a training and a
    validation subset, a ``DataGenerator`` is built for each subset, and
    ``self.model`` is fitted on them.

    Args:
        k_folds: Number of folds, passed to ``KFold(n_splits=...)``.

    Returns:
        dict: the last-epoch value of each metric per fold, under the keys
        ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``
        (one list entry per fold).
    """
    all_pairs = self.Data.pairs
    # Keep the keys in a plain list: indexing it returns the original key
    # objects, whereas np.array(...) would coerce them to a numpy dtype.
    keys = list(all_pairs)
    kfold = KFold(n_splits=k_folds, shuffle=True)

    # Arguments shared by the training and validation generators.
    generator_kwargs = {
        'batch_size': self.param_grid['batch_size'],
        'nr_files': len(self.Data.all_files),
        'nr_tests': len(self.Data.all_tests),
        'negative_ratio': self.param_grid['negative_ratio'],
    }

    metrics = {'accuracy': [], 'val_accuracy': [], 'loss': [], 'val_loss': []}

    # Folds are numbered from 1 so the progress line ends at k_folds/k_folds.
    for fold, (train_idx, val_idx) in enumerate(kfold.split(keys), start=1):
        print("=========================================")
        print("====== K Fold Validation step => %d/%d =======" % (fold, k_folds))
        print("=========================================")

        pairs_train = {keys[i]: all_pairs[keys[i]] for i in train_idx}
        pairs_val = {keys[i]: all_pairs[keys[i]] for i in val_idx}

        train_gen = DataGenerator(pairs=pairs_train, **generator_kwargs)
        val_gen = DataGenerator(pairs=pairs_val, **generator_kwargs)

        # Train
        # NOTE(review): the same self.model is fitted in every fold and is
        # not re-created here, so weights appear to carry over between
        # folds -- confirm whether the model should be rebuilt per fold.
        h = self.model.fit(train_gen,
                           validation_data=val_gen,
                           epochs=self.param_grid['nb_epochs'],
                           verbose=2)

        # Record only the final epoch's value of each metric.
        for name, values in metrics.items():
            values.append(np.array(h.history[name])[-1])

    return metrics
It's hard for me to explain this accurately, but this is what I want.
The numbers between the elements containing the string '/' should be merged together like so:
source = ['3', '/', '7', '/', '1', '1', '/', '1', '5', '/', '2', '2', '/', '1', '1', '5']
some function....
output = ['3', '/', '7', '/', '11', '/', '15', '/', '22', '/', '115']
You could try iterating through the list and keeping a tracker. When you reach a number, add it to the tracker; when you reach a /, reset and add the tracker to the list.
def merge(arr):
    """Concatenate the items between '/' separators into single strings.

    Args:
        arr: Iterable of strings in which '/' marks a boundary.

    Returns:
        A new list in which every run of non-'/' items is joined into one
        string and each '/' is kept as its own element. An empty run (for
        example before a leading '/' or after a trailing one) yields ''.
    """
    ret = []
    run = []  # items collected since the previous separator
    for item in arr:
        if item == '/':
            ret.extend([''.join(run), '/'])
            run = []
        else:
            run.append(item)
    # Flush whatever follows the last separator.
    ret.append(''.join(run))
    return ret
>>> merge(['3', '/', '7', '/', '1', '1', '/', '1', '5', '/', '2', '2', '/', '1', '1', '5'])
['3', '/', '7', '/', '11', '/', '15', '/', '22', '/', '115']
def merge(source, separator):
    """Merge the strings between occurrences of *separator* in *source*.

    Args:
        source: List of strings, some of which equal ``separator``.
        separator: The (non-empty) string that marks a boundary.

    Returns:
        A list alternating merged strings and ``separator``.
    """
    # Glue everything together, then cut at the separators.
    lst = ''.join(source).split(separator)
    # n pieces need n - 1 separators between them: start with a list of
    # separators only and overwrite every even slot with a piece.
    result = [separator] * (len(lst) * 2 - 1)
    result[0::2] = lst
    return result
source = ['3', '/', '7', '/', '1', '1', '/', '1', '5', '/', '2', '2', '/', '1', '1', '5']
print(merge(source, '/')) # ['3', '/', '7', '/', '11', '/', '15', '/', '22', '/', '115']
Same idea with slightly different implementation:
def func(input):
    """Join the items between '/' separators into single strings.

    Args:
        input: Iterable of strings in which '/' marks a boundary. (The
            parameter name shadows the ``input`` builtin; it is kept so
            existing keyword callers continue to work.)

    Returns:
        A list alternating joined groups and '/' separators.
    """
    ret = []
    curr_grp = []
    for item in input:
        if item != '/':
            curr_grp.append(item)
        else:
            ret.append(''.join(curr_grp))
            ret.append('/')
            curr_grp = []
    # The items after the last '/' have no separator to trigger the flush
    # above, so emit them here; otherwise the final group is silently lost.
    if curr_grp:
        ret.append(''.join(curr_grp))
    return ret
You could use the itertools.groupby() function like this:
from itertools import groupby

separator = '/'
source = ['3', '/', '7', '/', '1', '1', '/', '1', '5', '/', '2', '2', '/', '1', '1', '5']

# groupby() yields one group per run of consecutive items with the same key:
# key True  -> a run of non-separator items, joined into one string;
# key False -> a run of separators, emitted as a single separator
#              (so adjacent separators collapse into one).
result = [
    ''.join(chars) if blend else separator
    for blend, chars in groupby(source, lambda v: v != separator)
]

print(result)  # -> ['3', '/', '7', '/', '11', '/', '15', '/', '22', '/', '115']
Here is my string,
str = 'A:[{type:"mb",id:9,name:"John",url:"/mb9/",cur:0,num:83498},
{type:"mb",id:92,name:"Mary",url:"/mb92/",cur:0,num:404},
{type:"mb",id:97,name:"Dan",url:"/mb97/",cur:0,num:139},
{type:"mb",id:268,name:"Jennifer",url:"/mb268/",cur:0,num:0},
{type:"mb",id:289,name:"Mike",url:"/mb289/",cur:0,num:0}],B:
[{type:"mb",id:157,name:"Sue",url:"/mb157/",cur:0,num:35200},
{type:"mb",id:3,name:"Rob",url:"/mb3/",cur:0,num:103047},
{type:"mb",id:2,name:"Tracy",url:"/mb2/",cur:0,num:87946},
{type:"mb",id:26,name:"Jenny",url:"/mb26/",cur:0,num:74870},
{type:"mb",id:5,name:"Florence",url:"/mb5/",cur:0,num:37261},
{type:"mb",id:127,name:"Peter",url:"/mb127/",cur:0,num:63711},
{type:"mb",id:15,name:"Grace",url:"/mb15/",cur:0,num:63243},
{type:"mb",id:82,name:"Tony",url:"/mb82/",cur:0,num:6471},
{type:"mb",id:236,name:"Lisa",url:"/mb236/",cur:0,num:4883}]'
I want to use findall or search to extract all the data under "name" and "url" from str. Here is what I did,
pattern = re.comile(r'type:(.*),id:(.*),name:(.*),url:(.*),cur:(.*),num:
(.*)')
for (v1, v2, v3, v4, v5, v6) in re.findall(pattern, str):
print v3
print v4
But unfortunately, this doesn't do what I want. Is there anything wrong? Thanks for your inputs.
You shouldn't call your string "str", because that shadows the built-in str type. But here's an option for you (with the string stored in s):
# Find all of the entries
x = re.findall('(?<![AB]:)(?<=:).*?(?=[,}])', s)
['"mb"', '9', '"John"', '"/mb9/"', '0', '83498', '"mb"', '92', '"Mary"',
'"/mb92/"', '0', '404', '"mb"', '97', '"Dan"', '"/mb97/"', '0', '139',
'"mb"', '268', '"Jennifer"', '"/mb268/"', '0', '0', '"mb"', '289', '"Mike"',
'"/mb289/"', '0', '0', '"mb"', '157', '"Sue"', '"/mb157/"', '0', '35200',
'"mb"', '3', '"Rob"', '"/mb3/"', '0', '103047', '"mb"', '2', '"Tracy"',
'"/mb2/"', '0', '87946', '"mb"', '26', '"Jenny"', '"/mb26/"', '0', '74870',
'"mb"', '5', '"Florence"', '"/mb5/"', '0', '37261', '"mb"', '127', '"Peter"',
'"/mb127/"', '0', '63711', '"mb"', '15', '"Grace"', '"/mb15/"', '0', '63243',
'"mb"', '82', '"Tony"', '"/mb82/"', '0', '6471', '"mb"', '236', '"Lisa"',
'"/mb236/"', '0', '4883']
# Break up into each section
# Each record contributes six values (type, id, name, url, cur, num), so
# slice the flat list `x` into consecutive groups of six.
y = [x[i:i + 6] for i in range(0, len(x), 6)]
[['"mb"', '9', '"John"', '"/mb9/"', '0', '83498']
['"mb"', '92', '"Mary"', '"/mb92/"', '0', '404']
['"mb"', '97', '"Dan"', '"/mb97/"', '0', '139']
['"mb"', '268', '"Jennifer"', '"/mb268/"', '0', '0']
['"mb"', '289', '"Mike"', '"/mb289/"', '0', '0']
['"mb"', '157', '"Sue"', '"/mb157/"', '0', '35200']
['"mb"', '3', '"Rob"', '"/mb3/"', '0', '103047']
['"mb"', '2', '"Tracy"', '"/mb2/"', '0', '87946']
['"mb"', '26', '"Jenny"', '"/mb26/"', '0', '74870']
['"mb"', '5', '"Florence"', '"/mb5/"', '0', '37261']
['"mb"', '127', '"Peter"', '"/mb127/"', '0', '63711']
['"mb"', '15', '"Grace"', '"/mb15/"', '0', '63243']
['"mb"', '82', '"Tony"', '"/mb82/"', '0', '6471']
['"mb"', '236', '"Lisa"', '"/mb236/"', '0', '4883']]
# Name is 3rd value in each list and url is 4th
for entry in y:
    # The values still carry their surrounding double quotes, e.g. '"John"'.
    name = entry[2]
    url = entry[3]
You can try this:
import re

data = """
A:[{type:"mb",id:9,name:"John",url:"/mb9/",cur:0,num:83498},
{type:"mb",id:92,name:"Mary",url:"/mb92/",cur:0,num:404},
{type:"mb",id:97,name:"Dan",url:"/mb97/",cur:0,num:139},
{type:"mb",id:268,name:"Jennifer",url:"/mb268/",cur:0,num:0},
{type:"mb",id:289,name:"Mike",url:"/mb289/",cur:0,num:0}],B:
[{type:"mb",id:157,name:"Sue",url:"/mb157/",cur:0,num:35200},
{type:"mb",id:3,name:"Rob",url:"/mb3/",cur:0,num:103047},
{type:"mb",id:2,name:"Tracy",url:"/mb2/",cur:0,num:87946},
{type:"mb",id:26,name:"Jenny",url:"/mb26/",cur:0,num:74870},
{type:"mb",id:5,name:"Florence",url:"/mb5/",cur:0,num:37261},
{type:"mb",id:127,name:"Peter",url:"/mb127/",cur:0,num:63711},
{type:"mb",id:15,name:"Grace",url:"/mb15/",cur:0,num:63243},
{type:"mb",id:82,name:"Tony",url:"/mb82/",cur:0,num:6471},
{type:"mb",id:236,name:"Lisa",url:"/mb236/",cur:0,num:4883}]
"""

# Match every quoted value that directly follows `name:` or `url:` and is
# followed by a comma; [1:-1] strips the surrounding double quotes.
# The result alternates name, url, name, url, ...
full_data = [
    quoted[1:-1]
    for quoted in re.findall(r'(?<=name:)".*?"(?=,)|(?<=url:)".*?"(?=,)', data)
]

# Pair each name (even positions) with the url that follows it (odd positions).
final_data = [
    name + ":" + url for name, url in zip(full_data[0::2], full_data[1::2])
]

# Print the paired list -- this is what the "Output" line below shows.
print(final_data)
Output
['John:/mb9/', 'Mary:/mb92/', 'Dan:/mb97/', 'Jennifer:/mb268/', 'Mike:/mb289/', 'Sue:/mb157/', 'Rob:/mb3/', 'Tracy:/mb2/', 'Jenny:/mb26/', 'Florence:/mb5/', 'Peter:/mb127/', 'Grace:/mb15/', 'Tony:/mb82/', 'Lisa:/mb236/']
I'm having difficulty with iterating through the nested list table below. I understand how to iterate through the table once, but to go a level deeper and iterate through each nested list, I am stuck on the correct syntax to use. In iterating through the sublists, I am trying to cast each 'age' and 'years experience' to an integer, perform the operation 'age' - 'years experience', and append the value (as a string) to each sublist.
table = [
['first_name', 'last_name', 'age', 'years experience', 'salary'],
['James', 'Butt', '29', '8', '887174.4'],
['Josephine', 'Darakjy', '59', '39', '1051267.9'],
['Art', 'Venere', '22', '2', '47104.2'],
['Lenna', 'Paprocki', '33', '7', '343240.2'],
['Donette', 'Foller', '26', '2', '273541.4'],
['Simona', 'Morasca', '35', '15', '960967.0'],
['Mitsue', 'Tollner', '51', '31', '162776.7'],
['Leota', 'Dilliard', '64', '39', '464595.5'],
['Sage', 'Wieser', '27', '9', '819519.7'],
['Kris', 'Marrier', '59', '33', '327505.55000000005'],
['Minna', 'Amigon', '45', '23', '571227.05'],
['Abel', 'Maclead', '46', '23', '247927.25'],
['Kiley', 'Caldarera', '33', '7', '179182.8'],
['Graciela', 'Ruta', '48', '21', '136978.95'],
['Cammy', 'Albares', '29', '9', '1016378.95'],
['Mattie', 'Poquette', '39', '15', '86458.75'],
['Meaghan', 'Garufi', '21', '3', '260256.5'],
['Gladys', 'Rim', '52', '26', '827390.5'],
['Yuki', 'Whobrey', '32', '10', '652737.0'],
['Fletcher', 'Flosi', '59', '37', '954975.15']]
##Exercise 3 (rows as lists): Iterate over each row and append the following values:
#If it is the first row then extend it with the following ['Started Working', 'Salary / Experience']
#Start work age (age - years experience)
#Salary / Experience ratio = (salary / divided by experience)
for i, v in enumerate(table):
extension = ['Started Working', 'Salary/Experience']
if i == 0:
v.extend(extension)
print(i,v) #test to print out the index and nested list values
#for index, value in enumerate(v):
# age =
#exp =
#start_work = age - exp
#print(index, value) test to print out the index and each value in the nested list
Pass the start argument to enumerate (enumerate(table[1:], 1) in your case) so that idx matches each row's position in table while the header row is skipped:
table = [['first_name', 'last_name', 'age', 'years experience', 'salary'],
         ['James', 'Butt', '29', '8', '887174.4'],
         ['Josephine', 'Darakjy', '59', '39', '1051267.9'],
         ['Art', 'Venere', '22', '2', '47104.2']]

# The header row only receives the titles of the two new columns.
table[0].extend(['Started Working', 'Salary/Experience'])

# table[1:] skips the header; start=1 keeps idx equal to the row's position
# in the full table, so table[idx] is the row being processed.
for idx, row in enumerate(table[1:], 1):
    start_work_age = int(row[2]) - int(row[3])
    # NOTE: raises ZeroDivisionError for a row with '0' years of experience.
    ratio = float(row[4]) / int(row[3])
    table[idx].extend([str(start_work_age), str(ratio)])

print(table)
# Output
[['first_name', 'last_name', 'age', 'years experience', 'salary', 'Started Working', 'Salary/Experience'],
['James', 'Butt', '29', '8', '887174.4', '21', '110896.8'],
['Josephine', 'Darakjy', '59', '39', '1051267.9', '20', '26955.5871795'],
['Art', 'Venere', '22', '2', '47104.2', '20', '23552.1']]
If you can convert the space to an underscore in years experience you can use collections.namedtuple to make your life simpler:
from collections import namedtuple

table = [
    ['first_name', 'last_name', 'age', 'years_experience', 'salary'],
    ['James', 'Butt', '29', '8', '887174.4'],
    ['Josephine', 'Darakjy', '59', '39', '1051267.9'],
    ['Art', 'Venere', '22', '2', '47104.2'],
    # ...
]

# The header row supplies the field names, so every column can be read by
# name (worker.age) instead of by position (row[2]).
workerv1 = namedtuple('workerv1', table[0])

for i, v in enumerate(table):
    worker = workerv1(*v)
    if i == 0:
        # Header row: print the titles of the two derived columns.
        started_working = 'Started Working'
        salary_per_year = 'S/Ex ratio'
    else:
        started_working = int(worker.age) - int(worker.years_experience)
        salary_per_year = float(worker.salary) / float(worker.years_experience)
    print("{w.first_name},{w.last_name},{w.age},{w.years_experience},{w.salary},{0},{1}".format(
        started_working, salary_per_year, w=worker))