Replacing names of a file - python

I have a file name, and I want to replace parts of it according to pairs of objects from the list and the elements inside them:
def replace_variables(i, distance = 0, T1 = '0', T2 = '0', Phi = '0'):
    i_[0][0] = '-2'
    i_[1][0] = '2'
    input_name = input_name.replace('distance', distance).replace('one', T1).replace('two', T2).replace('Phi', Phi)
    print input_name

input_name = 'file_distance_one_two_Phi.txt'
a = [['distance', '+2', '-2'], ['T1', '+2', '-2'], ['T2', '+2', '-2'], ['Phi', '+2', '-2']]
new_list = list(itertools.permutations(a, 2))
for i in new_list:
    replace_variables(i, x, y)
But I'm getting back:
file_distance_T1_0_0.txt, file_-2_T2_0_0.txt, file_distance_-2_0_0_0.txt, and so on
I want to get:
file_-2,-2_0_0.txt, file_-2_0_-1_0.txt, file_2_-2_0_0.txt and so forth
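A sketch of one way to produce names like those, assuming the goal is that each ordered pair of placeholders takes the values +2/-2 while the rest default to '0'. Note two likely problems in the original: the template contains 'one' and 'two' while the list a uses the keys 'T1' and 'T2', so those replacements never match, and the values passed to str.replace must themselves be strings:

import itertools

template = 'file_distance_one_two_Phi.txt'
placeholders = ['distance', 'one', 'two', 'Phi']  # names exactly as they appear in the template

def make_name(pair, values):
    subs = dict.fromkeys(placeholders, '0')  # every placeholder defaults to '0'
    subs.update(zip(pair, values))           # the chosen pair gets its +2/-2 values
    name = template
    for placeholder, value in subs.items():
        name = name.replace(placeholder, value)
    return name

for pair in itertools.permutations(placeholders, 2):
    for values in itertools.product(['+2', '-2'], repeat=2):
        print(make_name(pair, values))  # e.g. file_+2_-2_0_0.txt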

Related

Appending to list replaces all items in the list with most recent item

So this is what I have. At the end, every element in formattedList is the same. But when I print features[i] inside the loop, it prints exactly the features that I want to append to the end of the list.
# function to change modified json list back to dict so it can be saved as a json file again
def makeDict(jsonlist):
    features = [{}] * 364
    geoid1 = [{}] * 364
    geometry = [{}] * 364
    properties = [{}] * 364
    formattedList = []
    tempList = []
    keylistfeatures = ['geometry', 'properties']
    keylistgeometry = ['coordinates', 'type']
    keylistproperties = ['content', 'datatype', 'density', 'density', 'description', 'display', 'file', 'size', 'source', 'target']
    keylistfile = ['date', 'name', 'size']
    file = {}
    size = []
    density = []
    for i in range(0, 364):
        # make density list
        density = [jsonlist[i][6], jsonlist[i][7]]
        # size list
        size = [jsonlist[i][8], jsonlist[i][9]]
        # file dictionary
        file['date'] = jsonlist[i][1]
        file['name'] = jsonlist[i][0]
        file['size'] = jsonlist[i][2]
        # geometry dictionary
        geometry[i]['coordinates'] = jsonlist[i][11]
        geometry[i]['type'] = jsonlist[i][12]
        # properties dictionary
        properties[i]['content'] = jsonlist[i][5]
        properties[i]['datatype'] = jsonlist[i][4]
        properties[i]['density'] = density
        properties[i]['description'] = jsonlist[i][10]
        properties[i]['display'] = jsonlist[i][3]
        properties[i]['file'] = file
        properties[i]['size'] = size
        properties[i]['source'] = jsonlist[i][14]
        properties[i]['target'] = jsonlist[i][15]
        features[i]['geometry'] = geometry[i]
        features[i]['properties'] = properties[i]
        features[i]['type'] = jsonlist[i][13]
        # print(features[i])
        formattedList.append(features[i])
    # print(formattedList)
    return formattedList
Been stuck on this for hours but my guess is it's something simple I don't understand.
When I use the debugger I can step through the loop (screenshots of the 1st through 4th iterations omitted), and features is always a list of 364 copies of the same thing for that iteration.
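The likely culprit is the initialization, not the loop: [{}] * 364 creates a list of 364 references to one shared dictionary, so every assignment to features[i], geometry[i], or properties[i] mutates that same dict, and the single file dict is reused across iterations the same way. A quick demonstration:

shared = [{}] * 3                    # three references to ONE dict
shared[0]['key'] = 'value'
print(shared)                        # [{'key': 'value'}, {'key': 'value'}, {'key': 'value'}]

distinct = [{} for _ in range(3)]    # three separate dicts
distinct[0]['key'] = 'value'
print(distinct)                      # [{'key': 'value'}, {}, {}]

Initialising features, geometry, and properties with comprehensions like [{} for _ in range(364)], and creating a fresh file = {} inside the loop, should make every appended feature independent.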

How to call different data files using a for loop in pandas?

I have a list of files named such as
topaccount_2015_09_individuals
topaccount_2015_12_individuals
...
topaccount_2021_12_individuals
which are subsets of
topaccount_2015_09
topaccount_2015_12
...
topaccount_2021_12
I want to call them and do some data manipulation, so I created a list:
known_series = known['Address']
y = ['2015_09', '2015_12', '2016_03', '2016_06', '2016_09', '2016_12', '2017_03', '2017_06', '2017_09', '2017_12',
     '2018_03', '2018_06', '2018_09', '2018_12', '2019_03', '2019_06', '2019_09', '2019_12', '2020_03', '2020_06', '2020_09', '2020_12',
     '2021_03', '2021_03', '2021_06', '2021_09', '2021_12']
for q in y:
    topaccount_[q]_individuals = topaccount_[q][~topaccount_[q]['address'].isin(known_series)]
    topaccount_[q]_individuals = topaccount_[q]_individuals.reset_index(drop=True)
but it is giving me an error. What am I doing wrong? (known_series is already defined in the script.)
UPDATE
I followed the suggestion below, but I have one more problem, which is how to address the master dataframe from which I am extracting the _individuals dataframes.
y = ['2015_09', '2015_12', '2016_03', '2016_06', '2016_09', '2016_12', '2017_03', '2017_06', '2017_09', '2017_12',
     '2018_03', '2018_06', '2018_09', '2018_12', '2019_03', '2019_06', '2019_09', '2019_12', '2020_03', '2020_06', '2020_09', '2020_12',
     '2021_03', '2021_03', '2021_06', '2021_09', '2021_12']
file_individuals = []
file = []
for x in y:
    file_individuals.append(f'topaccount_{x}_individuals')
    file.append(f'topaccount_{x}')
print(file_individuals)
print(file)
for file_individuals in file_individuals:
    file_individuals = **topaccount_[q][~topaccount_[q]**['address'].isin(known_series)]
    file_individuals = file_individuals[~file_individuals['address'].isin(coinmarketcap_series)]
    file_individuals = file_individuals[~file_individuals['address'].isin(tord_series)]
    file_individuals = file_individuals[~file_individuals['address'].isin(exchanges_series)]
    file_individuals = file_individuals.reset_index(drop=True)
REUPDATE
d = {}
names = []
for x in y:
    d['ind'] = f"topaccount_{x}_individuals"
    d['top'] = f"topaccount_{x}"
    names.append(d)
for n in names:
    n['ind'] = n['top'][~n['top']['address'].isin(known_series)]
and I get the following error:
n['ind'] = n['top'][~n['top']['address'].isin(known_series)]
TypeError: string indices must be integers
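The TypeError says that n['top'] holds a string like 'topaccount_2021_12' rather than a DataFrame, so n['top']['address'] is indexing a string with a string. (Note also that the loop appends the same dict d on every pass, so names ends up holding many references to one and the same dict.) A quick demonstration of the error:

s = 'topaccount_2015_09'  # what n['top'] actually contains
s['address']              # TypeError: string indices must be integers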
Something like this, then, and use the name list later.
name = []
for x in y:
    name.append(f'topaccount_{x}_individuals')
print(name)
['topaccount_2015_09_individuals', 'topaccount_2015_12_individuals', 'topaccount_2016_03_individuals', 'topaccount_2016_06_individuals', 'topaccount_2016_09_individuals', 'topaccount_2016_12_individuals', 'topaccount_2017_03_individuals', 'topaccount_2017_06_individuals', 'topaccount_2017_09_individuals', 'topaccount_2017_12_individuals', 'topaccount_2018_03_individuals', 'topaccount_2018_06_individuals', 'topaccount_2018_09_individuals', 'topaccount_2018_12_individuals', 'topaccount_2019_03_individuals', 'topaccount_2019_06_individuals', 'topaccount_2019_09_individuals', 'topaccount_2019_12_individuals', 'topaccount_2020_03_individuals', 'topaccount_2020_06_individuals', 'topaccount_2020_09_individuals', 'topaccount_2020_12_individuals', 'topaccount_2021_03_individuals', 'topaccount_2021_03_individuals', 'topaccount_2021_06_individuals', 'topaccount_2021_09_individuals', 'topaccount_2021_12_individuals']
Alternatively,
d = {}
names = []
for x in y:
    d['ind'] = f"topaccount_{x}_individuals"
    d['top'] = f"topaccount_{x}"
    names.append(d)
for n in names:
    n['ind'] = n['top'].....
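For the follow-up problem, the master frames need to live in a container as well; a string like 'topaccount_2015_09' can never reach the DataFrame of that name. A minimal sketch, assuming each master table can be loaded from a CSV named topaccount_&lt;period&gt;.csv (a placeholder for however the frames are really created) and that known_series is defined as in the question:

import pandas as pd

y = ['2015_09', '2015_12', '2021_12']  # abbreviated list of periods

# master DataFrames keyed by period string
top = {x: pd.read_csv(f'topaccount_{x}.csv') for x in y}

# derive each _individuals subset from its master frame
individuals = {}
for x in y:
    df = top[x]
    individuals[x] = df[~df['address'].isin(known_series)].reset_index(drop=True)

print(individuals['2015_09'].head())  # access any subset by its period string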

How to transpose a single item in a nested list

I am trying to input values into my table, but the table is not coming out the way I would like it to. The headers ("OrderDate", "Rep", etc.) of my given csv file should be under the "Columns:" cell (see the linked image: Table of statistical values).
I have tried to create multiple functions that could transpose the headers but when trying to print the table, it would give the error:
TypeError: unsupported format string passed to list.__format__.
One code I tried to input just before the "labels" line was:
headers2 = [x.split() for x in headers]
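That error is what happens when a table cell is a list instead of a string or number: x.split() turns each header into a one-element list, and a width format spec such as {:>9} cannot be applied to a list. For example:

"{:>9}".format('OrderDate')    # fine, formats the string
"{:>9}".format(['OrderDate'])  # TypeError: unsupported format string passed to list.__format__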
P.S. I have removed the csv file code and manually put in a list assigned to "A".
My Code:
A = [['OrderDate', 'Region', 'Rep', 'Item', 'Units', 'Unit Price'],
     ['4-Jul-2014', 'East', 'Richard', 'Pen Set', '62', '4.99'],
     ['12-Jul-2014', 'East', 'Nick', 'Binder', '29', '1.99'],
     ['21-Jul-2014', 'Central', 'Morgan', 'Pen Set', '55', '12.49'],
     ['29-Jul-2014', 'East', 'Susan', 'Binder', '81', '19.99'],
     ['7-Aug-2014', 'Central', 'Matthew', 'Pen Set', '42', '23.95'],
     ['15-Aug-2014', 'East', 'Richard', 'Pencil', '35', '4.99'],
     ['24-Aug-2014', 'West', 'James', 'Desk', '3', '275'],
     ['1-Sep-2014', 'Central', 'Smith', 'Desk', '2', '125']]
minVal = []
maxVal = []
hist = []
average = []
stanDev = []
mode = []
headers = A[0]  # this sets the variable "headers" as the first row
rows = A[1:]    # sets the variable 'rows' to be a nested list without headers

def rows2cols(A):
    if len(A) == 0:  # this covers the base case of having an empty csv file
        return []
    res = [[] for x in headers]  # creates a list of empty lists
    for line in A:
        for col in range(len(line)):
            res[col].append(line[col])
    return res

def convertstringtofloats(A):
    res = []
    for x in A:
        res.append(float(x))
    return res

def isnumericlist(A):
    for x in A:
        try:
            numeric = float(x)
        except:
            return False
    return True

def getMin(A):
    B = convertstringtofloats(cols[col])  # lets Python know what B is for the next line; without this there is an error
    res = B[0]
    for x in A:
        if x < res:
            res = x
    return res

def getMax(A):
    B = convertstringtofloats(cols[col])  # lets Python know what B is for the next line; without this there is an error
    res = B[0]
    for x in A:
        if x > res:
            res = x
    return res

def getAvg(A):
    return sum(A) / len(A)

def most_common(A):
    counts = {}
    for x in A:
        counts[(x)] = counts.get((x), 0) + 1
    max = -1
    maxKey = ""
    for key, value in counts.items():
        if max < value:
            max = value
            maxKey = key
    return maxKey

def getSD(A):
    sumsq = 0
    for n in A:
        sumsq += (getAvg(A))**2
    return sumsq

cols = rows2cols(rows)  # transposes 'rows' and assigns to variable 'cols'

def stats(A):
    B = convertstringtofloats(A)
    minVal.append(getMin(B))
    maxVal.append(getMax(B))
    average.append(getAvg(B))
    stanDev.append(getSD(B))

for col in range(len(headers)):
    if isnumericlist(cols[col]):
        stats(cols[col])  # calls the function to calculate stats of the transposed matrix
    else:
        minVal.append("n/a")
        maxVal.append("n/a")
        average.append("n/a")
        stanDev.append("n/a")
    mode.append(most_common(cols[col]))

# headers2 = [x.split() for x in headers]
labels = ["Columns:", "Min", "Max", "Avg", "Std. Dev.", "Most Common Word"]  # labels for the table
table_values = [labels, headers, minVal, maxVal, average, stanDev, mode]  # combines all the calculated stats into a single list
print(table_values)

def print_table(table):
    longest_cols = [
        (max([len(str(row[i])) for row in table]) + 0) for i in range(len(table[0]))
    ]
    row_format = "|".join([" {:>" + str(longest_col) + "} " for longest_col in longest_cols])
    first = True
    for row in table:
        print(row_format.format(*row))
        if first:
            print((sum(longest_cols) + (len(table[0]) - 0) * 3) * "-")
            first = False

print_table(table_values)  # this prints the 'labels' at the top, but the statistical values are not in the right place
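One way to get the headers under the "Columns:" cell, assuming the desired layout is one table row per CSV column, is to transpose the stat lists with zip() so that labels stays as the header row and each data row starts with a column name:

# one row per CSV column: [header, min, max, avg, std dev, most common]
data_rows = zip(headers, minVal, maxVal, average, stanDev, mode)
table_values = [labels] + [list(row) for row in data_rows]
print_table(table_values)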

How to add a member function to an existing Python object?

Previously I created a lot of Python objects of class A, and I would like to add a new function plotting_in_PC_space_with_coloring_option() to class A (the purpose of this function is to plot some data in the object) and call it on those old objects.
An example is:
import copy
import numpy as np
from math import *
from pybrain.structure import *
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets.supervised import SupervisedDataSet
import pickle
import matplotlib.pyplot as plt  # added: the plotting method below uses plt
import neural_network_related

class A(object):
    """the neural network for simulation"""
    '''
    todo:
    - find boundary
    - get_angles_from_coefficients
    '''
    def __init__(self,
                 index,                    # the index of the current network
                 list_of_coor_data_files,  # accept multiple files of training data
                 energy_expression_file,   # input, output files
                 preprocessing_settings = None,
                 connection_between_layers = None, connection_with_bias_layers = None,
                 PCs = None,               # principal components
                 ):
        self._index = index
        self._list_of_coor_data_files = list_of_coor_data_files
        self._energy_expression_file = energy_expression_file
        self._data_set = []
        for item in list_of_coor_data_files:
            self._data_set += self.get_many_cossin_from_coordiantes_in_file(item)
        self._preprocessing_settings = preprocessing_settings
        self._connection_between_layers = connection_between_layers
        self._connection_with_bias_layers = connection_with_bias_layers
        self._node_num = [8, 15, 2, 15, 8]
        self._PCs = PCs

    def save_into_file(self, filename = None):
        if filename is None:
            filename = "network_%s.pkl" % str(self._index)  # by default, name it with its index
        with open(filename, 'wb') as my_file:
            pickle.dump(self, my_file, pickle.HIGHEST_PROTOCOL)
        return

    def get_cossin_from_a_coordinate(self, a_coordinate):
        num_of_coordinates = len(a_coordinate) / 3  # Python 2 integer division
        a_coordinate = np.array(a_coordinate).reshape(num_of_coordinates, 3)
        diff_coordinates = a_coordinate[1:num_of_coordinates, :] - a_coordinate[0:num_of_coordinates - 1, :]  # bond vectors
        diff_coordinates_1 = diff_coordinates[0:num_of_coordinates - 2, :]
        diff_coordinates_2 = diff_coordinates[1:num_of_coordinates - 1, :]
        normal_vectors = np.cross(diff_coordinates_1, diff_coordinates_2)
        normal_vectors_normalized = np.array(map(lambda x: x / sqrt(np.dot(x, x)), normal_vectors))
        normal_vectors_normalized_1 = normal_vectors_normalized[0:num_of_coordinates - 3, :]
        normal_vectors_normalized_2 = normal_vectors_normalized[1:num_of_coordinates - 2, :]
        diff_coordinates_mid = diff_coordinates[1:num_of_coordinates - 2]  # bond vectors in the middle (first and last removed); they should be perpendicular to adjacent normal vectors
        cos_of_angles = range(len(normal_vectors_normalized_1))      # initialization (Python 2: range() returns a list)
        sin_of_angles_vec = range(len(normal_vectors_normalized_1))
        sin_of_angles = range(len(normal_vectors_normalized_1))
        for index in range(len(normal_vectors_normalized_1)):
            cos_of_angles[index] = np.dot(normal_vectors_normalized_1[index], normal_vectors_normalized_2[index])
            sin_of_angles_vec[index] = np.cross(normal_vectors_normalized_1[index], normal_vectors_normalized_2[index])
            sin_of_angles[index] = sqrt(np.dot(sin_of_angles_vec[index], sin_of_angles_vec[index])) * np.sign(sum(sin_of_angles_vec[index]) * sum(diff_coordinates_mid[index]))
        return cos_of_angles + sin_of_angles

    def get_many_cossin_from_coordinates(self, coordinates):
        return map(self.get_cossin_from_a_coordinate, coordinates)

    def get_many_cossin_from_coordiantes_in_file(self, filename):
        coordinates = np.loadtxt(filename)
        return self.get_many_cossin_from_coordinates(coordinates)

    def mapminmax(self, my_list):  # for preprocessing in network
        my_min = min(my_list)
        my_max = max(my_list)
        mul_factor = 2.0 / (my_max - my_min)
        offset = (my_min + my_max) / 2.0
        result_list = np.array(map(lambda x: (x - offset) * mul_factor, my_list))
        return (result_list, (mul_factor, offset))  # also return the parameters for processing

    def get_mapminmax_preprocess_result_and_coeff(self, data=None):
        if data is None:
            data = self._data_set
        data = np.array(data)
        data = np.transpose(data)
        result = []
        params = []
        for item in data:
            temp_result, preprocess_params = self.mapminmax(item)
            result.append(temp_result)
            params.append(preprocess_params)
        return (np.transpose(np.array(result)), params)

    def mapminmax_preprocess_using_coeff(self, input_data=None, preprocessing_settings=None):
        if preprocessing_settings is None:
            preprocessing_settings = self._preprocessing_settings
        temp_settings = np.transpose(np.array(preprocessing_settings))
        result = []
        for item in input_data:
            item = np.multiply(item - temp_settings[1], temp_settings[0])
            result.append(item)
        return result

    def get_expression_of_network(self, connection_between_layers=None, connection_with_bias_layers=None):
        if connection_between_layers is None:
            connection_between_layers = self._connection_between_layers
        if connection_with_bias_layers is None:
            connection_with_bias_layers = self._connection_with_bias_layers
        node_num = self._node_num
        expression = ""
        # first part: network
        for i in range(2):
            expression = '\n' + expression
            mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])
            bias_coef = connection_with_bias_layers[i].params
            for j in range(np.size(mul_coef, 0)):
                temp_expression = 'layer_%d_unit_%d = tanh( ' % (i + 1, j)
                for k in range(np.size(mul_coef, 1)):
                    temp_expression += ' %f * layer_%d_unit_%d +' % (mul_coef[j, k], i, k)
                temp_expression += ' %f);\n' % (bias_coef[j])
                expression = temp_expression + expression  # order of expressions matters in OpenMM
        # second part: definition of inputs
        index_of_backbone_atoms = [2, 5, 7, 9, 15, 17, 19]
        for i in range(len(index_of_backbone_atoms) - 3):
            index_of_coss = i
            index_of_sins = i + 4
            expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
                (index_of_coss, index_of_coss, self._preprocessing_settings[index_of_coss][1], self._preprocessing_settings[index_of_coss][0])
            expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
                (index_of_sins, index_of_sins, self._preprocessing_settings[index_of_sins][1], self._preprocessing_settings[index_of_sins][0])
            expression += 'raw_layer_0_unit_%d = cos(dihedral_angle_%d);\n' % (index_of_coss, i)
            expression += 'raw_layer_0_unit_%d = sin(dihedral_angle_%d);\n' % (index_of_sins, i)
            expression += 'dihedral_angle_%d = dihedral(p%d, p%d, p%d, p%d);\n' % \
                (i, index_of_backbone_atoms[i], index_of_backbone_atoms[i + 1], index_of_backbone_atoms[i + 2], index_of_backbone_atoms[i + 3])
        return expression

    def write_expression_into_file(self, out_file = None):
        if out_file is None:
            out_file = self._energy_expression_file
        expression = self.get_expression_of_network()
        with open(out_file, 'w') as f_out:
            f_out.write(expression)
        return

    def get_mid_result(self, input_data=None, connection_between_layers=None, connection_with_bias_layers=None):
        if input_data is None:
            input_data = self._data_set
        if connection_between_layers is None:
            connection_between_layers = self._connection_between_layers
        if connection_with_bias_layers is None:
            connection_with_bias_layers = self._connection_with_bias_layers
        node_num = self._node_num
        temp_mid_result = range(4)
        mid_result = []
        # first we need to do the preprocessing
        for item in self.mapminmax_preprocess_using_coeff(input_data, self._preprocessing_settings):
            for i in range(4):
                mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])  # fix node_num
                bias_coef = connection_with_bias_layers[i].params
                previous_result = item if i == 0 else temp_mid_result[i - 1]
                temp_mid_result[i] = np.dot(mul_coef, previous_result) + bias_coef
                if i != 3:  # the last output layer is a linear layer, while the others are tanh layers
                    temp_mid_result[i] = map(tanh, temp_mid_result[i])
            mid_result.append(copy.deepcopy(temp_mid_result))  # note that we should use deepcopy here
        return mid_result

    def get_PC_and_save_it_to_network(self):
        '''get PCs and save the result into _PCs'''
        mid_result = self.get_mid_result()
        self._PCs = [item[1] for item in mid_result]
        return

    def train(self):
        ####################### set up autoencoder begin #######################
        node_num = self._node_num
        in_layer = LinearLayer(node_num[0], "IL")
        hidden_layers = [TanhLayer(node_num[1], "HL1"), TanhLayer(node_num[2], "HL2"), TanhLayer(node_num[3], "HL3")]
        bias_layers = [BiasUnit("B1"), BiasUnit("B2"), BiasUnit("B3"), BiasUnit("B4")]
        out_layer = LinearLayer(node_num[4], "OL")
        layer_list = [in_layer] + hidden_layers + [out_layer]
        molecule_net = FeedForwardNetwork()
        molecule_net.addInputModule(in_layer)
        for item in (hidden_layers + bias_layers):
            molecule_net.addModule(item)
        molecule_net.addOutputModule(out_layer)
        connection_between_layers = range(4)
        connection_with_bias_layers = range(4)
        for i in range(4):
            connection_between_layers[i] = FullConnection(layer_list[i], layer_list[i + 1])
            connection_with_bias_layers[i] = FullConnection(bias_layers[i], layer_list[i + 1])
            molecule_net.addConnection(connection_between_layers[i])  # connect two neighboring layers
            molecule_net.addConnection(connection_with_bias_layers[i])
        molecule_net.sortModules()  # this is some internal initialization process to make this module usable
        ####################### set up autoencoder end #######################
        trainer = BackpropTrainer(molecule_net, learningrate=0.002, momentum=0.4, verbose=False, weightdecay=0.1, lrdecay=1)
        data_set = SupervisedDataSet(node_num[0], node_num[4])
        sincos = self._data_set
        (sincos_after_process, self._preprocessing_settings) = self.get_mapminmax_preprocess_result_and_coeff(data = sincos)
        for item in sincos_after_process:  # is it needed?
            data_set.addSample(item, item)
        trainer.trainUntilConvergence(data_set, maxEpochs=50)
        self._connection_between_layers = connection_between_layers
        self._connection_with_bias_layers = connection_with_bias_layers
        print("Done!\n")
        return

    def create_sge_files_for_simulation(self, potential_centers = None):
        if potential_centers is None:
            potential_centers = self.get_boundary_points()
        neural_network_related.create_sge_files(potential_centers)
        return

    def get_boundary_points(self, list_of_points = None, num_of_bins = 5):
        if list_of_points is None:
            list_of_points = self._PCs
        x = [item[0] for item in list_of_points]
        y = [item[1] for item in list_of_points]
        temp = np.histogram2d(x, y, bins=[num_of_bins, num_of_bins])
        hist_matrix = temp[0]
        # add a set of zeros around this region
        hist_matrix = np.insert(hist_matrix, num_of_bins, np.zeros(num_of_bins), 0)
        hist_matrix = np.insert(hist_matrix, 0, np.zeros(num_of_bins), 0)
        hist_matrix = np.insert(hist_matrix, num_of_bins, np.zeros(num_of_bins + 2), 1)
        hist_matrix = np.insert(hist_matrix, 0, np.zeros(num_of_bins + 2), 1)
        hist_matrix = (hist_matrix != 0).astype(int)
        sum_of_neighbors = np.zeros(np.shape(hist_matrix))  # number of neighbors occupied with some points
        for i in range(np.shape(hist_matrix)[0]):
            for j in range(np.shape(hist_matrix)[1]):
                if i != 0: sum_of_neighbors[i, j] += hist_matrix[i - 1][j]
                if j != 0: sum_of_neighbors[i, j] += hist_matrix[i][j - 1]
                if i != np.shape(hist_matrix)[0] - 1: sum_of_neighbors[i, j] += hist_matrix[i + 1][j]
                if j != np.shape(hist_matrix)[1] - 1: sum_of_neighbors[i, j] += hist_matrix[i][j + 1]
        bin_width_0 = temp[1][1] - temp[1][0]
        bin_width_1 = temp[2][1] - temp[2][0]
        min_coor_in_PC_space_0 = temp[1][0] - 0.5 * bin_width_0  # multiply by 0.5 since we want the center of the grid
        min_coor_in_PC_space_1 = temp[2][0] - 0.5 * bin_width_1
        potential_centers = []
        for i in range(np.shape(hist_matrix)[0]):
            for j in range(np.shape(hist_matrix)[1]):
                if hist_matrix[i, j] == 0 and sum_of_neighbors[i, j] != 0:  # no points in this block, but there are points in neighboring blocks
                    temp_potential_center = [round(min_coor_in_PC_space_0 + i * bin_width_0, 2), round(min_coor_in_PC_space_1 + j * bin_width_1, 2)]
                    potential_centers.append(temp_potential_center)
        return potential_centers

    # this function is added after those old objects of A were created
    def plotting_in_PC_space_with_coloring_option(self,
                                                  list_of_coordinate_files_for_plotting=None,  # accept multiple files
                                                  color_option='pure'):
        '''by default, we are using training data, and we also allow external data input'''
        if list_of_coordinate_files_for_plotting is None:
            PCs_to_plot = self._PCs
        else:
            temp_sincos = []
            for item in list_of_coordinate_files_for_plotting:
                temp_sincos += self.get_many_cossin_from_coordiantes_in_file(item)
            temp_mid_result = self.get_mid_result(input_data = temp_sincos)
            PCs_to_plot = [item[1] for item in temp_mid_result]
        (x, y) = ([item[0] for item in PCs_to_plot], [item[1] for item in PCs_to_plot])
        # coloring
        if color_option == 'pure':
            coloring = 'red'
        elif color_option == 'step':
            coloring = range(len(x))
        fig, ax = plt.subplots()
        ax.scatter(x, y, c=coloring)
        ax.set_xlabel("PC1")
        ax.set_ylabel("PC2")
        plt.show()
        return
But it seems that plotting_in_PC_space_with_coloring_option() was not bound to those old objects. Is there any way to fix this? (I do not want to recreate these objects, since their creation involves CPU-intensive calculation and would take a very long time.)
Thanks!
Something like this:
class A:
    def q(self): print 1

a = A()

def f(self): print 2

setattr(A, 'f', f)
a.f()
This is called a monkey patch.
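Because Python looks up methods on the class at call time, patching A once makes the new method available to every existing instance, including objects loaded back from pickle (pickle stores instance data and re-attaches the current class definition on load). To attach a function to just one instance instead, types.MethodType can bind it; a sketch under those assumptions:

import types

class A(object):
    pass

a = A()  # an "old" object, created before f existed

def f(self):
    print 'patched'

A.f = f  # class-level patch: every instance of A, old or new, can now call f
a.f()    # prints: patched

b = A()
b.g = types.MethodType(f, b)  # instance-level patch: binds f to this one object only
b.g()                         # prints: patched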

convert contents of metadata file into variables list

Hi, I want to convert the contents of a file (in this case a Landsat 7 metadata file) into a series of variables defined by the contents of the file, using Python 2.7. The file contents look like this:
GROUP = L1_METADATA_FILE
GROUP = METADATA_FILE_INFO
ORIGIN = "Image courtesy of the U.S. Geological Survey"
REQUEST_ID = "0101305309253_00043"
LANDSAT_SCENE_ID = "LE71460402010069SGS00"
FILE_DATE = 2013-06-02T11:19:59Z
STATION_ID = "SGS"
PROCESSING_SOFTWARE_VERSION = "LPGS_12.2.1"
DATA_CATEGORY = "NOMINAL"
END_GROUP = METADATA_FILE_INFO
GROUP = PRODUCT_METADATA
DATA_TYPE = "L1T"
ELEVATION_SOURCE = "GLS2000"
OUTPUT_FORMAT = "GEOTIFF"
EPHEMERIS_TYPE = "DEFINITIVE"
SPACECRAFT_ID = "LANDSAT_7"
SENSOR_ID = "ETM"
SENSOR_MODE = "BUMPER"
WRS_PATH = 146
WRS_ROW = 040
DATE_ACQUIRED = 2010-03-10
GROUP = IMAGE_ATTRIBUTES
CLOUD_COVER = 0.00
IMAGE_QUALITY = 9
SUN_AZIMUTH = 137.38394502
SUN_ELEVATION = 48.01114126
GROUND_CONTROL_POINTS_MODEL = 55
GEOMETRIC_RMSE_MODEL = 3.790
GEOMETRIC_RMSE_MODEL_Y = 2.776
GEOMETRIC_RMSE_MODEL_X = 2.580
END_GROUP = IMAGE_ATTRIBUTES
Examples of the variable items I am interested in:
GROUP = MIN_MAX_RADIANCE
RADIANCE_MAXIMUM_BAND_1 = 293.700
RADIANCE_MINIMUM_BAND_1 = -6.200
RADIANCE_MAXIMUM_BAND_2 = 300.900
RADIANCE_MINIMUM_BAND_2 = -6.400
RADIANCE_MAXIMUM_BAND_3 = 234.400
RADIANCE_MINIMUM_BAND_3 = -5.000
RADIANCE_MAXIMUM_BAND_4 = 241.100
RADIANCE_MINIMUM_BAND_4 = -5.100
RADIANCE_MAXIMUM_BAND_5 = 47.570
RADIANCE_MINIMUM_BAND_5 = -1.000
RADIANCE_MAXIMUM_BAND_6_VCID_1 = 17.040
RADIANCE_MINIMUM_BAND_6_VCID_1 = 0.000
RADIANCE_MAXIMUM_BAND_6_VCID_2 = 12.650
RADIANCE_MINIMUM_BAND_6_VCID_2 = 3.200
RADIANCE_MAXIMUM_BAND_7 = 16.540
RADIANCE_MINIMUM_BAND_7 = -0.350
RADIANCE_MAXIMUM_BAND_8 = 243.100
RADIANCE_MINIMUM_BAND_8 = -4.700
END_GROUP = MIN_MAX_RADIANCE
I am open to other ideas, as I don't need all entries as variables, just a selection. I also see that some headers are listed more than once, i.e. GROUP is used multiple times. I need to be able to select certain variables (numeric values) and use them in formulas elsewhere in the code. Any help would be appreciated (novice Python coder).
I'm not sure exactly what you are looking for, but maybe something like this:
s = '''GROUP = L1_METADATA_FILE
GROUP = METADATA_FILE_INFO
ORIGIN = "Image courtesy of the U.S. Geological Survey"
REQUEST_ID = "0101305309253_00043"
LANDSAT_SCENE_ID = "LE71460402010069SGS00"
FILE_DATE = 2013-06-02T11:19:59Z
STATION_ID = "SGS"
PROCESSING_SOFTWARE_VERSION = "LPGS_12.2.1"
DATA_CATEGORY = "NOMINAL"
END_GROUP = METADATA_FILE_INFO
GROUP = PRODUCT_METADATA
DATA_TYPE = "L1T"
ELEVATION_SOURCE = "GLS2000"
OUTPUT_FORMAT = "GEOTIFF"
EPHEMERIS_TYPE = "DEFINITIVE"
SPACECRAFT_ID = "LANDSAT_7"
SENSOR_ID = "ETM"
SENSOR_MODE = "BUMPER"
WRS_PATH = 146
WRS_ROW = 040
DATE_ACQUIRED = 2010-03-10'''
output = {}  # dict
for line in s.split("\n"):  # iterate through every line in the string
    l = line.split("=")  # separate by "=" and put into a list
    output[l[0].strip()] = l[1].strip()  # first word is key, second word is value
print output  # output is a dictionary containing all key-value pairs in your metadata, separated by "="
print output["SENSOR_ID"]  # outputs "ETM"
==============
Edited:
f = open('metadata.txt', 'r')  # open file for reading

def build_data(f):  # build dictionary
    output = {}  # dict
    for line in f.readlines():  # iterate through every line in the file
        if "=" in line:  # make sure the line has data in the wanted form
            l = line.split("=")  # separate by "=" and put into a list
            output[l[0].strip()] = l[1].strip()  # first word is key, second word is value
    return output  # returns a dictionary with the key, value pairs

data = build_data(f)
print data["IMAGE_QUALITY"]  # prints 9
