I need to save the results of these batched queries into different data frames:
query = '''select name
from my_table
where class = {}
and student_number > {}
and student_number <= {} +10
group by name'''
inputs = list(range(0, 100,10))
classes = [1,2,3,4]
The expected result is to run these batches for each class individually, e.g. df_class1, df_class2, df_class3, df_class4.
query = '''
select name from my_table where class = {} and student_number >
{} and student_number <= {} +50 group by name'''
inputs = list(range(0, 100,10))
classes = [1,2,3,4]
I am not sure about this part:
for i in inputs:
    for c in classes:
        query.format(c, i, i)
results = pd.DataFrame()
for input, query in queries.items():
    res = my_db.execute(query)
    results = results.append(pd.DataFrame(res))
Each result would be something like df_class1, df_class2, df_class3, df_class4.
You can use a formatted string to save the resulting dataframe from each iteration.
inputs = list(range(0, 100, 10))
classes = [1, 2, 3, 4]
for i in inputs:
    for c in classes:
        formatted_query = query.format(c, i, i)  # str.format returns a new string; it must be assigned
        res = my_db.execute(formatted_query)
        df = pd.DataFrame(res)
        df.to_csv(f'result_{i}_{c}.csv')
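If the goal is one dataframe per class rather than CSV files, a dictionary keyed by class is a common alternative. This is a minimal sketch, assuming the same my_db connection and query template as above:

dfs_by_class = {}
for c in classes:
    frames = []
    for i in inputs:
        formatted_query = query.format(c, i, i)
        res = my_db.execute(formatted_query)
        frames.append(pd.DataFrame(res))
    dfs_by_class[c] = pd.concat(frames, ignore_index=True)  # one dataframe per class

dfs_by_class[1] then plays the role of df_class1, and so on, without creating numbered variables by hand.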
I work with rolls of plastic film in different lengths and widths, and I'm creating a database to store all the orders. To avoid repetition, I created separate tables for length (the Comprimento class) and width (the Largura class). I used UUIDs to create distinct IDs.
Now I want to cross both tables in a Modelo class, which is:
class Largura(Base):
    __tablename__ = 'largura'
    id = Column(GUID(), primary_key=True, default=lambda: str(uuid.uuid4()))
    largura = Column(String)
    modelos_l = relationship('Modelo', back_populates='larguras', cascade='all, delete')

    def __repr__(self):
        return f"<Largura {self.largura}>"

class Comprimento(Base):
    __tablename__ = 'comprimento'
    id = Column(GUID(), primary_key=True, default=lambda: str(uuid.uuid4()))
    comprimento = Column(String)
    modelos_c = relationship('Modelo', back_populates='comprimentos', cascade='all, delete')

    def __repr__(self):
        return f"<Comprimento {self.comprimento}>"

class Modelo(Base):
    __tablename__ = 'modelo'
    id = Column(GUID(), primary_key=True, default=lambda: str(uuid.uuid4()))
    descricao = Column(String(50))
    largura_id = Column(GUID(), ForeignKey("largura.id"), default=lambda: str(uuid.uuid4()))
    comprimento_id = Column(GUID(), ForeignKey("comprimento.id"), default=lambda: str(uuid.uuid4()))
    larguras = relationship('Largura', back_populates='modelos_l')
    comprimentos = relationship('Comprimento', back_populates='modelos_c')

    def __repr__(self):
        return f"<Modelo {self.id}>"
Then I created a file dedicated to inserting my data into this table:
from DBModelPy3 import Comprimento,Largura,Modelo,session
from sqlalchemy import create_engine
import pandas as pd
#Pre Loading my CSV file
df = pd.read_csv("dataorged.csv", sep=',')
pd.set_option('display.float_format','{:.0f}'.format) #change the number format to hide the ','
cnx = create_engine('sqlite:///data_hub2.db', echo=True).connect()
df_modelo = df[['larg_ajustada', 'comp']] # My dataframe that contains the orders. I chose the specifics columns needed for this insertion.
#print(df_modelo)
# Loading the Tables from my database
df_largura = pd.read_sql_table('largura', cnx)
df_comprimento = pd.read_sql_table('comprimento', cnx)
With everything loaded, I decided to combine all the lengths and widths I already had in my two tables (df_largura and df_comprimento), and then filter using the original file that contains the orders.
# COMBINING ALL THE LENGTHS AND WIDTHS FROM MY TABLES
model_num = []
for n_larg in range(len(df_largura)):
    db_larg = str(df_largura['largura'][n_larg])
    for n_comp in range(len(df_comprimento)):
        db_comp = df_comprimento['comprimento'][n_comp]
        combined = str(db_larg) + "x" + str(db_comp)
        model_num.append([db_larg, db_comp, combined])
df_modelos_ex = pd.DataFrame(model_num)
df_modelos_ex.columns = ['larg', 'comp', 'combined']
With this, I had all possible combinations in my dataframe, and I created the combined variable to match against later.
modelos_existentes = []
# COMBINATIONS THAT APPEAR IN THE ORDER DATAFRAME #
for item in range(len(df_modelo)):
    mod_larg = df_modelo['larg_ajustada'][item]
    mod_comp = df_modelo['comp'][item]
    mod_comb = str(mod_larg) + "x" + str(mod_comp)
    modelos_existentes.append([mod_larg, mod_comp, mod_comb])
df_mod_existentes = pd.DataFrame(modelos_existentes)
df_mod_existentes.columns = ['ex_larg', 'ex_comp', 'ex_comb']
df_limpo = df_mod_existentes.drop_duplicates(subset=['ex_comb'])
df_limpo.reset_index(drop=True, inplace=True)
With all my elements in place, the madness began. I started a loop to run through all my dataframes:
for l_row in range(len(df_limpo)):                      # For each row in my dataframe that contains the orders,
    larg = df_limpo['ex_larg'][l_row]                   # create a variable for width,
    comp = df_limpo['ex_comp'][l_row]                   # create a variable for length,
    comb = df_limpo['ex_comb'][l_row]                   # create a variable for the combination of both.
    for n_row in range(len(df_largura)):                # For each row in my width table from the DB,
        db_larg_id = df_largura['id'][n_row]            # create a variable for the width PK
        db_larg_largura = df_largura['largura'][n_row]  # and a variable with the value.
        lar = session.query(Largura).filter(Largura.id == db_larg_id).first()
        if db_larg_largura == larg:                     # If the value in my table matches the value in the order row,
            for m_row in range(len(df_comprimento)):    # for each length in my table in the DB,
                db_comp_id = df_comprimento['id'][m_row]
                db_comp_comprimento = df_comprimento['comprimento'][m_row]
                compr = session.query(Comprimento).filter(Comprimento.id == db_comp_id).first()
                if db_comp_comprimento == comp:         # if the value in my table matches the value in the order row:
                    new_model = Modelo(descricao=df_limpo['ex_comb'][l_row], larguras=lar, comprimentos=compr)
From here, I would only add session.add(new_model) and session.commit() to finish my code.
But it's not adding.
What I would like is for my Modelo table to look like:
MODELO table:
ID (PK) | DESCRIPTION (combined values string) | largura_id (width id, FK) | comprimento_id (length id, FK)
Sorry about the long explanation. Tried my best!
If anyone has the same trouble:
##########################
# ADDING TO THE DATABANK #
##########################
lista_a = []  # Created an empty list
for n_linha in range(len(df_limpo)):  # Ran through my dataframe
    larg_a = df_limpo['ex_larg'][n_linha]  # Extracted width and length from it
    comp_a = df_limpo['ex_comp'][n_linha]
    for m_linha in range(len(df_largura)):  # Ran through my width table from the database
        db_larg_id = df_largura['id'][m_linha]
        db_larg_largura = df_largura['largura'][m_linha]
        if larg_a == db_larg_largura:  # Checked whether the width from my dataframe matches the one in the table
            lista_a.append([larg_a, comp_a, db_larg_id])  # and appended it to lista_a
#print(lista_a)
df_lista_a = pd.DataFrame(lista_a)  # Created a new dataframe
df_lista_a.columns = ['larg', 'comp', 'id_larg']

lista_b = []  # Created a new list
for n_row in range(len(df_lista_a)):  # Ran through my new dataframe
    larg_b = df_lista_a['larg'][n_row]  # Extracted each column from it
    comp_b = df_lista_a['comp'][n_row]
    larg_b_id = df_lista_a['id_larg'][n_row]
    #df_limpo_lrow = df_limpo['ex_larg'][n_row]
    #df_limpo_crow = df_limpo['ex_comp'][n_row]
    #df_limpo_cbrow = df_limpo['ex_comb'][n_row]
    #print(larg_b, comp_b, larg_b_id, n_row)
    for m_row in range(len(df_comprimento)):  # Ran through my length table
        db_comp_id = df_comprimento['id'][m_row]
        db_comp_comprimento = df_comprimento['comprimento'][m_row]
        if comp_b == db_comp_comprimento:  # Checked whether the length from the dataframe matches the one in the table
            #print(larg_b, comp_b, n_row, m_row, df_limpo_lrow)
            lista_b.append([larg_b, comp_b, larg_b_id, db_comp_id])  # and appended the length id to the list
            break
#print(lista_b)
#print(len(df_lista_a), len(df_limpo), len(lista_b))
df_lista_b = pd.DataFrame(lista_b)  # Converted to a dataframe.
df_lista_b.columns = ['larg', 'comp', 'id_larg', 'id_comp']
# HERE'S THE ACTUAL INSERTION
for n_model in range(len(df_lista_b)):  # For each model found in the list, extract the values and add a new_model.
    mod_largura = df_lista_b['larg'][n_model]
    mod_comprimento = df_lista_b['comp'][n_model]
    mod_largura_id = df_lista_b['id_larg'][n_model]
    mod_comprimento_id = df_lista_b['id_comp'][n_model]
    # Look up the rows by the matched ids collected above (not a fixed row).
    lar = session.query(Largura).filter(Largura.id == mod_largura_id).first()
    compr = session.query(Comprimento).filter(Comprimento.id == mod_comprimento_id).first()
    new_model = Modelo(descricao=df_limpo['ex_comb'][n_model], larguras=lar, comprimentos=compr)
    print("Modelo: " + df_limpo['ex_comb'][n_model] + " com Id's " + mod_largura_id + " e " + mod_comprimento_id + " adicionados!")
    session.add(new_model)
    session.commit()
Then it's done.
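As a side note, the nested matching loops are effectively an inner join, so the same id lookup can be sketched with DataFrame.merge, assuming the column names used above and matching dtypes on the join keys (a sketch, not the code the answer was tested with):

df_ids = (df_limpo
          .merge(df_largura.rename(columns={'id': 'id_larg'}),
                 left_on='ex_larg', right_on='largura')
          .merge(df_comprimento.rename(columns={'id': 'id_comp'}),
                 left_on='ex_comp', right_on='comprimento'))

Each row of df_ids then carries ex_comb together with both foreign-key ids, so the Modelo objects can be built in a single loop.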
I am trying to scrape the "PRINCIPAL STOCKHOLDERS" table from the linked text file and convert it to a CSV file. Right now I am only half successful: I can locate the table and parse it, but somehow I cannot convert the text table to a standard one. My code is attached. Can someone help me with it?
import requests
import pandas as pd

url = r'https://www.sec.gov/Archives/edgar/data/1034239/0000950124-97-003372.txt'
# Different approach; the first approach does not work.
filing_url = requests.get(url)
content = filing_url.text
splited_data = content.split('\n')
table_title = 'PRINCIPAL STOCKHOLDERS'
END_TABLE_LINE = '- ------------------------'
def find_no_line_start_table(table_title, splited_data):
    found_no_lines = []
    for index, line in enumerate(splited_data):
        if table_title in line:
            found_no_lines.append(index)
    return found_no_lines
table_start = find_no_line_start_table(table_title, splited_data)
# I need help with locating the table. When I locate it using the above function,
# it returns two locations and I have to choose the correct one manually.
table_start = table_start[1]
def get_start_data_table(table_start, splited_data):
    for index, row in enumerate(splited_data[table_start:]):
        if '<C>' in row:
            return table_start + index

def get_end_table(start_table_data, splited_data):
    for index, row in enumerate(splited_data[start_table_data:]):
        if END_TABLE_LINE in row:
            return start_table_data + index

def row(l):
    l = l.split()
    number_columns = 8
    if len(l) >= number_columns:
        data_row = [''] * number_columns
        first_column_done = False
        index = 0
        for w in l:
            if not first_column_done:
                data_row[0] = ' '.join([data_row[0], w])
                if ':' in w:
                    first_column_done = True
            else:
                index += 1
                data_row[index] = w
        return data_row
start_line = get_start_data_table(table_start, splited_data)
end_line = get_end_table(start_line, splited_data)
table = splited_data[start_line:end_line]
# I also need help with converting the text table to a CSV file;
# somehow the following function does not recognize my columns.
def take_table(table):
    owner = []
    Num_share = []
    middle = []
    middle_1 = []
    middle_2 = []
    middle_3 = []
    prior_offering = []
    after_offering = []
    for r in table:
        data_row = row(r)
        if data_row:
            col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8 = data_row
            owner.append(col_1)
            Num_share.append(col_2)
            middle.append(col_3)
            middle_1.append(col_4)
            middle_2.append(col_5)
            middle_3.append(col_6)
            prior_offering.append(col_7)
            after_offering.append(col_8)
    table_data = {'owner': owner, 'Num_share': Num_share, 'middle': middle, 'middle_1': middle_1,
                  'middle_2': middle_2, 'middle_3': middle_3, 'prior_offering': prior_offering,
                  'after_offering': after_offering}
    return table_data
#print (table)
dict_table = take_table(table)
a = pd.DataFrame(dict_table)
a.to_csv('trail.csv')
I think what you need to do is
pd.DataFrame.from_dict(dict_table)
instead of
pd.DataFrame(dict_table)
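A possible reason the columns are not recognized is that row() only closes the first column once a word contains ':', which may never happen in this table, so every word ends up in the owner column. A hedged alternative, assuming the filing separates columns with runs of at least two spaces (typical for these fixed-width EDGAR tables), is:

import re

def row(l):
    # Split on runs of two or more spaces so multi-word owner names
    # stay in one column without relying on a ':' marker.
    parts = re.split(r'\s{2,}', l.strip())
    if len(parts) == 8:
        return parts

Rows that do not produce exactly eight fields return None, which the existing if data_row: check in take_table already skips.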
I am trying to return an array of constructed objects that are built on top of objects I retrieve from one URL, plus other fields I get from another URL.
I have an array that consists of two arrays, each with about 8000 objects.
I have tried to make each object construction a thread, but it still takes a lot of time.
Any solution? Here is my code:
def get_all_players_full_data(ea_players_json):
    all = []
    ea_players_json = list(ea_players_json.values())
    for i in range(len(ea_players_json)):
        for player_obj in ea_players_json[i]:
            all.append(player_obj)
    for player_obj in range(len(all)):
        all_data = []
        with concurrent.futures.ThreadPoolExecutor(len(all)) as executor:
            for player_data in all:
                future = executor.submit(build_full_player_data_obj, player_data)
                print(future.result())
                all_data.append(future.result())
def build_full_player_data_obj(ea_player_data):
    if ea_player_data.get("c") is not None:
        player_full_name = ea_player_data.get("c")
    else:
        player_full_name = ea_player_data.get("f") + " " + ea_player_data.get("l")
    player_id = ea_player_data.get("id")
    # go to futhead to find all cards of that player
    futhead_url_player_data = f'{FUTHEAD_PLAYER}{player_full_name}'
    details_of_specific_player = json.loads(requests.get(futhead_url_player_data).content)
    cards_from_the_same_id = []
    for player_in_json_futhead in details_of_specific_player:
        if player_in_json_futhead["player_id"] == player_id:
            rating = player_in_json_futhead["rating"]
            specific_card_id = player_in_json_futhead["def_id"]
            revision = player_in_json_futhead["revision_type"]
            name = player_in_json_futhead["full_name"]
            nation = player_in_json_futhead["nation_name"]
            position = player_in_json_futhead["position"]
            club = player_in_json_futhead["club_name"]
            cards_from_the_same_id.append(Player(specific_card_id, name, rating, revision, nation,
                                                 position, club))
    return cards_from_the_same_id
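One likely bottleneck above is that future.result() is called inside the submit loop, which blocks on each request and serializes the work; the executor is also re-created once per player by the outer for player_obj loop. A sketch of the usual pattern, submitting everything first and collecting afterwards (the max_workers value is a guess, not tuned):

import concurrent.futures

def get_all_players_full_data(ea_players_json):
    # Flatten the nested player lists.
    players = [p for group in ea_players_json.values() for p in group]
    all_data = []
    # Cap the worker count; thousands of threads usually hurt more than help.
    with concurrent.futures.ThreadPoolExecutor(max_workers=32) as executor:
        futures = [executor.submit(build_full_player_data_obj, p) for p in players]
        for future in concurrent.futures.as_completed(futures):
            all_data.append(future.result())  # blocks only until some task finishes
    return all_data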
I am trying to update a rating row by row. I have one dataframe of players, that all start with the same rating. For each match, I want the rating to change. Another dataframe contains results of each match.
import pandas as pd
gamesdata = [['paul','tom'],['paul','lisa'],['tom','paul'],['lisa','tom'],['paul','lisa'],['lisa','tom'],['paul','tom']]
games = pd.DataFrame(gamesdata, columns = ['Winner', 'Looser'])
playersdata= ['lisa','paul','tom']
players = pd.DataFrame(playersdata, columns = ['Name'])
mean_elo = 1000
elo_width = 400
k_factor = 64
players['elo'] = mean_elo
def update_elo(winner_elo, loser_elo):
    expected_win = expected_result(winner_elo, loser_elo)
    change_in_elo = k_factor * (1 - expected_win)
    winner_elo += change_in_elo
    loser_elo -= change_in_elo
    return winner_elo, loser_elo

def expected_result(elo_a, elo_b):
    expect_a = 1.0 / (1 + 10 ** ((elo_b - elo_a) / elo_width))
    return expect_a
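# Worked example: with equal ratings (elo_a == elo_b) the exponent is 0,
# so expect_a = 1.0 / (1 + 10**0) = 0.5, and update_elo then gives the winner
# k_factor * (1 - 0.5) = 32 points while the loser drops the same 32.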
for index, row in games.iterrows():
    winnername = row['Winner']
    losername = row['Looser']
    web = players['elo'].loc[players['Name'] == winnername].values[0]
    wIndex = players.loc[players['Name'] == winnername]
    # I want to return just the index, so I can update the value
    print(wIndex)
    leb = players['elo'].loc[players['Name'] == losername].values[0]
    print('Winner Elo before: ' + str(web))
    winner_elo, looser_elo = update_elo(web, leb)
    print('Winner Elo after: ' + str(winner_elo))
    # here I want to update the value
    #players.at[wIndex,'elo'] = winner_elo
I am trying to update the value in the players table using
players.at[wIndex, 'elo'] = winner_elo
but I struggle to get the index with this code:
wIndex = players.loc[players['Name'] == winnername]
Found a solution:
wIndex = players.loc[players['Name'] == winnername].index.values
Can't believe I missed that.
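One caveat: .index.values is an array, while DataFrame.at expects a single scalar label, so taking the first match is safer. A sketch of the update step under that assumption:

wIndex = players.index[players['Name'] == winnername][0]  # scalar row label
lIndex = players.index[players['Name'] == losername][0]
players.at[wIndex, 'elo'] = winner_elo
players.at[lIndex, 'elo'] = looser_elo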
Hi, I want to convert the contents of a file (in this case a Landsat 7 metadata file) into a series of variables defined by the contents of the file, using Python 2.7. The file contents look like this:
GROUP = L1_METADATA_FILE
GROUP = METADATA_FILE_INFO
ORIGIN = "Image courtesy of the U.S. Geological Survey"
REQUEST_ID = "0101305309253_00043"
LANDSAT_SCENE_ID = "LE71460402010069SGS00"
FILE_DATE = 2013-06-02T11:19:59Z
STATION_ID = "SGS"
PROCESSING_SOFTWARE_VERSION = "LPGS_12.2.1"
DATA_CATEGORY = "NOMINAL"
END_GROUP = METADATA_FILE_INFO
GROUP = PRODUCT_METADATA
DATA_TYPE = "L1T"
ELEVATION_SOURCE = "GLS2000"
OUTPUT_FORMAT = "GEOTIFF"
EPHEMERIS_TYPE = "DEFINITIVE"
SPACECRAFT_ID = "LANDSAT_7"
SENSOR_ID = "ETM"
SENSOR_MODE = "BUMPER"
WRS_PATH = 146
WRS_ROW = 040
DATE_ACQUIRED = 2010-03-10
GROUP = IMAGE_ATTRIBUTES
CLOUD_COVER = 0.00
IMAGE_QUALITY = 9
SUN_AZIMUTH = 137.38394502
SUN_ELEVATION = 48.01114126
GROUND_CONTROL_POINTS_MODEL = 55
GEOMETRIC_RMSE_MODEL = 3.790
GEOMETRIC_RMSE_MODEL_Y = 2.776
GEOMETRIC_RMSE_MODEL_X = 2.580
END_GROUP = IMAGE_ATTRIBUTES
Example of the variable items I'm interested in:
GROUP = MIN_MAX_RADIANCE
RADIANCE_MAXIMUM_BAND_1 = 293.700
RADIANCE_MINIMUM_BAND_1 = -6.200
RADIANCE_MAXIMUM_BAND_2 = 300.900
RADIANCE_MINIMUM_BAND_2 = -6.400
RADIANCE_MAXIMUM_BAND_3 = 234.400
RADIANCE_MINIMUM_BAND_3 = -5.000
RADIANCE_MAXIMUM_BAND_4 = 241.100
RADIANCE_MINIMUM_BAND_4 = -5.100
RADIANCE_MAXIMUM_BAND_5 = 47.570
RADIANCE_MINIMUM_BAND_5 = -1.000
RADIANCE_MAXIMUM_BAND_6_VCID_1 = 17.040
RADIANCE_MINIMUM_BAND_6_VCID_1 = 0.000
RADIANCE_MAXIMUM_BAND_6_VCID_2 = 12.650
RADIANCE_MINIMUM_BAND_6_VCID_2 = 3.200
RADIANCE_MAXIMUM_BAND_7 = 16.540
RADIANCE_MINIMUM_BAND_7 = -0.350
RADIANCE_MAXIMUM_BAND_8 = 243.100
RADIANCE_MINIMUM_BAND_8 = -4.700
END_GROUP = MIN_MAX_RADIANCE
I am open to other ideas, as I don't need all entries as variables, just a selection. And I see some headers are listed more than once, i.e. GROUP is used multiple times. I need to be able to select certain variables (integer values) and use them in formulas elsewhere in the code. Any help would be appreciated (novice Python coder).
I'm not sure exactly what you are looking for, but maybe something like this:
s = '''GROUP = L1_METADATA_FILE
GROUP = METADATA_FILE_INFO
ORIGIN = "Image courtesy of the U.S. Geological Survey"
REQUEST_ID = "0101305309253_00043"
LANDSAT_SCENE_ID = "LE71460402010069SGS00"
FILE_DATE = 2013-06-02T11:19:59Z
STATION_ID = "SGS"
PROCESSING_SOFTWARE_VERSION = "LPGS_12.2.1"
DATA_CATEGORY = "NOMINAL"
END_GROUP = METADATA_FILE_INFO
GROUP = PRODUCT_METADATA
DATA_TYPE = "L1T"
ELEVATION_SOURCE = "GLS2000"
OUTPUT_FORMAT = "GEOTIFF"
EPHEMERIS_TYPE = "DEFINITIVE"
SPACECRAFT_ID = "LANDSAT_7"
SENSOR_ID = "ETM"
SENSOR_MODE = "BUMPER"
WRS_PATH = 146
WRS_ROW = 040
DATE_ACQUIRED = 2010-03-10'''
output = {}  # dict
for line in s.split("\n"):  # iterate through every line in the string
    l = line.split("=")  # separate by "=" and put into a list
    output[l[0].strip()] = l[1].strip()  # first word is the key, second is the value
print output  # output is a dictionary of all key-value pairs in your metadata, separated by "="
print output["SENSOR_ID"]  # outputs "ETM"
==============
Edited:
f = open('metadata.txt', 'r')  # open file for reading

def build_data(f):  # build dictionary
    output = {}  # dict
    for line in f.readlines():  # iterate through every line in the file
        if "=" in line:  # make sure the line has data in the expected form
            l = line.split("=")  # separate by "=" and put into a list
            output[l[0].strip()] = l[1].strip()  # first word is the key, second is the value
    return output  # returns a dictionary with the key-value pairs

data = build_data(f)
print data["IMAGE_QUALITY"]  # prints 9