Excel IF/AND logic to data frame - python

I have a lot of Excel files I am trying to convert to Python code and need some help :)
I have a data frame like this:
Date STD-3 STD-25 STD-2 STD-15 STD-1 Data STD1 STD15 STD2 STD25 STD3
11.05.2022 -0,057406797 -0,047838998 -0,038271198 -0,028703399 -0,019135599 0,021233631 0,019135599 0,028703399 0,038271198 0,047838998 0,057406797
I need to check for this logic:
"Data" < "STD1" and "Data" > "STD-1" = 0
"Data" > "STD1" and "Data" < "STD15" = 1
"Data" > "STD15" and "Data" < "STD2" = 1,5
"Data" > "STD2" and "Data" < "STD25" = 2
"Data" > "STD25" and "Data" < "STD3" = 2,5
"Data" > "STD3" = 3
"Data" < "STD-1" and "Data" > "STD-15" = -1
"Data" < "STD-15" and "Data" > "STD-2" = -1,5
"Data" < "STD-2" and "Data" > "STD-25" = -2
"Data" < "STD-25" and "Data" > "STD-3" = -2,5
"Data" > "STD3" = -3
And add the output to a new column.

Two fixes are needed here: the -2 band ("Data" < "STD-2" and "Data" > "STD-25") was missing from both lists, and the last condition must be Data < STD-3, not Data > STD-3. With those corrections:
import numpy as np

condition = [
    (df['Data'] < df['STD1']) & (df['Data'] > df['STD-1']),
    (df['Data'] > df['STD1']) & (df['Data'] < df['STD15']),
    (df['Data'] > df['STD15']) & (df['Data'] < df['STD2']),
    (df['Data'] > df['STD2']) & (df['Data'] < df['STD25']),
    (df['Data'] > df['STD25']) & (df['Data'] < df['STD3']),
    df['Data'] > df['STD3'],
    (df['Data'] < df['STD-1']) & (df['Data'] > df['STD-15']),
    (df['Data'] < df['STD-15']) & (df['Data'] > df['STD-2']),
    (df['Data'] < df['STD-2']) & (df['Data'] > df['STD-25']),  # was missing
    (df['Data'] < df['STD-25']) & (df['Data'] > df['STD-3']),
    df['Data'] < df['STD-3'],  # was: df['Data'] > df['STD-3']
]
result = [0, 1, 1.5, 2, 2.5, 3, -1, -1.5, -2, -2.5, -3]
df['RESULT'] = np.select(condition, result, None)
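As a sanity check against the sample row: Data (0.0212) lies between STD1 (0.0191) and STD15 (0.0287), so RESULT is 1.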


Build update statement in python using results from previous query

I have a task in which I must update a database on another server. As my options are limited, I'm using Python to do the update.
However, I get this error:
pyodbc.ProgrammingError: ('42000', "[42000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Incorrect syntax near ')'. (102) (SQLExecDirectW)")
My code is this:
First I create a SELECT and then use it in the UPDATE:
query_dwt = "SELECT [cdcliente]\
,[nmcontato]\
,[cddepartamento]\
,[nmcargo]\
,[dsemail]\
,[cdlingua]\
,[nrcpfcnpj]\
,[cdcargo]\
,[cdcontatosuperior]\
,[idativo]\
,[cdcidade]\
,[dsendereco]\
,[dscomplemento]\
,[nmbairro]\
,[nrcep]\
,[nrcelular]\
,[dtnascimento]\
,[idbloqueado]\
,[cdlocalidade]\
,[nrmatricula]\
,[nmskin]\
FROM [dw].[d_Qualitor_ad_contato_RH] WITH (NOLOCK)\
WHERE cdcliente = 9402\
AND (cdcontato = 38584 OR cdcontato = 22320 OR cdcontato = 37284);"
Second, I use that SELECT to bring the information from the source table into the UPDATE of the target table:
query_qltr = """UPDATE ad\
SET\
ad.nmcontato = PR.nmcontato\
,ad.cddepartamento = PR.cddepartamento\
,ad.nmcargo = PR.nmcargo\
,ad.dsemail = PR.dsemail\
,ad.cdlingua = PR.cdlingua\
,ad.nrcpfcnpj = PR.nrcpfcnpj\
,ad.cdcargo = PR.cdcargo\
,ad.cdcontatosuperior = PR.cdcontatosuperior\
,ad.idativo = PR.idativo\
,ad.cdcidade = PR.cdcidade\
,ad.dsendereco = PR.dsendereco\
,ad.dscomplemento = PR.dscomplemento\
,ad.nmbairro = PR.nmbairro\
,ad.nrcep = PR.nrcep\
,ad.nrcelular = PR.nrcelular\
,ad.dtnascimento = PR.dtnascimento\
,ad.idbloqueado = PR.idbloqueado\
,ad.cdlocalidade = PR.cdlocalidade\
,ad.nrmatricula = PR.nrmatricula\
,ad.nmskin = PR.nmskin\
FROM dbo.ad_contato ad\
INNER JOIN ({}) PR\
ON ad.cdcontato = PR.cdcontato\
AND ad.cdcliente LIKE '9402';""".format(OpenSqlDatabaseConnection.execute_query(query_dwt,'target-db-conn-str'))

OpenSqlDatabaseConnection.execute_query(query_qltr,'rdn-db-clt-sql-06a-inssql01-qualitor-prd-jdbc-conn-string-01')
I'm sure it's something simple but I can't figure it out.
Solution: the .format() call was most likely injecting the Python return value of execute_query (not a SQL string) into the INNER JOIN (...), which is what produces "Incorrect syntax near ')'". Split the work instead:
1- Extract the data from the first database into a dataframe.
def select_dw_qualitor_ad_contato():
    query_dwt = "SELECT [cdcliente]\
        ,[cdcontato]\
        ,[nmcontato]\
        ,[cddepartamento]\
        ,[nmcargo]\
        ,[dsemail]\
        ,[cdlingua]\
        ,[nrcpfcnpj]\
        ,[cdcargo]\
        ,[cdcontatosuperior]\
        ,[idativo]\
        ,[cdcidade]\
        ,[dsendereco]\
        ,[dscomplemento]\
        ,[nmbairro]\
        ,[nrcep]\
        ,[nrcelular]\
        ,[dtnascimento]\
        ,[idbloqueado]\
        ,[cdlocalidade]\
        ,[nrmatricula]\
        ,[nmskin]\
        FROM [dw].[d_Qualitor_ad_contato_RH] WITH (NOLOCK)\
        WHERE cdcliente = 9402\
        AND (cdcontato = 38584\
        OR cdcontato = 22320\
        OR cdcontato = 37284\
        OR cdcontato = 36139\
        OR cdcontato = 41035\
        OR cdcontato = 38819);"
    return pd.read_sql(query_dwt, OpenSqlDatabaseConnection.connection('target-db-conn-str'),
                       parse_dates={"date_column": {"errors": "ignore"}})
2- Update the target table row by row from the dataframe.
def update_qualitor_table():
    dfdw = QueriesLists.select_dw_qualitor_ad_contato()
    # iterate over rows by position; enumerate(dfdw) would walk column names instead
    for i in range(len(dfdw)):
        row = dfdw.iloc[i]
        QueriesLists.update_database(row)
3- Build the UPDATE statement from each dataframe row and execute it.
def update_database(df):
    query_qltr = "UPDATE [dbo].[ad_contato]\
        SET [nmcontato] = CAST('{0}' AS VARCHAR(200))\
        ,[cddepartamento] = CAST('{1}' AS INT)\
        ,[nmcargo] = CAST('{2}' AS VARCHAR(50))\
        ,[dsemail] = CAST('{3}' AS VARCHAR(200))\
        ,[cdlingua] = CAST('{4}' AS INT)\
        ,[nrcpfcnpj] = CAST('{5}' AS VARCHAR(20))\
        ,[cdcargo] = CAST('{6}' AS INT)\
        ,[cdcontatosuperior] = CAST('{7}' AS INT)\
        ,[idativo] = CAST('{8}' AS VARCHAR(1))\
        ,[dsendereco] = CAST('{9}' AS VARCHAR(200))\
        ,[dscomplemento] = CAST('{10}' AS VARCHAR(200))\
        ,[nmbairro] = CAST('{11}' AS VARCHAR(40))\
        ,[nrcep] = CAST('{12}' AS VARCHAR(9))\
        ,[dtnascimento] = CAST('{13}' AS DATETIME)\
        ,[idbloqueado] = CAST('{14}' AS VARCHAR(1))\
        ,[cdlocalidade] = CAST('{15}' AS INT)\
        ,[nrmatricula] = CAST('{16}' AS VARCHAR(20))\
        WHERE [cdcontato] = CAST('{17}' AS INT)\
        AND [cdcliente] = 9402;\
        ".format(str(df.iloc[2])            # nmcontato (positional access on the row Series)
                 , int(df.iloc[3])          # cddepartamento
                 , str(df.iloc[4])          # nmcargo
                 , str(df.iloc[5])          # dsemail
                 , int(df.iloc[6])          # cdlingua
                 , str(df.iloc[7])          # nrcpfcnpj
                 , int(df.iloc[8])          # cdcargo
                 , int(df.iloc[9])          # cdcontatosuperior
                 , str(df.iloc[10])         # idativo
                 , str(df.iloc[12])         # dsendereco
                 , str(df.iloc[13])         # dscomplemento
                 , str(df.iloc[14])         # nmbairro
                 , str(df.iloc[15])         # nrcep
                 , pd.to_datetime(df.iloc[17])  # dtnascimento
                 , str(df.iloc[18])         # idbloqueado
                 , int(df.iloc[19])         # cdlocalidade
                 , str(df.iloc[20])         # nrmatricula
                 , int(df.iloc[1]))         # cdcontato
    OpenSqlDatabaseConnection.execute_query(query_qltr, 'rdn-db-clt-sql-06a-inssql01-qualitor-prd-jdbc-conn-string-01')
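Note: interpolating values into SQL with .format() stays fragile (quoting, type conversion, injection). A minimal sketch of the same row-by-row update using pyodbc parameter placeholders instead; the conn object and the shortened column list are illustrative assumptions, not part of the original code:
import pyodbc

UPDATE_SQL = (
    "UPDATE [dbo].[ad_contato] "
    "SET [nmcontato] = ?, [cddepartamento] = ?, [dsemail] = ? "
    "WHERE [cdcontato] = ? AND [cdcliente] = 9402;"
)

def update_contact(conn, row):
    # pyodbc binds each ? placeholder safely, so no manual CASTs or quoting
    # are needed; the remaining columns would follow the same pattern
    cur = conn.cursor()
    cur.execute(UPDATE_SQL, (str(row["nmcontato"]),
                             int(row["cddepartamento"]),
                             str(row["dsemail"]),
                             int(row["cdcontato"])))
    conn.commit()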

Create new column based on conditions of others

I have this df:
Segnale Prezzo Prezzo_exit
0 Long 44645 43302
1 Short 41169 44169
2 Long 44322 47093
3 Short 45323 42514
sample code to generate it:
tbl2 = {
    "Segnale": ["Long", "Short", "Long", "Short"],
    "Prezzo": [44645, 41169, 44322, 45323],
    "Prezzo_exit": [43302, 44169, 47093, 42514],
}
df = pd.DataFrame(tbl2)
I need to create a new column named "esito" with these conditions:
if df["Segnale"] =="Long" and df["Prezzo"] < df["Prezzo_exit"] #row with "target"
if df["Segnale"] =="Long" and df["Prezzo"] > df["Prezzo_exit"] #row with "stop"
if df["Segnale"] =="Short" and df["Prezzo"] < df["Prezzo_exit"] #row with "stop"
if df["Segnale"] =="Short" and df["Prezzo"] > df["Prezzo_exit"] #row with "target"
So the final result will be:
Segnale Prezzo Prezzo_exit esito
0 Long 44645 43302 stop
1 Short 41169 44169 stop
2 Long 44322 47093 target
3 Short 45323 42514 target
I tried with no success:
df.loc[(df['Segnale'].str.contains('Long') & df['Prezzo'] < df['Prezzo_exit']), 'Esito'] = 'Target'
df.loc[(df['Segnale'].str.contains('Long') & df['Prezzo'] > df['Prezzo_exit']), 'Esito'] = 'Stop'
df.loc[(df['Segnale'].str.contains('Short') & df['Prezzo'] > df['Prezzo_exit']), 'Esito'] = 'Target'
df.loc[(df['Segnale'].str.contains('Short') & df['Prezzo'] > df['Prezzo_exit']), 'Esito'] = 'Stop'
This will do what your question asks:
df.loc[(df.Segnale=='Long') & (df.Prezzo < df.Prezzo_exit), 'esito'] = 'target'
df.loc[(df.Segnale=='Long') & (df.Prezzo > df.Prezzo_exit), 'esito'] = 'stop'
df.loc[(df.Segnale=='Short') & (df.Prezzo < df.Prezzo_exit), 'esito'] = 'stop'
df.loc[(df.Segnale=='Short') & (df.Prezzo > df.Prezzo_exit), 'esito'] = 'target'
Output:
Segnale Prezzo Prezzo_exit esito
0 Long 44645 43302 stop
1 Short 41169 44169 stop
2 Long 44322 47093 target
3 Short 45323 42514 target
UPDATE:
You could also do this:
df['esito'] = ( pd.Series(['stop']*len(df)).where(
((df.Segnale=='Long') & (df.Prezzo > df.Prezzo_exit)) | ((df.Segnale=='Short') & (df.Prezzo < df.Prezzo_exit)),
'target') )
... or this:
df['esito'] = ( np.where(
((df.Segnale=='Long') & (df.Prezzo > df.Prezzo_exit)) | ((df.Segnale=='Short') & (df.Prezzo < df.Prezzo_exit)),
'stop', 'target') )
You need to add parentheses around each comparison, e.g. (df['Prezzo'] < df['Prezzo_exit']), because & binds more tightly than < and >.
For simplification, you can use np.select to pair each condition with its choice in one statement, as sketched below.
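A minimal sketch of that np.select approach, assuming the sample df above and numpy imported as np:
conditions = [
    (df["Segnale"] == "Long") & (df["Prezzo"] < df["Prezzo_exit"]),
    (df["Segnale"] == "Long") & (df["Prezzo"] > df["Prezzo_exit"]),
    (df["Segnale"] == "Short") & (df["Prezzo"] < df["Prezzo_exit"]),
    (df["Segnale"] == "Short") & (df["Prezzo"] > df["Prezzo_exit"]),
]
choices = ["target", "stop", "stop", "target"]
df["esito"] = np.select(conditions, choices, default="")  # default covers Prezzo == Prezzo_exit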

Inverse line graph year count matplotlib pandas python

I'm trying to create a line plot of the counts of three different groups (desktop, mobile & tablet), with the x axis showing the years 2014, 2015 and 2016, but I am getting an error.
My code is currently:
#year-by-year change
desktop14 = od.loc[(od.Account_Year_Week >= 201401) & (od.Account_Year_Week <= 201453) & (od.online_device_type_detail == "DESKTOP"), "Gross_Demand_Pre_Credit"]
desktop15 = od.loc[(od.Account_Year_Week >= 201501) & (od.Account_Year_Week <= 201553) & (od.online_device_type_detail == "DESKTOP"), "Gross_Demand_Pre_Credit"]
desktop16 = od.loc[(od.Account_Year_Week >= 201601) & (od.Account_Year_Week <= 201653) & (od.online_device_type_detail == "DESKTOP"), "Gross_Demand_Pre_Credit"]
mobile14 = od.loc[(od.Account_Year_Week >= 201401) & (od.Account_Year_Week <= 201453) & (od.online_device_type_detail == "MOBILE"), "Gross_Demand_Pre_Credit"]
mobile15 = od.loc[(od.Account_Year_Week >= 201501) & (od.Account_Year_Week <= 201553) & (od.online_device_type_detail == "MOBILE"), "Gross_Demand_Pre_Credit"]
mobile16 = od.loc[(od.Account_Year_Week >= 201601) & (od.Account_Year_Week <= 201653) & (od.online_device_type_detail == "MOBILE"), "Gross_Demand_Pre_Credit"]
tablet14 = od.loc[(od.Account_Year_Week >= 201401) & (od.Account_Year_Week <= 201453) & (od.online_device_type_detail == "TABLET"), "Gross_Demand_Pre_Credit"]
tablet15 = od.loc[(od.Account_Year_Week >= 201501) & (od.Account_Year_Week <= 201553) & (od.online_device_type_detail == "TABLET"), "Gross_Demand_Pre_Credit"]
tablet16 = od.loc[(od.Account_Year_Week >= 201601) & (od.Account_Year_Week <= 201653) & (od.online_device_type_detail == "TABLET"), "Gross_Demand_Pre_Credit"]
devicedata = [["Desktop", desktop14.count(), desktop15.count(), desktop16.count()], ["Mobile", mobile14.count(), mobile15.count(), mobile16.count()], ["Tablet", tablet14.count(), tablet15.count(), tablet16.count()]]
df = pd.DataFrame(devicedata, columns=["Device", "2014", "2015", "2016"]).set_index("Device")
plt.show()
I want each line to be a device type, with the x axis showing the change by year (essentially swapping the axes). How do I do this?
Any help is greatly appreciated.
Just do
df.transpose().plot()
The result is a line chart with one line per device type and the years on the x axis.
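For completeness, a minimal sketch of the plotting step, assuming matplotlib.pyplot is imported as plt:
ax = df.transpose().plot()  # transpose puts the years on the index, so each device becomes a line
ax.set_xlabel("Year")
ax.set_ylabel("Count")
plt.show()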

numpy.where makes code slow

I have the following block of code:
def hasCleavage(tags, pair, fragsize):
    limit = int(fragsize["mean"] + fragsize["sd"] * 4)
    if pair.direction == "F1R2" or pair.direction == "R2F1":
        x1 = np.where((tags[pair.chr_r1] >= pair.r1["pos"]) & (tags[pair.chr_r1] <= pair.r1["pos"]+limit))[0]
        x2 = np.where((tags[pair.chr_r2] <= pair.r2["pos"]+pair.frside) & (tags[pair.chr_r2] >= pair.r2["pos"]+pair.frside-limit))[0]
    elif pair.direction == "F1F2" or pair.direction == "F2F1":
        x1 = np.where((tags[pair.chr_r1] >= pair.r1["pos"]) & (tags[pair.chr_r1] <= pair.r1["pos"]+limit))[0]
        x2 = np.where((tags[pair.chr_r2] >= pair.r2["pos"]) & (tags[pair.chr_r2] <= pair.r2["pos"]+limit))[0]
    elif pair.direction == "R1R2" or pair.direction == "R2R1":
        x1 = np.where((tags[pair.chr_r1] <= pair.r1["pos"]+pair.frside) & (tags[pair.chr_r1] >= pair.r1["pos"]+pair.frside-limit))[0]
        x2 = np.where((tags[pair.chr_r2] <= pair.r2["pos"]+pair.frside) & (tags[pair.chr_r2] >= pair.r2["pos"]+pair.frside-limit))[0]
    else:  # F2R1 or R1F2
        x1 = np.where((tags[pair.chr_r2] >= pair.r2["pos"]) & (tags[pair.chr_r2] <= pair.r2["pos"]+limit))[0]
        x2 = np.where((tags[pair.chr_r1] <= pair.r1["pos"]+pair.frside) & (tags[pair.chr_r1] >= pair.r1["pos"]+pair.frside-limit))[0]
    if x1.size > 0 and x2.size > 0:
        return True
    else:
        return False
My script takes 16 minutes to finish. It calls hasCleavage millions of times, once per row of the file it reads. When I add a return True just above the limit variable (preventing the np.where calls), the script takes 5 minutes.
tags is a dictionary containing numpy arrays with ascending numbers.
Do you have any suggestions to improve performance?
EDIT:
tags = {'JH584302.1': array([ 351, 1408, 2185, 2378, 2740, 2904, 3364, 3657,
4240, 5324, 5966, 5977, 5986, 6488, 6531, 6847,
6961, 6973, 6991, 7107, 7383, 7395, 7557, 7569,
9178, 10077, 10456, 10471, 11271, 11466, 12311, 12441,
12598, 13051, 13123, 13859, 14167, 14672, 15156, 15252,
15268, 15273, 15694, 15786, 16361, 17073, 17293, 17454])
}
fragsize = {'sd': 130.29407997430428, 'mean': 247.56636}
And pair is an object of a custom class
<__main__.Pair object at 0x17129ad0>
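One suggestion: since the arrays in tags are sorted in ascending order, each full-array np.where scan can be replaced by a binary search with np.searchsorted. A minimal sketch of the idea (window_has_tag is an illustrative helper, not from the original code):
import numpy as np

def window_has_tag(arr, lo, hi):
    # arr is sorted ascending, so searchsorted finds the window bounds in
    # O(log n) instead of scanning the whole array as np.where does
    left = np.searchsorted(arr, lo, side="left")
    right = np.searchsorted(arr, hi, side="right")
    return right > left  # True if any element lies in [lo, hi]

# e.g. in the F1R2/R2F1 branch, x1.size > 0 is equivalent to
# window_has_tag(tags[pair.chr_r1], pair.r1["pos"], pair.r1["pos"] + limit)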

How can I migrate flask-sqlalchemy multiple databases?

I know multiple databases work in Flask-SQLAlchemy with __bind_key__,
but I don't know how to migrate those databases with Alembic (Flask-Migrate).
Here's env.py:
from flask import current_app
config.set_main_option('sqlalchemy.url', current_app.config.get('SQLALCHEMY_BINDS')['bind_main'])
target_metadata = {
    'bind_main': current_app.extensions['migrate'].db.metadata,
    'bind_follower': current_app.extensions['migrate'].db.metadata,
}
How can I set the follower db in target_metadata? Flask-Migrate doesn't seem to handle bind databases.
Thanks.
To create a multiple database migration repository, add the --multidb argument to the init command:
$ python app.py db init --multidb
For more details please refer to the Flask-Migrate documentation.
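From there, revisions are generated and applied with the usual commands, which in a multidb repository emit per-bind upgrade and downgrade functions (see the script.py.mako diff below):
$ python app.py db migrate -m "initial"
$ python app.py db upgrade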
It may be easier to scrap the old "migrations" folder and initialize the database again with the new setup, applying any customizations (most likely to env.py or script.py.mako) to the fresh directory.
Diff init vs init --multidb
I ran a diff of the results for flask db init (i.e. single.migrations) vs flask db init --multidb (i.e. multi.migrations):
diff --suppress-common-lines single.migrations/README multi.migrations/README
1c1
< Single-database configuration for Flask.
---
> Multi-database configuration for Flask.
diff --suppress-common-lines single.migrations/env.py multi.migrations/env.py
5a6
> from sqlalchemy import MetaData
9a11,12
> USE_TWOPHASE = False
>
26a30,42
> bind_names = []
> if current_app.config.get('SQLALCHEMY_BINDS') is not None:
> bind_names = list(current_app.config['SQLALCHEMY_BINDS'].keys())
> else:
> get_bind_names = getattr(current_app.extensions['migrate'].db,
> 'bind_names', None)
> if get_bind_names:
> bind_names = get_bind_names()
> for bind in bind_names:
> context.config.set_section_option(
> bind, "sqlalchemy.url",
> str(current_app.extensions['migrate'].db.get_engine(
> bind=bind).url).replace('%', '%%'))
28a45
>
34a52,62
> def get_metadata(bind):
> """Return the metadata for a bind."""
> if bind == '':
> bind = None
> m = MetaData()
> for t in target_metadata.tables.values():
> if t.info.get('bind_key') == bind:
> t.tometadata(m)
> return m
>
>
47,50c75,76
< url = config.get_main_option("sqlalchemy.url")
< context.configure(
< url=url, target_metadata=target_metadata, literal_binds=True
< )
---
> # for the --sql use case, run migrations for each URL into
> # individual files.
52,53c78,99
< with context.begin_transaction():
< context.run_migrations()
---
> engines = {
> '': {
> 'url': context.config.get_main_option('sqlalchemy.url')
> }
> }
> for name in bind_names:
> engines[name] = rec = {}
> rec['url'] = context.config.get_section_option(name, "sqlalchemy.url")
>
> for name, rec in engines.items():
> logger.info("Migrating database %s" % (name or '<default>'))
> file_ = "%s.sql" % name
> logger.info("Writing output to %s" % file_)
> with open(file_, 'w') as buffer:
> context.configure(
> url=rec['url'],
> output_buffer=buffer,
> target_metadata=get_metadata(name),
> literal_binds=True,
> )
> with context.begin_transaction():
> context.run_migrations(engine_name=name)
70,85c116,169
< if script.upgrade_ops.is_empty():
< directives[:] = []
< logger.info('No changes in schema detected.')
<
< connectable = current_app.extensions['migrate'].db.get_engine()
<
< with connectable.connect() as connection:
< context.configure(
< connection=connection,
< target_metadata=target_metadata,
< process_revision_directives=process_revision_directives,
< **current_app.extensions['migrate'].configure_args
< )
<
< with context.begin_transaction():
< context.run_migrations()
---
> if len(script.upgrade_ops_list) >= len(bind_names) + 1:
> empty = True
> for upgrade_ops in script.upgrade_ops_list:
> if not upgrade_ops.is_empty():
> empty = False
> if empty:
> directives[:] = []
> logger.info('No changes in schema detected.')
>
> # for the direct-to-DB use case, start a transaction on all
> # engines, then run all migrations, then commit all transactions.
> engines = {
> '': {'engine': current_app.extensions['migrate'].db.get_engine()}
> }
> for name in bind_names:
> engines[name] = rec = {}
> rec['engine'] = current_app.extensions['migrate'].db.get_engine(
> bind=name)
>
> for name, rec in engines.items():
> engine = rec['engine']
> rec['connection'] = conn = engine.connect()
>
> if USE_TWOPHASE:
> rec['transaction'] = conn.begin_twophase()
> else:
> rec['transaction'] = conn.begin()
>
> try:
> for name, rec in engines.items():
> logger.info("Migrating database %s" % (name or '<default>'))
> context.configure(
> connection=rec['connection'],
> upgrade_token="%s_upgrades" % name,
> downgrade_token="%s_downgrades" % name,
> target_metadata=get_metadata(name),
> process_revision_directives=process_revision_directives,
> **current_app.extensions['migrate'].configure_args
> )
> context.run_migrations(engine_name=name)
>
> if USE_TWOPHASE:
> for rec in engines.values():
> rec['transaction'].prepare()
>
> for rec in engines.values():
> rec['transaction'].commit()
> except: # noqa: E722
> for rec in engines.values():
> rec['transaction'].rollback()
> raise
> finally:
> for rec in engines.values():
> rec['connection'].close()
diff --suppress-common-lines single.migrations/script.py.mako multi.migrations/script.py.mako
1c1,4
< """${message}
---
> <%!
> import re
>
> %>"""${message}
19,20c22,48
< def upgrade():
< ${upgrades if upgrades else "pass"}
---
> def upgrade(engine_name):
> globals()["upgrade_%s" % engine_name]()
>
>
> def downgrade(engine_name):
> globals()["downgrade_%s" % engine_name]()
>
> <%
> from flask import current_app
> bind_names = []
> if current_app.config.get('SQLALCHEMY_BINDS') is not None:
> bind_names = list(current_app.config['SQLALCHEMY_BINDS'].keys())
> else:
> get_bind_names = getattr(current_app.extensions['migrate'].db, 'bind_names', None)
> if get_bind_names:
> bind_names = get_bind_names()
> db_names = [''] + bind_names
> %>
>
> ## generate an "upgrade_<xyz>() / downgrade_<xyz>()" function
> ## for each database name in the ini file.
>
> % for db_name in db_names:
>
> def upgrade_${db_name}():
> ${context.get("%s_upgrades" % db_name, "pass")}
>
21a50,51
> def downgrade_${db_name}():
> ${context.get("%s_downgrades" % db_name, "pass")}
23,24c53
< def downgrade():
< ${downgrades if downgrades else "pass"}
---
> % endfor
Common subdirectories: single.migrations/versions and multi.migrations/versions
Note: lines prefixed with < come from the single-database setup and lines prefixed with > from the multi-database one. Stack Overflow isn't the best medium for displaying a diff; diff -y --color=always (or another diff tool) is much easier to read.
In summary, a few lines were removed from env.py, and new lines were added to env.py and script.py.mako to accommodate bind keys.
Specify New Default
To start with a new directory and retain the old in the codebase for comparison, specify the directory during the initialization:
flask db init --multidb --directory "multi.migrations"
The new directory can be specified in the program's Migrate constructor:
migrate = Migrate(directory="multi.migrations")
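For context, a minimal wiring sketch (the app and db objects are assumed to already exist):
from flask_migrate import Migrate

migrate = Migrate(app, db, directory="multi.migrations")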
Likewise, in an interactive Python session, a directory argument can be passed to the API calls:
show(directory='migrations', revision='head')
