I have a Python script that runs a query against a DB, stores the result in a DataFrame, and then exports it to MS Access.
In the loop, it splits the result into 3 files (each file holds a different month).
The issue is in the LI_DESC column: it contains Arabic text that displays correctly in Jupyter but shows incorrect characters once exported to Access.
Here are the columns displaying correctly in Jupyter:
Here are the columns as shown in the Access file:
Python code:
import cx_Oracle
import os
import accessdb
import pandas as pd
from datetime import datetime

dsn_tns = cx_Oracle.makedsn('10.112.**.****', '1521', service_name='cdwn10g.hq')
conn = cx_Oracle.connect(user='BI', password='BI', dsn=dsn_tns, encoding='utf-8')

df = pd.read_sql_query("""SELECT MONTH1,LI_DESC,PORT,REGS_NUM,REG_DT,CTRY_CD,TAR_CD,UNS_QTY,UN_CD,KGN,KGG,CIF_AMT,CURCY_CD,CURCY_RT
FROM STTS.CDS
WHERE SUBSTR(REG_DT_G,1,6) BETWEEN to_number(extract(year from add_months(sysdate,-3)) || '' || to_char(add_months(sysdate,-3), 'MM'))
                               AND to_number(extract(year from add_months(sysdate,-1)) || '' || to_char(add_months(sysdate,-1), 'MM'))
ORDER BY PORT, REGS_NUM, REG_DT""", conn)

today = datetime.now()
out_dir = r'C:\Users\nalkar\Documents\Python Scripts\RUNDATE' + today.strftime('%Y%m%d')

if not os.path.exists(out_dir):
    os.makedirs(out_dir)
    # one .accdb file per distinct month in the result set
    for month in df['MONTH1'].unique().tolist():
        mydf = df.loc[df.MONTH1 == month]
        mydf.to_accessdb(out_dir + '\\%s.accdb' % month, "Data")
    print('done')
else:
    print('directory already exists')
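In case it helps to rule out the export step: the Microsoft Access ODBC driver handles Unicode text, so writing the rows through pyodbc instead of accessdb can sidestep any intermediate-encoding issue. This is a minimal sketch, not tested against your data; it assumes the target .accdb file already exists and already contains an empty "Data" table with matching columns.

import pyodbc

def export_month(mydf, accdb_path):
    # assumption: accdb_path exists and has a "Data" table matching mydf's columns
    conn_str = (
        r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};"
        r"Dbq=" + accdb_path + ";"
    )
    with pyodbc.connect(conn_str) as cn:
        cur = cn.cursor()
        cols = ", ".join(mydf.columns)
        placeholders = ", ".join("?" * len(mydf.columns))
        sql = "INSERT INTO Data ({}) VALUES ({})".format(cols, placeholders)
        # parameterized inserts let the driver handle the Arabic text as Unicode
        cur.executemany(sql, mydf.values.tolist())
        cn.commit()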
I've created a simple web scraper that gets some information from the CNN website and puts it into a database table.
It's working properly in Python, and I'm using VS Code.
I am looking for a way to run this script 2 times a day automatically. Does anyone know how to do it? I tried AWS but was not able to get it working.
I want the code to run automatically online, with my computer off, and it has to update my CSV file.
Some important information:
Since it is a web scraper, I rely on some files in my folders, such as chromedriver.exe and a CSV to which each run appends a new row of information.
Here is my code:
imports:
import pandas as pd
from datetime import datetime
import requests
import json
from pandas_datareader import data as web
import yfinance as yf
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
from datetime import date, timedelta
from selenium.webdriver.chrome.options import Options
import pyodbc
Web scraping code:
# Load the existing dataset first (the updated CSV is written back at the end of the script)
dataset = pd.read_csv(r"C:\Users\belig\OneDrive\Python\MeuProjeto\Projetos\WebScrapping_News\WebScrapping_News\dataset_news.csv", sep=";")
dataset.to_csv(r"C:\Users\belig\OneDrive\Python\MeuProjeto\Projetos\WebScrapping_News\WebScrapping_News\dataset.csv", index=False)  # snapshot copy; moved after the read so dataset is defined
# Creating Variables
# %%
# Placeholder values; each variable is overwritten with its real value below
Date = 1
WeekDay = 2
Brazil_Ibovespa = 3
BRL_Dollar = 4
Titulo_CNNBrasil = 5
# Setup Date Var
Date = datetime.now().strftime("%d/%m/%Y, %H:%M:%S")
Date
# Setup WeekDay Var
date_now = datetime.now()
WeekDay = date_now.strftime("%A")
WeekDay
# Setup Brazil_Ibovespa Var
today = date.today()
start_day = today - timedelta(days = 7)
tickers_DowJones = "^BVSP"
datayf = yf.download(tickers_DowJones, start=start_day, end=today)
print(datayf)
datayf = datayf['Adj Close']
Brazil_Ibovespa = datayf[-1]
Brazil_Ibovespa
# Setup BRL_Dollar Var
requisicao = requests.get('https://economia.awesomeapi.com.br/all/USD-BRL')
cotacao = requisicao.json()
BRL_Dollar = round(float(cotacao['USD']['bid']),2)
BRL_Dollar
# Starting driver for web scraping (option to hide the window)
driver_exe = r'C:\Users\belig\OneDrive\Python\MeuProjeto\Projetos\WebScrapping_News\WebScrapping_News\chromedriver.exe'
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(driver_exe, options=options)
# Setup Titulo_CNNBrasil Var
driver.get('https://www.cnnbrasil.com.br/')
Titulo_CNNBrasil = driver.find_element(By.XPATH, '//*[@id="block1847327"]/div/div/a/h2').text
print(Titulo_CNNBrasil)
# Setup Url_CNNBrasil Var
driver.find_element(By.XPATH, '//*[@id="block1847327"]/div/div/a/h2').click()
Url_CNNBrasil = driver.current_url
print(Url_CNNBrasil)
# Setup Topics_CNNBrasil Var
try:
    Topics_CNNBrasil = driver.find_element(By.CLASS_NAME, 'tags__list').text
    Topics_CNNBrasil = Topics_CNNBrasil.replace('\n', ', ')
    print(Topics_CNNBrasil)
except:
    Topics_CNNBrasil = 'None'
    print(Topics_CNNBrasil)
Add to SQL and DataFrame:
# Add Row to DataFrame
new_row = pd.DataFrame({"Date": [Date], "WeekDay": [WeekDay], "Brazil_Ibovespa": [Brazil_Ibovespa], "BRL_Dollar": [BRL_Dollar], "Titulo_CNNBrasil": [Titulo_CNNBrasil], "Url_CNNBrasil": [Url_CNNBrasil], "Topics_CNNBrasil": [Topics_CNNBrasil]}, index=[0])
print(new_row)
dataset = pd.concat([dataset, new_row], ignore_index=True)
# dataset = dataset.append({"Date":Date, "WeekDay": WeekDay}, ignore_index=True)
print(dataset)
dataset.to_csv(r'C:\Users\belig\OneDrive\Python\MeuProjeto\Projetos\WebScrapping_News\WebScrapping_News\dataset_news.csv', index=False, encoding="utf-8-sig", sep = ';')
# Add info to SQL Server
dados_conexao = (
"Driver={SQL Server};"
"Server=Beligolli;"
"Database=WebScrappingNews;"
'Trusted_Connection=yes;'
# UID = Login;
# PWD=Senha;
)
conexao = pyodbc.connect(dados_conexao)
cursor = conexao.cursor()
comando = "INSERT INTO NewsDataBase (Date_Hour, WeekDay_, Brazil_Ibovespa, BRL_Dollar, Titulo_CNNBrasil, Url_CNNBrasil, Topics_CNNBrasil) VALUES (?, ?, ?, ?, ?, ?, ?)"
valores = (Date, WeekDay, Brazil_Ibovespa, BRL_Dollar, Titulo_CNNBrasil, Url_CNNBrasil, Topics_CNNBrasil)
cursor.execute(comando, valores)
cursor.commit()
cursor.close()
conexao.close()
print(f'Adicionado {Date} - {WeekDay} ao dataset')
First, I'd start with the schedule library to schedule the events:
import schedule
import time

def job():
    print("I'm working...")

schedule.every(10).minutes.do(job)
schedule.every().hour.do(job)
schedule.every().day.at("10:30").do(job)

while True:
    schedule.run_pending()
    time.sleep(1)
Then save the script and, if you want, run the app as a Windows service. For your twice-a-day requirement specifically, see the sketch below.
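A sketch along these lines should work, assuming the scraping code above is wrapped in a function (scrape_and_store here is a hypothetical name; the two run times are arbitrary examples):

import time
import schedule

def scrape_and_store():
    # hypothetical wrapper around the scraping + CSV + SQL code above
    ...

# register the job twice per day at fixed times
schedule.every().day.at("09:00").do(scrape_and_store)
schedule.every().day.at("21:00").do(scrape_and_store)

while True:
    schedule.run_pending()
    time.sleep(60)  # a coarse poll interval is fine for daily jobs

Note that this only runs while the machine (and the script) is up; to run with your computer off, the same script has to be hosted on a server or cloud VM that stays on.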
I have written this function to show CSV file data in a Streamlit UI. The CSV file is opened in 'w+' mode, so its data is refreshed every 3 minutes, and I want the UI to display the refreshed data at the same time. The CSV data gets updated at times like 9:15, 9:18, and 9:21 AM.
def strike_details():
    col1, col2 = st.columns(2)
    with col1:
        st.header("NIFTY")
        data1 = pd.read_csv(os.path.join(directory_of_python_script, 'strike_data_csv', "NIFTY_strike.csv"), on_bad_lines='skip')
        st.table(data1)
    with col2:
        st.header("BANKNIFTY")
        data2 = pd.read_csv(os.path.join(directory_of_python_script, 'strike_data_csv', "BANKNIFTY_strike.csv"), on_bad_lines='skip')
        st.table(data2)

strike_details()
This function displays tables like the image below.
Did you try the schedule module?
import os
import time

import pandas as pd
import streamlit as st
from schedule import every, repeat, run_pending

with st.empty():
    @repeat(every(3).minutes)
    def strike_details():
        # directory_of_python_script comes from the asker's script
        col1, col2 = st.columns(2)
        with col1:
            st.header("NIFTY")
            data1 = pd.read_csv(os.path.join(directory_of_python_script, 'strike_data_csv', "NIFTY_strike.csv"), on_bad_lines='skip')
            st.table(data1)
        with col2:
            st.header("BANKNIFTY")
            data2 = pd.read_csv(os.path.join(directory_of_python_script, 'strike_data_csv', "BANKNIFTY_strike.csv"), on_bad_lines='skip')
            st.table(data2)

while True:
    run_pending()
    time.sleep(1)
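If the decorator route doesn't refresh as expected, another pattern (a sketch, untested; it reuses the asker's original undecorated strike_details and skips schedule entirely) is to redraw into a placeholder on a fixed interval:

import time
import streamlit as st

placeholder = st.empty()
while True:
    # redraw both tables into the same placeholder each cycle
    with placeholder.container():
        strike_details()
    time.sleep(180)  # matches the CSV's 3-minute refresh cadence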
There is an error message (KeyError: '') from lines 25 and 26 when the text inputs are empty, but I can't manage to get rid of it. I want the variables Vec1 and Vec2 to be stored only when there is a text input for both widgets. To run the code, you can load any .xlsx table, like this one:
Var1  Var2
   5     7
   6     8
Here is my code:
import numpy as np
import pandas as pd
import streamlit as st
st.set_page_config(layout="wide")
#Import file
xlsx_file = st.sidebar.file_uploader('Import File', type = 'xlsx')
#Select Variables of interest
Vec1Name = st.sidebar.text_input("First Variable Name")
Vec2Name = st.sidebar.text_input("Second Variable Name")
st.title('Data')
col1, col2 = st.columns((3,1))
if xlsx_file is not None:
    df = pd.read_excel(xlsx_file)
    col1.write(''' #### Dataframe''')
    col1.write(df)
    if all(var is not None for var in [Vec1Name, Vec2Name]):
        #Store Variables
        Vec1 = df[str(Vec1Name)]
        Vec2 = df[str(Vec2Name)]
        #Variables of Interest
        col2.write(''' #### Variables of Interest''')
        col2.write(df[[str(Vec1Name),str(Vec2Name)]])
Thank you for your help!
The error you're facing is because the text_input value cannot be found among df's columns. If you already know you want the input to be one of the columns, why not use st.selectbox instead and pass df.columns as the options? Let me know if this code works better:
import numpy as np
import pandas as pd
import streamlit as st
st.set_page_config(layout="wide")
#Import file
xlsx_file = st.sidebar.file_uploader('Import File', type = 'xlsx')
st.title('Data')
col1, col2 = st.columns((3,1))
if xlsx_file is not None:
    df = pd.read_excel(xlsx_file)
    #Select Variables of interest
    Vec1Name = st.sidebar.selectbox("First Variable Name", df.columns)
    Vec2Name = st.sidebar.selectbox("Second Variable Name", df.columns)
    col1.write(''' #### Dataframe''')
    col1.write(df)
    #Store Variables
    Vec1 = df[str(Vec1Name)]
    Vec2 = df[str(Vec2Name)]
    #Variables of Interest
    col2.write(''' #### Variables of Interest''')
    col2.write(df[[str(Vec1Name),str(Vec2Name)]])
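If you'd rather keep the free-text inputs, a guard like this (a sketch against the same df) also avoids the KeyError. The point is that st.text_input returns an empty string, not None, when blank, which is why the `is not None` check never fires:

# only touch df once both inputs are non-empty and actually name columns
if Vec1Name and Vec2Name and Vec1Name in df.columns and Vec2Name in df.columns:
    Vec1 = df[Vec1Name]
    Vec2 = df[Vec2Name]
    col2.write(''' #### Variables of Interest''')
    col2.write(df[[Vec1Name, Vec2Name]])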
I have a simple Flask app that queries a database to write a CSV, then uses pyplot to create a chart out of it.
I would like to refresh the data in the background every 10 minutes while the app is running. The page doesn't need to refresh the HTML automatically; it just needs to have fresh data when someone opens it.
Can I do that in a single script? Or do I need to run a different script outside, in crontab or something?
I would just restart the container every 10 minutes, but the query takes about 5 minutes, so that would be a 5-minute outage. Not a great idea. I'd prefer it to fetch in the background.
Here is what I'm working with:
import os
from datetime import date
import teradatasql
import pandas as pd
import matplotlib.pyplot as plt
from flask import Flask, render_template
import time
import multitasking
### variables
ausername = os.environ.get('dbuser')
apassword = os.environ.get('dbpassword')
ahost = os.environ.get('dbserver')
systems = ["prd1", "prd2", "frz1", "frz2", "devl"]
qgsystems = ["", "#Tera_Prd2_v2", "#Tera_Frz1_v2", "#Tera_Frz2_v2", "#Tera_Devl_v2"]
weeks = ["0", "7", "30"]
query = """{{fn teradata_write_csv({system}_{week}_output.csv)}}select (bdi.infodata) as sysname,
to_char (thedate, 'MM/DD' ) || ' ' || Cast (thetime as varchar(11)) as Logtime,
sum(drc.cpuuexec)/sum(drc.secs) (decimal(7,2)) as "User CPU",
sum(drc.cpuuserv)/sum(drc.secs) (decimal(7,2)) as "System CPU",
sum(drc.cpuiowait)/sum(drc.secs) (decimal(7,2)) as "CPU IO Wait"
from dbc.resusagescpu{qgsystem} as drc
left outer join boeing_tables.dbcinfotbl{qgsystem} as bdi
on bdi.infokey = 'sysname'
where drc.thedate >= (current_date - {week})
order by logtime asc
Group by sysname,logtime
;
"""
### functions
@multitasking.task
def fetch(system, qgsystem, week):
    with teradatasql.connect(host=ahost, user=ausername, password=apassword) as con:
        with con.cursor() as cur:
            cur.execute(query.format(system=system, qgsystem=qgsystem, week=week))
            [print(row) for row in cur.fetchall()]
@multitasking.task
def plot(system, week):
    for week in weeks:
        for system in systems:
            df = pd.read_csv(system + "_" + week + "_output.csv")
            df.pop('sysname')
            df.plot.area(x="Logtime")
            figure = plt.gcf()
            figure.set_size_inches(12, 6)
            plt.savefig("/app/static/" + system + "_" + week + "_webchart.png", bbox_inches='tight', dpi=100)
### main
for week in weeks:
    for system, qgsystem in zip(systems, qgsystems):
        fetch(system, qgsystem, week)

for week in weeks:
    for system in systems:
        plot(system, week)

app = Flask(__name__, template_folder='templates')

@app.route('/')
def index():
    return render_template("index.html")
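One way to keep the data fresh in the same script is to re-run the fetch/plot loops on a background timer. A sketch, untested against the code above (the 600-second interval and the refresh_all wrapper are my own additions):

import threading

def refresh_all():
    # re-run the fetch/plot loops from the main section, then reschedule ourselves
    for week in weeks:
        for system, qgsystem in zip(systems, qgsystems):
            fetch(system, qgsystem, week)
    for week in weeks:
        for system in systems:
            plot(system, week)
    threading.Timer(600, refresh_all).start()  # run again in 10 minutes

refresh_all()  # call once at startup; subsequent runs happen in the background

Since the refresh happens on a timer thread while Flask keeps serving the last-written PNGs, there's no outage window while the 5-minute query runs.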
I have 7 tables which I want to read from an Access file (.mdb); then I need to change the values using a pandas DataFrame and save them again in a new Access file. Do you have any suggestions on how to do that?
I am relatively new to Python, and any support is highly appreciated.
This may be of some help: https://pypi.python.org/pypi/pandas_access
Everything should be straightforward once you're able to load the tables into a pandas DataFrame. Then do the data manipulations you need and send the result back to Access.
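For example, a minimal sketch with pandas_access (the my.mdb path is a placeholder; note the package shells out to mdbtools, which has to be installed separately):

import pandas_access as mdb

db_path = "my.mdb"  # placeholder path to your Access file

# list the tables in the .mdb, then load each into a DataFrame
for tbl in mdb.list_tables(db_path):
    df = mdb.read_table(db_path, tbl)
    print(tbl, df.shape)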
I think you should check this: https://pypi.python.org/pypi/pyodbc/
Also, to read data from an Access table, try something like this:
# -*- coding: utf-8 -*-
import pypyodbc
pypyodbc.lowercase = False
conn = pypyodbc.connect(
    r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};" +
    r"Dbq=C:\Users\Public\Database1.accdb;")
cur = conn.cursor()
cur.execute("SELECT CreatureID, Name_EN, Name_JP FROM Creatures")
while True:
    row = cur.fetchone()
    if row is None:
        break
    print(u"Creature with ID {0} is {1} ({2})".format(
        row.get("CreatureID"), row.get("Name_EN"), row.get("Name_JP")))
cur.close()
conn.close()
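Writing the modified data back into a new Access file works the same way in reverse. A rough sketch, assuming df holds your modified table and the target .accdb (Database2.accdb here is a hypothetical output file) already contains a Creatures table with matching columns:

# insert DataFrame rows into an existing table in the new .accdb
out_conn = pypyodbc.connect(
    r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};" +
    r"Dbq=C:\Users\Public\Database2.accdb;")  # hypothetical output file
out_cur = out_conn.cursor()
sql = "INSERT INTO Creatures (CreatureID, Name_EN, Name_JP) VALUES (?, ?, ?)"
for row in df.itertuples(index=False, name=None):
    out_cur.execute(sql, row)
out_conn.commit()
out_cur.close()
out_conn.close()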
Or . . . just use VBA, if you are already using Access.
Dim outputFileName As String
outputFileName = CurrentProject.Path & "\Export_" & Format(Date, "yyyyMMdd") & ".xls"
DoCmd.TransferSpreadsheet acExport, acSpreadsheetTypeExcel9, "Table1", outputFileName, True
DoCmd.TransferSpreadsheet acExport, acSpreadsheetTypeExcel9, "Table2", outputFileName, True
This could be an option too . . .
strPath = "V:\Reports\Worklist_Summary.xlsx"
DoCmd.TransferSpreadsheet acExport, acSpreadsheetTypeExcel12, "qryEscByDate", strPath
DoCmd.TransferSpreadsheet acExport, acSpreadsheetTypeExcel12, "qryCreatedByDate", strPath
DoCmd.TransferSpreadsheet acExport, acSpreadsheetTypeExcel12, "qryClosedByDate", strPath
DoCmd.TransferSpreadsheet acExport, acSpreadsheetTypeExcel12, "qryCreatedByUsers", strPath
DoCmd.TransferSpreadsheet acExport, acSpreadsheetTypeExcel12, "qrySummaries", strPath
Or . . . run some VBA scripts . . .
Option Compare Database
Option Explicit
Private Sub Command2_Click()
    Dim strFile As String
    Dim varItem As Variant
    strFile = InputBox("Designate the path and file name to export to...", "Export")
    If (strFile = vbNullString) Then Exit Sub
    For Each varItem In Me.List0.ItemsSelected
        DoCmd.TransferSpreadsheet transferType:=acExport, _
            spreadsheetType:=acSpreadsheetTypeExcel9, _
            tableName:=Me.List0.ItemData(varItem), _
            fileName:=strFile
    Next
    MsgBox "Process complete.", vbOKOnly, "Export"
End Sub

Private Sub Form_Open(Cancel As Integer)
    Dim strTables As String
    Dim tdf As TableDef
    For Each tdf In CurrentDb.TableDefs
        If (Left(tdf.Name, 4) <> "MSys") Then
            strTables = strTables & tdf.Name & ","
        End If
    Next
    strTables = Left(strTables, Len(strTables) - 1)
    Me.List0.RowSource = strTables
End Sub
When all data is exported, do your transformations, and load (back to Access or another destination).
I'll bet you don't even need the export step. You can probably do everything you need to do in Access, all by itself.