Is there any parameter to format the date when using st.date_input() in streamlit? I want to remove T00:00:00. This is the output:
I have written this code that allows the user to add new data to a DF:
st.sidebar.header("Afegeix una classe")
options_form2 = st.sidebar.form("options_form2")
dataClasse = options_form2.date_input("Data de la classe")
genere = options_form2.selectbox(
    "Gènere",
    ('H', 'D')
)
idCode = options_form2.selectbox(
    "ID",
    ('ABSUDUHM', 'DWHWBMMX', 'MIXEECJR', 'NFKQWKOP', 'RQWLPVCJ')
)
duradaClasse = options_form2.selectbox(
    "Durada de la classe",
    ('1h', '1h 30min', '2h')
)
preu = options_form2.number_input("Preu")
submitButton = options_form2.form_submit_button()
if submitButton:
    st.write(dataClasse, genere, idCode, duradaClasse)
    newData = {
        "Data de la classe": dataClasse,
        "Genere": genere,
        "ID": idCode,
        "Durada de la classe": duradaClasse,
        "Preu": preu
    }
    # Add new data to the data frame
    df = df.append(newData, ignore_index=True)
    df.to_excel("classes_particulars.xlsx", index=False)
However, the date format added to the DF contains the time, and I don't want it. I just want to add the date.
I have tried with from datetime import date but I am not sure how to implement it the right way.
I don't think there is such a parameter, but what you can do is format your column.
Before:
Example:
Note: If the name of your column is not Date, make sure you replace the Date index below with the name of your column.
import datetime
import streamlit as st

df["Date"] = [
    datetime.datetime.strptime(
        str(target_date).split(" ")[0], '%Y-%m-%d').date()
    for target_date in df["Date"]
]
st.dataframe(df)
After:
Edit:
import streamlit as st
import pandas as pd
import datetime
st.sidebar.header("Afegeix una classe")
options_form2 = st.sidebar.form("options_form2")
dataClasse = options_form2.date_input("Data de la classe")
genere = options_form2.selectbox("Gènere", ('H', 'D'))
idCode = options_form2.selectbox(
    "ID", ('ABSUDUHM', 'DWHWBMMX', 'MIXEECJR', 'NFKQWKOP', 'RQWLPVCJ'))
duradaClasse = options_form2.selectbox("Durada de la classe",
                                       ('1h', '1h 30min', '2h'))
preu = options_form2.number_input("Preu")
submitButton = options_form2.form_submit_button()
df = pd.DataFrame([])
if submitButton:
    st.write(dataClasse, genere, idCode, duradaClasse)
    newData = {
        "Data de la classe": dataClasse,
        "Genere": genere,
        "ID": idCode,
        "Durada de la classe": duradaClasse,
        "Preu": preu
    }
    # Add new data to the data frame
    df = df.append(newData, ignore_index=True)
    df["Data de la classe"] = [
        datetime.datetime.strptime(str(target_date).split(" ")[0],
                                   '%Y-%m-%d').date()
        for target_date in df["Data de la classe"]
    ]
    df.to_excel("classes_particulars.xlsx", index=False)
    st.dataframe(df)
Output:
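As a side note to this answer, the same column conversion can be written more compactly with pandas' .dt accessor. This is just a sketch and assumes the column can be parsed by pd.to_datetime:
import pandas as pd

# parse the column (date objects or date strings) and keep only the date part
df["Data de la classe"] = pd.to_datetime(df["Data de la classe"]).dt.date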
I have a dataframe with articles; here are the first articles:
0 La reprise de l’économie française s’étiole et... Sur le Vieux-Port, à Marseille, le 28 septembr... 2020-10-06
1 Aux Etats-Unis, un rapport parlementaire veut ... Les icones des services de Google, Amazon, Fac... 2020-10-07
2 Les beaux jours de la médiation en entreprise Carnet de bureau. Des entreprises appellent de... 2020-10-07
3 Plan de relance : comment « déterminer mainten... Tribune. Parmi les multiples critiques entendu... 2020-10-07
4 Des lauréats du Nobel qui ne le méritaient pas Chaque automne, depuis plus d’un siècle, le pe... 2015-10-07
I would like to keep only articles that are at most 7 days old relative to the current date.
Something like this: current date - 7 days <= article date <= current date
I have coded this to scrape articles:
%%time
lemonde_title = []
lemonde_content = []
published_date = []
from newspaper import Article
from newspaper import ArticleException
from datetime import datetime

for art_link in all_urls:
    try:
        art = Article(art_link)
        art.download()
        art.parse()
        lemonde_title.append(art.title)
        lemonde_content.append(art.text)
        try:
            publish_date = datetime.strptime(str(art.publish_date), '%Y-%m-%d %H:%M:%S').strftime('%Y-%M-%D')
            published_date.append(publish_date)
        except:
            published_date.append('unconverted')
    except ArticleException:
        pass
I converted the date column like this:
# converting the string to datetime format
df['date'] = pd.to_datetime(df['date'], format='%Y-%M-%D')
And when I try the following code, I get the error TypeError: Invalid comparison between dtype=datetime64[ns] and date:
import datetime
date_before = datetime.date.today() - datetime.timedelta(days=7)
df = df[df['date'] >date_before]
import datetime
import pandas as pd

df = pd.DataFrame({
    'text': ["t1", "t2", "t3"],
    'date': ['2020-10-06', '2020-10-05', '2012-10-06']
})
df['date'] = pd.to_datetime(df['date'])
# convert the cutoff to a pandas Timestamp so it can be compared
# with the datetime64[ns] column without raising a TypeError
till = pd.to_datetime(datetime.date.today() - datetime.timedelta(days=7))
df = df[df['date'] >= till]
Output:
text date
0 t1 2020-10-06
1 t2 2020-10-05
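If you also want the upper bound from the question (no dates newer than today), one possible variant uses Series.between; this is a sketch and assumes df['date'] has already been converted with pd.to_datetime as above:
import datetime
import pandas as pd

today = pd.to_datetime(datetime.date.today())
week_ago = today - pd.Timedelta(days=7)
# keep only rows whose date falls within the last 7 days, inclusive on both ends
df = df[df['date'].between(week_ago, today)]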
Use this; it works:
import datetime
date_before = datetime.date.today() - datetime.timedelta(days=7)
df = df[df['date'] > date_before]
date_before can be edited however you want.
import datetime as dt
df[(dt.datetime.today() - df.date).apply(lambda x: 0 <= x.days < 7)]
This should do the trick!
import pandas as pd
import datetime as dt
from pandas_datareader import data as web
import yfinance as yf
yf.pdr_override()
filename = r'C:\Users\User\Desktop\from_python\data_from_python.xlsx'
yeah = pd.read_excel(filename, sheet_name='entry')
stock = []
stock = list(yeah['name'])
stock = [s.replace('\xa0', '') for s in stock if not pd.isna(s)]
adj_close = pd.DataFrame([])
high_price = pd.DataFrame([])
low_price = pd.DataFrame([])
volume = pd.DataFrame([])
print(stock)
['^GSPC', 'NQ=F', 'AAU', 'ALB', 'AOS', 'APPS', 'AQB', 'ASPN', 'ATHM', 'AZRE', 'BCYC', 'BGNE', 'CAT', 'CC', 'CLAR', 'CLCT', 'CMBM', 'CMT', 'CRDF', 'CYD', 'DE', 'DKNG', 'EARN', 'EMN', 'FBIO', 'FBRX', 'FCX', 'FLXS', 'FMC', 'FMCI', 'GME', 'GRVY', 'HAIN', 'HBM', 'HIBB', 'IEX', 'IOR', 'KFS', 'MAXR', 'MPX', 'MRTX', 'NSTG', 'NVCR', 'NVO', 'OESX', 'PENN', 'PLL', 'PRTK', 'RDY', 'REGI', 'REKR', 'SBE', 'SQM', 'TCON', 'TCS', 'TGB', 'TPTX', 'TRIL', 'UEC', 'VCEL', 'VOXX', 'WIT', 'WKHS', 'XNCR']
for symbol in stock:
    adj_close[symbol] = web.get_data_yahoo([symbol], start, end)['Adj Close']
I have a list of tickers and I have got the adjusted close prices; how can I get these tickers' NAMES and SECTORS?
For a single ticker, I found on the web that it can be done as below:
sbux = yf.Ticker("SBUX")
tlry = yf.Ticker("TLRY")
print(sbux.info['sector'])
print(tlry.info['sector'])
How can I make this into a dataframe so that I can put the data into Excel, as I am doing for the adjusted close price?
Thanks a lot!
You can try this answer using a package called yahooquery. Disclaimer: I am the author of the package.
from yahooquery import Ticker
import pandas as pd
symbols = ['^GSPC', 'NQ=F', 'AAU', 'ALB', 'AOS', 'APPS', 'AQB', 'ASPN', 'ATHM', 'AZRE', 'BCYC', 'BGNE', 'CAT', 'CC', 'CLAR', 'CLCT', 'CMBM', 'CMT', 'CRDF', 'CYD', 'DE', 'DKNG', 'EARN', 'EMN', 'FBIO', 'FBRX', 'FCX', 'FLXS', 'FMC', 'FMCI', 'GME', 'GRVY', 'HAIN', 'HBM', 'HIBB', 'IEX', 'IOR', 'KFS', 'MAXR', 'MPX', 'MRTX', 'NSTG', 'NVCR', 'NVO', 'OESX', 'PENN', 'PLL', 'PRTK', 'RDY', 'REGI', 'REKR', 'SBE', 'SQM', 'TCON', 'TCS', 'TGB', 'TPTX', 'TRIL', 'UEC', 'VCEL', 'VOXX', 'WIT', 'WKHS', 'XNCR']
# Create Ticker instance, passing symbols as first argument
# Optional asynchronous argument allows for asynchronous requests
tickers = Ticker(symbols, asynchronous=True)
data = tickers.get_modules("summaryProfile quoteType")
df = pd.DataFrame.from_dict(data).T
# flatten dicts within each column, creating new dataframes
dataframes = [pd.json_normalize([x for x in df[module] if isinstance(x, dict)]) for module in ['summaryProfile', 'quoteType']]
# concat dataframes from previous step
df = pd.concat(dataframes, axis=1)
# View columns
df.columns
Index(['address1', 'address2', 'city', 'state', 'zip', 'country', 'phone',
'fax', 'website', 'industry', 'sector', 'longBusinessSummary',
'fullTimeEmployees', 'companyOfficers', 'maxAge', 'exchange',
'quoteType', 'symbol', 'underlyingSymbol', 'shortName', 'longName',
'firstTradeDateEpochUtc', 'timeZoneFullName', 'timeZoneShortName',
'uuid', 'messageBoardId', 'gmtOffSetMilliseconds', 'maxAge'],
dtype='object')
# Data you're looking for
df[['symbol', 'shortName', 'sector']].head(10)
symbol shortName sector
0 NQZ20.CME Nasdaq 100 Dec 20 NaN
1 ALB Albemarle Corporation Basic Materials
2 AOS A.O. Smith Corporation Industrials
3 ASPN Aspen Aerogels, Inc. Industrials
4 AAU Almaden Minerals, Ltd. Basic Materials
5 ^GSPC S&P 500 NaN
6 ATHM Autohome Inc. Communication Services
7 AQB AquaBounty Technologies, Inc. Consumer Defensive
8 APPS Digital Turbine, Inc. Technology
9 BCYC Bicycle Therapeutics plc Healthcare
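To then put this into Excel, as asked in the question, a possible follow-up (a sketch, assuming an Excel writer such as openpyxl is installed; the filename is made up):
# write the name/sector table to an Excel file
df[['symbol', 'shortName', 'sector']].to_excel('ticker_sectors.xlsx', index=False)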
This processes stocks and sectors at the same time. However, some tickers do not have a sector, so error handling is added.
Since each column name is built from the sector and the ticker name, we turn the columns into a hierarchical (MultiIndex) column index and update the retrieved data frame. Finally, it is saved in CSV format so it can be imported into Excel. Because of the large number of tickers, I have only tried some of them, so there may be some issues.
import datetime
import pandas as pd
import yfinance as yf
import pandas_datareader.data as web
yf.pdr_override()
start = "2018-01-01"
end = "2019-01-01"
# symbol = ['^GSPC', 'NQ=F', 'AAU', 'ALB', 'AOS', 'APPS', 'AQB', 'ASPN', 'ATHM', 'AZRE', 'BCYC', 'BGNE', 'CAT',
#'CC', 'CLAR', 'CLCT', 'CMBM', 'CMT', 'CRDF', 'CYD', 'DE', 'DKNG', 'EARN', 'EMN', 'FBIO', 'FBRX', 'FCX', 'FLXS',
#'FMC', 'FMCI', 'GME', 'GRVY', 'HAIN', 'HBM', 'HIBB', 'IEX', 'IOR', 'KFS', 'MAXR', 'MPX', 'MRTX', 'NSTG', 'NVCR',
#'NVO', 'OESX', 'PENN', 'PLL', 'PRTK', 'RDY', 'REGI', 'REKR', 'SBE', 'SQM', 'TCON', 'TCS', 'TGB', 'TPTX', 'TRIL',
#'UEC', 'VCEL', 'VOXX', 'WIT', 'WKHS', 'XNCR']
stock = ['^GSPC', 'NQ=F', 'AAU', 'ALB', 'AOS', 'APPS']
adj_close = pd.DataFrame([])
for symbol in stock:
    try:
        sector = yf.Ticker(symbol).info['sector']
        name = yf.Ticker(symbol).info['shortName']
    except:
        sector = 'None'
        name = 'None'
    # column label combines the ticker symbol and its short name, matching the output below
    adj_close[sector, symbol + '_' + name] = web.get_data_yahoo(symbol, start=start, end=end)['Adj Close']
idx = pd.MultiIndex.from_tuples(adj_close.columns)
adj_close.columns = idx
adj_close.head()
None Basic Materials Industrials Technology
^GSPC_None NQ=F_None AAU_None ALB_Albemarle Corporation AOS_A.O. Smith Corporation APPS_Digital Turbine, Inc.
2018-01-02 2695.810059 6514.75 1.03 125.321663 58.657742 1.79
2018-01-03 2713.060059 6584.50 1.00 125.569397 59.010468 1.87
2018-01-04 2723.989990 6603.50 0.98 124.073502 59.286930 1.86
2018-01-05 2743.149902 6667.75 1.00 125.502716 60.049587 1.96
2018-01-08 2747.709961 6688.00 0.95 130.962250 60.335583 1.96
# for excel
adj_close.to_csv('stock.csv', sep=',')
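If you would rather write an Excel workbook directly instead of going through CSV, a possible variant (a sketch, assuming a suitable Excel engine is available; note that pandas requires keeping the index when the columns are a MultiIndex):
# writing MultiIndex columns to Excel requires index=True (the default)
adj_close.to_excel('stock.xlsx')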
I am trying to create dummy data as follows:
import numpy as np
import pandas as pd
def dummy_historical(seclist, dates, startvalues):
    dfHist = pd.DataFrame(0, index=[0], columns=seclist)
    for sec in seclist:
        # (works fine)
        svalue = startvalues[sec].max()
        # this creates a random sequence of 84 rows and 1 column (works fine)
        dfRandom = pd.DataFrame(np.random.randint(svalue-10, svalue+10, size=(dates.size, 1)), index=dates, columns=[sec])
        # does not work
        dfHist[sec] = pd.concat([dfHist[sec], dfRandom])
    return dfHist
When I print dfHist, it only shows me the first row (as when it was initialized), so nothing has been filled in.
Here is an example of the data:
seclist = ['AAPL', 'GOOGL']
# use any number for startvalues
dates = DatetimeIndex(['2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
'2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
'2017-01-13', '2017-01-14', '2017-01-15', '2017-01-16',
'2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20',
'2017-01-21', '2017-01-22', '2017-01-23', '2017-01-24',
'2017-01-25', '2017-01-26', '2017-01-27', '2017-01-28',
'2017-01-29', '2017-01-30', '2017-01-31', '2017-02-01',
'2017-02-02', '2017-02-03', '2017-02-04', '2017-02-05',
'2017-02-06', '2017-02-07', '2017-02-08', '2017-02-09',
'2017-02-10', '2017-02-11', '2017-02-12', '2017-02-13',
'2017-02-14', '2017-02-15', '2017-02-16', '2017-02-17',
'2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
'2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
'2017-02-26', '2017-02-27', '2017-02-28', '2017-03-01',
'2017-03-02', '2017-03-03', '2017-03-04', '2017-03-05',
'2017-03-06', '2017-03-07', '2017-03-08', '2017-03-09',
'2017-03-10', '2017-03-11', '2017-03-12', '2017-03-13',
'2017-03-14', '2017-03-15', '2017-03-16', '2017-03-17',
'2017-03-18', '2017-03-19', '2017-03-20', '2017-03-21',
'2017-03-22', '2017-03-23', '2017-03-24', '2017-03-25',
'2017-03-26', '2017-03-27', '2017-03-28', '2017-03-29'],
dtype='datetime64[ns]', freq='D')
You need to pass axis=1 to concat if you want to concatenate columns. In addition, you don't need to initialize your data frame with data at the beginning (unless you want to keep the 0 value):
def dummy_historical(seclist, dates, startvalues):
    dfHist = pd.DataFrame()
    for sec in seclist:
        svalue = startvalues[sec].max()
        dfRandom = pd.DataFrame(np.random.randint(svalue-10, svalue+10, size=(dates.size, 1)), index=dates, columns=[sec])
        dfHist = pd.concat([dfHist, dfRandom], axis=1)
    return dfHist
You can write this even more concisely, avoiding concat altogether:
def generate(sec):
    svalue = startvalues[sec].max()
    return np.random.randint(svalue-10, svalue+10, size=dates.size)

dfHist = pd.DataFrame({sec: generate(sec) for sec in seclist}, index=dates)
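For completeness, a small usage sketch with made-up inputs (the securities, start values, and date range below are hypothetical, not from the question), reusing the dummy_historical function defined above:
import numpy as np
import pandas as pd

seclist = ['AAPL', 'GOOGL']
dates = pd.date_range('2017-01-05', periods=84, freq='D')
# startvalues is assumed to be a DataFrame with one start value per security
startvalues = pd.DataFrame({'AAPL': [120], 'GOOGL': [800]})

dfHist = dummy_historical(seclist, dates, startvalues)
print(dfHist.head())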