Im really struggling trying to figure out how I'm supposed to translate a Python script to powershell.
The code is:
list_files = ['DC-JAN-2017.xlsx', 'DC-FEB-2017.xlsx', 'DC-MAR-2017.xlsx','DC-APR-2017.xlsx', 'DC-MAY-2017.xlsx', 'DC-JUN-2017.xlsx','DC-JUL-2017.xlsx', 'DC-AUG-2017.xlsx', 'DC-SEP-2017.xlsx','DC-OCT-2017.xlsx', 'DC-NOV-2017.xlsx', 'DC-DEC-2017.xlsx']
zip_loop = zip(list_files, [i[3:6] for i in list_files])
# Final report DataFrame
df_report = pd.DataFrame()
for file_name, month in zip_loop:
# Import and Clean Data
df_clean = clean(file_raw, month)
# Build Monthly report
df_month = process_month(df_clean, month)
# Merge with previous Months report
if df_report.empty
df_report = df_month
else:
df_report = df_report.merge(df_month, on = 'index')
# Save Final Report
df_report.to_excel('Final Report.xlsx')
What i've tried so far
$list_files = ['DC-JAN-2017.xlsx', 'DC-FEB-2017.xlsx', 'DC-MAR-2017.xlsx','DC-APR-2017.xlsx', 'DC-MAY-2017.xlsx', 'DC-JUN-2017.xlsx',
'DC-JUL-2017.xlsx', 'DC-AUG-2017.xlsx', 'DC-SEP-2017.xlsx','DC-OCT-2017.xlsx', 'DC-NOV-2017.xlsx', 'DC-DEC-2017.xlsx']
$zip_loop = $zip($list_files, [$i[3:6] for $i in $list_files])
# Final report DataFrame
$df_report = $pd.DataFrame()
for $file_name, $month in $zip_loop:
# Import and Clean Data
$df_clean = clean($file_raw, $month)
# Build Monthly report
$df_month = process_month($df_clean, $month)
# Merge with previous Months report
if $df_report.empty
$df_report = $df_month
else:
$df_report = $df_report.merge($df_month, on = 'index')
# Save Final Report
$df_report.to_excel('Final Report.xlsx')
I'm not too sure what the "in", "on", statements is supposed to be in powershell.
Related
I am New in pandas and streamlit , What I am trying is to filter such a dataframe using streamlit selectbox
but unfortunately everything is going well except that when changing the filter value it does not reflect on the shown table
as you could see the name in the filter does not update the table
here is the code I have used:
import xlrd
import pandas as pd
import os
from datetime import datetime
import streamlit as st
# import plotly_express as px
# to refer to the file
# change the current directory
currentDir = os.chdir('C:\\Users\\user\\Desktop\\lists');
files=os.listdir(currentDir)
columns=['Name','status','memorize-from-surah','memorize-from-ayah','memorize-to-surah','memorize-to-ayah','memorization-grade','words-meaning-grade','revision-from-surah','revision-from-ayah','revision-to-surah','revision-to-ayah','revision-grade']
folderDF=pd.DataFrame()
for file in files:
# get the file name without extension for the sheikh name
sheikh=os.path.splitext(file)[0]
sheetDF=pd.DataFrame()
workbook = pd.ExcelFile(f'C:\\users\\user\\Desktop\\lists\\{file}')
sheets_numbers = len(workbook.sheet_names)
print(sheets_numbers)
for i in range(1, sheets_numbers-1):
# print(workbook.sheet_by_index(i).name)
current_sheet = pd.read_excel(file,sheet_name=i,header=None,index_col=1)
date= current_sheet.iloc[6, 10]
# for j in range(7,current_sheet.nrows):
# if current_sheet.cell(j,3).value=="غاب" or current_sheet.cell(j,3).value=="عذر":
# for k in range(4,current_sheet.ncols):
# current_sheet.cell(j,k).value=""
sheets=pd.read_excel(file,sheet_name=i,skiprows=11,header=None,index_col=1)
# df = pd.DataFrame(sheets.iloc[:,1:], index=index)
#remove the first col
df=pd.DataFrame(sheets.iloc[:,1:])
#remove empty rows
df=df[df.iloc[:,0].notna()]
#rename the columns
df.columns = columns
#get the nrows
nrows= len(df.index)
sheikhCol=pd.Series(nrows*sheikh)
dateCol=pd.Series(nrows*date)
halkaCol=pd.Series(nrows*i)
# df.insert(1,"sheikh",sheikhCol)
df.insert(1,"halka",halkaCol)
df.insert(2,"sheikh",sheikhCol)
df.insert(3,"date",dateCol)
df["sheikh"]=sheikh
df['date']=date
df['halka']=i
if i == 1:
sheetDF=pd.DataFrame(df)
datatoexcel = pd.ExcelWriter('C:\\users\\user\\Desktop\\dataOut.xlsx')
sheetDF.to_excel(datatoexcel)
datatoexcel.save()
else:
sheetDF = pd.concat([sheetDF, df], axis=0)
folderDF=pd.concat([folderDF,sheetDF],axis=0)
datatoexcel=pd.ExcelWriter('C:\\users\\user\\Desktop\\dataOut.xlsx')
folderDF.to_excel(datatoexcel)
datatoexcel.save()
#
# setting up the streamlit page
st.set_page_config(page_title='makraa reports',layout='wide')
# make filteration
#
st.sidebar.header("make filtration criteria")
nameFilter= folderDF['Name'].unique()
halkaFilter= folderDF['halka'].unique()
sheikhFilter= folderDF['sheikh'].unique()
student_choice= st.sidebar.selectbox("select the student Name",nameFilter)
halka_choice= st.sidebar.selectbox("select the halka Number",halkaFilter)
sheikh_choice= st.sidebar.selectbox("select the sheikh Number",sheikhFilter)
# student_choice2= st.sidebar.multiselect("select the student Name",options=nameFilter,default=nameFilter)
# filteredDf=folderDF[folderDF["Name"]== student_choice]
filteredDf = folderDF[(folderDF["Name"] == student_choice) & (folderDF["halka"] == halka_choice)]
# filteredDf=folderDF.query('Name==#student_choice')
st.write(filteredDf)
note st.dataframe(filteredDf) does not make any difference
the streamlit version I used is 0.75 , since the recent version gave me the StreamlitAPIException like that enter link description here
could you give a hand in this
Here is a sample code with example data.
Code
import streamlit as st
import pandas as pd
data = {
'Name': ['a', 'b', 'c'],
'halka': [1, 2, 3]
}
st.set_page_config(page_title='makraa reports',layout='wide')
folderDF = pd.DataFrame(data)
# make filteration
#
st.sidebar.header("make filtration criteria")
nameFilter = folderDF['Name'].unique()
halkaFilter = folderDF['halka'].unique()
# sheikhFilter = folderDF['sheikh'].unique()
student_choice = st.sidebar.selectbox("select the student Name", nameFilter)
halka_choice = st.sidebar.selectbox("select the halka Number", halkaFilter)
# sheikh_choice= st.sidebar.selectbox("select the sheikh Number",sheikhFilter)
# student_choice2= st.sidebar.multiselect("select the student Name",options=nameFilter,default=nameFilter)
filteredDf = folderDF[(folderDF["Name"] == student_choice) & (folderDF["halka"] == halka_choice)]
# filteredDf = filteredDf[filteredDf["halka"] == halkaFilter]
st.write(filteredDf)
Output
I am trying to implement a "user-friendly" portfolio optimization program in Python.
Since I am still a beginner I did not quite manage to realize it.
The only thing the program should use as input are the stock codes.
I tried to create a mwe below:
import numpy as np
import yfinance as yf
import pandas as pd
def daily_returns(price):
price = price.to_numpy()
shift_1 = price[1:]
shift_2 = price[:-1]
return (shift_1 - shift_2)/shift_1
def annual_returns(price):
price = price.to_numpy()
start = price[0]
end = price[len(price)-1]
return (end-start)/start
def adjusting(price):
adj = len(price)
diff = adj - adjvalue
if diff != 0:
price_new = price[:-diff]
else: price_new = price
return price_new
#Minimal Reproducible Example
#getting user input
names = input('Stock codes:')
names = names.split()
a = len(names)
msft = yf.Ticker(names[0])
aapl = yf.Ticker(names[1])
#import data
hist_msft = msft.history(interval='1d',start='2020-01-01',end='2020-12-31')
hist_msft = pd.DataFrame(hist_msft,columns=['Close'])
#hist_msft = hist_msft.to_numpy()
hist_aapl = aapl.history(interval='1d',start='2020-01-01',end='2020-12-31')
hist_aapl = pd.DataFrame(hist_aapl,columns=['Close'])
#hist_aapl = hist_aapl.to_numpy()
#daily returns
aapl_daily_returns = daily_returns(hist_aapl)
aapl_daily_returns = np.ravel(aapl_daily_returns)
msft_daily_returns = daily_returns(hist_msft)
msft_daily_returns = np.ravel(msft_daily_returns)
#adjusting for different trading periods
adjvalue = min(len(aapl_daily_returns),len(msft_daily_returns))
aapl_adj = adjusting(aapl_daily_returns)
msft_adj = adjusting(msft_daily_returns)
#annual returns
aapl_ann_returns = annual_returns(hist_aapl)
msft_ann_returns = annual_returns(hist_msft)
#inputs for optimization
cov_mat = np.cov([aapl_adj,msft_adj])*252
ann_returns = np.concatenate((aapl_ann_returns,msft_ann_returns))
Now I just want the code to work with a various, unknown number of inputs. I tried reading a lot about global variables or tried to figure it out with dictionaries but couldn't really achieve any progress.
I think using the for loop can solve your problem!
...
names = input('Stock codes:')
names = names.split()
for name in names:
#analyze here
#I don't know anything about stocks so I wont write anything here
...
I got "Pandas ValueError Arrays Must be All Same Length"
Before I start, I checked answers to similar problems and folks suggest to use something like:
DataFrame(dict([ (k,Series(v)) for k,v in d.iteritems() ]))
if you have only two values in dictionary or,
a = {'Links' : lines ,'Titles' : titles , 'Singers': finalsingers , 'Albums':finalalbums , 'Years' : years}
df = pd.DataFrame.from_dict(a, orient='index')
df.transpose()
But neither of them worked for me. What my code does is, goes to file in directory, captures the name and last_modified time, opens the file and use it in function called phash and returns a value. I think there could be a problem with phash function, maybe sometimes it returns a null value.
So in my case data is something like this:
raw_data = {}
hash_11 = []
time_1 = []
file_name_1 = []
for file in date_file_list:
try:
#print(file[1])
y = file[1]
file_name = os.path.basename(y) # extract just the filename or #file_name = os.path.split(file[1])
file_name = file_name.split('_-_')[0]
file_name_1.append(file_name)
#print(file_name)
# convert date tuple to MM/DD/YYYY HH:MM:SS format
#file_date = time.strftime("%m/%d/%y %H:%M:%S", file[0])
time = file[0]
time_1.append(time)
img = Image.open(str(file[1]))
hash_1 = imagehash.dhash(img)
hash_11.append(hash_1)
#hash_1 = str(hash_1)
#data = [hash_1, time, file_name]
#hamming_list.append(data)
#print(file_name, hash_1, file_date)
data ={'hash_1': hash_11,'time': time_1, 'file_name': file_name_1}
raw_data.update(data)
except:
pass
df = pd.DataFrame(raw_data, columns = ['hash_1', 'time','file_name'])
My goal is to ultimately create a scatter plot with date on the x-axis and won delegates (of each candidate) on the y-axis. I'm unsure of how to "fill in the blanks" when it comes to missing dates. I've attached a picture of the table I get.
For example, I'm trying to put March 1 as the date for Alaska, Arkansas, etc. to make it possible to plot the data.
# CREATE DATAFRAME WITH DELEGATE WON/TARGET INFORMATION
import requests
from lxml import html
import pandas
url = "http://projects.fivethirtyeight.com/election-2016/delegate-targets/"
response = requests.get(url)
doc = html.fromstring(response.text)
tables = doc.findall('.//table[#class="delegates desktop"]')
election = tables[0]
election_rows = election.findall('.//tr')
def extractCells(row, isHeader=False):
if isHeader:
cells = row.findall('.//th')
else:
cells = row.findall('.//td')
return [val.text_content() for val in cells]
def parse_options_data(table):
rows = table.findall(".//tr")
header = extractCells(rows[1], isHeader=True)
data = [extractCells(row, isHeader=False) for row in rows[2:]]
trumpdata = "Trump Won Delegates"
cruzdata = "Cruz Won Delegates"
kasichdata = "Kasich Won Delegates"
data = pandas.DataFrame(data, columns=["Date", "State or Territory", "Total Delegates", trumpdata, cruzdata, kasichdata, "Rubio"])
data.insert(4, "Trump Target Delegates", data[trumpdata].str.extract(r'(\d{0,3}$)'))
data.insert(6, "Cruz Target Delegates", data[cruzdata].str.extract(r'(\d{0,3}$)'))
data.insert(8, "Kasich Target Delegates", data[kasichdata].str.extract(r'(\d{0,3}$)'))
data = data.drop('Rubio', 1)
data[trumpdata] = data[trumpdata].str.extract(r'(^\d{0,3})')
data[cruzdata] = data[cruzdata].str.extract(r'(^\d{0,3})')
data[kasichdata] = data[kasichdata].str.extract(r'(^\d{0,3})')
return df
election_data = parse_options_data(election)
df = pandas.DataFrame(election_data)
df
You could do,
data.fillna('March 1')
I would advise you to go through the documentation
http://pandas.pydata.org/pandas-docs/stable/10min.html
I am trying to write a script to generate data. I am using random package for this. I execute the script and everything works fine. But when I check through the results, I found out that the script fails to generate the last 100+ rows for some reason.
Can someone suggest me why this could be happening?
from __future__ import print_function
from faker import Faker;
import random;
## Vaue declaration
population = 3;
product = 3;
years = 3;
months = 13;
days = 30;
tax= 3.5;
## Define Column Header
Column_Names = "Population_ID",";","Product_Name",";","Product_ID",";","Year",";",
"Month",";","Day","Quantity_sold",";","Sales_Price",";","Discount",
";","Actual_Sales_Price",tax;
## Function to generate sales related information
def sales_data():
for x in range(0,1):
quantity_sold = random.randint(5,20);
discount = random.choice(range(5,11));
sales_price = random.uniform(20,30);
return quantity_sold,round(sales_price,2),discount,round((sales_price)-(sales_price*discount)+(sales_price*tax));
## Format the month to quarter and return the value
def quarter(month):
if month >= 1 and month <= 3:
return "Q1";
elif month > 3 and month <= 6:
return "Q2";
elif month > 6 and month <= 9:
return "Q3";
else:
return "Q4";
## Generate product_id
def product_name():
str2 = "PROD";
sample2 = random.sample([1,2,3,4,5,6,7,8,9],5);
string_list = [];
for x in sample2:
string_list.append(str(x));
return (str2+''.join(string_list));
### Main starts here ###
result_log = open("C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv",'w')
print (Column_Names, result_log);
### Loop and Generate Data ###
for pop in range(0,population):
pop = random.randint(55000,85000);
for prod_id in range(0,product):
product_name2 = product_name();
for year in range(1,years):
for month in range(1,months):
for day in range(1,31):
a = sales_data();
rows = str(pop)+";"+product_name2+";"+str(prod_id)+";"+str(year)+";"+str(month)+";"+quarter(month)+";"+str(day)+";"+str(a[0])+";"+str(a[1])+";"+str(a[2])+";"+str(tax)+";"+str(a[3]);
print(rows,file=result_log);
#print (rows);
tax = tax+1;
You need to close a file to have the buffers flushed:
result_log.close()
Better still, use the file object as a context manager and have the with statement close it for you when the block exits:
filename = "C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv"
with result_log = open(filename, 'w'):
# code writing to result_log
Rather than manually writing strings with delimiters in between, you should really use the csv module:
import csv
# ..
column_names = (
"Population_ID", "Product_Name", "Product_ID", "Year",
"Month", "Day", "Quantity_sold", "Sales_Price", "Discount",
"Actual_Sales_Price", tax)
# ..
with result_log = open(filename, 'wb'):
writer = csv.writer(result_log, delimiter=';')
writer.writerow(column_names)
# looping
row = [pop, product_name2, prod_id, year, month, quarter(month), day,
a[0], a[1], a[2], tax, a[3]]
writer.writerow(row)