I have the following development which I'm working with the ElementTree and Pandas module in Python:
import xml.etree.ElementTree as ET
import pandas as pd
file_xml = ET.parse('example1.xml')
rootXML = file_xml.getroot()
def transfor_data_atri(rootXML):
file_xml = ET.parse(rootXML)
data_XML = [
{"Name": signal.attrib["Name"],
# "Value": signal.attrib["Value"]
"Value": int(signal.attrib["Value"].split(' ')[0])
} for signal in file_xml.findall(".//Signal")
]
signals_df = pd.DataFrame(data_XML)
extract_name_value(signals_df)
def extract_name_value(signals_df):
#print(signals_df)
signal_ig_st = signals_df[signals_df.Name.isin(["Status"])]
row_values_ig_st = signal_ig_st.T
vector_ig_st = row_values_ig_st.iloc[[1]]
signal_nav_DSP_rq = signals_df[signals_df.Name.isin(["SetDSP"])]
row_values_nav_DSP_rq = signal_nav_DSP_rq.T
vector_nav_DSP_rq = row_values_nav_DSP_rq.iloc[[1]]
signal_HMI_st = signals_df[signals_df.Name.isin(["HMI"])]
row_values_HMI_st = signal_HMI_st.T
vector_HMI_st = row_values_HMI_st.iloc[[1]]
signal_delay_ac = signals_df[signals_df.Name.isin(["Delay"])]
row_values_delay_ac = signal_delay_ac.T
vector_delay_ac = row_values_delay_ac.iloc[[1]]
signal_AutoConfigO_Rear = signals_df[signals_df.Name.isin(["AutoConfigO_Rear"])]
row_values_AutoConfigO_Rear = signal_AutoConfigO_Rear.T
vector_AutoConfigO_Rear = row_values_AutoConfigO_Rear.iloc[[1]]
signal_ACO_Front = signals_df[signals_df.Name.isin(["AutoConfigO_Front"])]
row_values_ACO_Front = signal_ACO_Front.T
vertor_ACO_Front = row_values_ACO_Front.iloc[[1]]
signal_ACO_Drvr = signals_df[signals_df.Name.isin(["AutoConfigO_Drvr"])]
row_values_ACO_Drvr = signal_ACO_Drvr.T
vector_ACO_Drvr = row_values_ACO_Drvr.iloc[[1]]
signal_ACO_Allst = signals_df[signals_df.Name.isin(["AutoConfigO_Allst"])]
row_values_ACO_Allst = signal_ACO_Allst.T
vector_ACO_Allst = row_values_ACO_Allst.iloc[[1]]
signal_RURRq_st = signals_df[signals_df.Name.isin(["RUResReqstStat"])]
row_values_RURRq_st = signal_RURRq_st.T
vector_RURRq_st = row_values_RURRq_st.iloc[[1]]
signal_RURqSy_st = signals_df[signals_df.Name.isin(["RUReqstrSystem"])]
row_values_RURqSy_st = signal_RURqSy_st.T
vector_RURqSy_st = row_values_RURqSy_st.iloc[[1]]
signal_RUAudS_st = signals_df[signals_df.Name.isin(["RUSource"])]
row_values_RUAudS_st = signal_RUAudS_st.T
vector_RUAudS_st = row_values_RUAudS_st.iloc[[1]]
signal_DSP_st = signals_df[signals_df.Name.isin(["DSP"])]
row_values_DSP = signal_DSP.T
vector_DSP = row_values_DSP.iloc[[1]]
print('1: ', vector_ig_st)
print('2: ', vector_nav_DSP_rq)
print('3: ', vector_HMI_st)
print('4: ', vector_delay_ac)
The output of the above is the following, they are the first 4 prints and it is fine, because it is what they want, but I have to simplify the code, so that any type of xml file of the type example.xml, can be read not only example1.xml:
The simplified code is required to bring the data as it is in the names_list variable, but not to use this variable, which is actually hard-coded:
names_list = [
'Status', 'SetDSP', 'HMI', 'Delay', 'AutoConfigO_Rear',
'AutoConfigO_Front', 'AutoConfigO_Drvr','AutoConfigO_Allst',
'RUResReqstStat', 'RUReqstrSystem', 'RUSource', 'DSP'
]
So when the client wants to put another XML file with the same structure, but with other names that are not in the code, it can read them without problem. Beforehand thank you very much.
I hope I'm understanding the questions correctly. my understanding is that
you want to dynamically produce the extract_name_value() function, and make it not as bulky in your code.
Im sorry, but I failed to comprehend the for i in signal_name: print(i) part of the question. perhaps you can rephrase the question, and help me understand?
my solution to the extract_name_value() part would be using the exec() function.
it is a built-in solution for dynamic execution.
name_list = ['Status', 'SetDSP', 'HMI', 'Delay', 'AutoConfigO_Rear',
'AutoConfigO_Front', 'AutoConfigO_Drvr', 'AutoConfigO_Allst',
'RUResReqstStat', 'RUReqstrSystem', 'RUSource', 'DSP']
def _build_extract_name_value_func(name_list):
extract_name_value_func = ""
for name in name_list:
holder_func = f"""
signal_{name} = signals_df[signals_df.Name.isin([{name}])]
row_values_{name} = signal_{name}.T
vector_{name} = row_values_{name}.iloc[[1]]
vector_list.append(vector_{name})
"""
extract_name_value_func += holder_func
return extract_name_value_func
def extract_name_value(name_list):
extract_name_value_func = build_extract_name_value_func(name_list)
exec(extract_name_value_func)
the code was not tested with actual data, because I am not familiar with handling xml structures. But I hope the python part can be some help to you.
I was able to solve it, I used a for loop and iterated the dataframe itself:
for i in signals_df.Name:
signal = signals_df [signals_df.Name.isin ([i])]
row_values = signal.T
vector = row_values.iloc [[1]]
print (vector)
Related
I wrote a code to convert PDF to CSV, read the CSV file, and export only relevant information from the CSV file. The function is supposed to return filtered information such as english_name: 'someones name', original_language_name: 'someones name' etc, but instead the command returned english_name: '', original_language_name: '' etc. Below is the code that I wrote:
import pandas as pd
import tabula
from pandas import DataFrame
from backend.classes import Shareholder, Officer
from typing import List
def strip_string(string):
return str(string).strip()
def get_float_without_thousands_separator(string, thousands_separator):
return float(string.replace(thousands_separator, ''))
def extract_officers_and_shareholders_lists_from_df(df, total_number_of_shares, no_data_placeholder, number_of_shares, thousands_separator):
officers = []
shareholders = []
NAME = 'Nama'
POSITION = 'Jabatan'
for row in range((df.shape[0])):
if str(df[POSITION][row]).strip() != no_data_placeholder:
original_language_name = strip_string(df[NAME][row])
english_name = strip_string(df[NAME][row])
position = strip_string(df[POSITION][row])
officer = Officer(english_name=english_name, original_language_name=original_language_name, position=position)
officers.append(officer)
elif str(df[number_of_shares][row]).strip() != no_data_placeholder:
original_language_name = strip_string(df[NAME][row])
english_name = strip_string(df[NAME][row])
number_of_shares_string = strip_string(df[number_of_shares][row])
number_of_shares_number = get_float_without_thousands_separator(number_of_shares_string, thousands_separator)
shareholding_percentage = (number_of_shares_number / total_number_of_shares) * 100
shareholder = Shareholder(english_name=english_name, original_language_name=original_language_name, shareholding_percentage=shareholding_percentage)
shareholders.append(shareholder)
return officers, shareholders
def get_officers_and_shareholders_lists(pdf_input_file):
NO_DATA_PLACEHOLDER = '-'
NUMBER_OF_SHARES = 'Jumlah Lembar Saham'
THOUSANDS_SEPARATOR = '.'
output_file_path = 'CSV/Officers_and_Shareholders.csv'
tabula.convert_into(pdf_input_file, output_file_path, output_format='csv', pages='all')
df = pd.read_csv(output_file_path, header=3, on_bad_lines='skip')
all_shares = df[NUMBER_OF_SHARES].to_list()
all_shares_strings = [strip_string(shares) for shares in all_shares if strip_string(shares) != NO_DATA_PLACEHOLDER]
all_shares_numbers = [get_float_without_thousands_separator(shares, THOUSANDS_SEPARATOR) for shares in all_shares_strings]
total_number_of_shares = sum(all_shares_numbers)
return extract_officers_and_shareholders_lists_from_df(
df=df,
total_number_of_shares=total_number_of_shares,
number_of_shares=NUMBER_OF_SHARES,
no_data_placeholder=NO_DATA_PLACEHOLDER,
thousands_separator=THOUSANDS_SEPARATOR)
The command call that I use for the codes on the above is python3 -m backend.officers_and_shareholders. Is there a method to pass in so that english_name returns a name, original_language_name returns a name?
I am trying to fill with Python a table in Word with DocxTemplate and I have some issues to do it properly. I want to use 2 dictionnaries to fill the data in 1 table, in the figure below.
Table to fill
The 2 dictionnaries are filled in a loop and I write the template document at the end.
The input document to create my dictionnaries is an DB extraction written in SQL.
My main issue is when I want to fill the table with my data in the 2 different dictionnaries.
In the code below I will give as an example the 2 dictionnaries with values in it.
# -*- coding: utf8 -*-
#
#
from docxtpl import DocxTemplate
if __name__ == "__main__":
document = DocxTemplate("template.docx")
DicoOccuTable = {'`num_carnet_adresses`': '`annuaire_telephonique`\n`carnet_adresses`\n`carnet_adresses_complement',
'`num_eleve`': '`CFA_apprentissage_ctrl_coherence`\n`CFA_apprentissage_ctrl_examen`}
DicoChamp = {'`num_carnet_adresses`': 72, '`num_eleve`': 66}
template_values = {}
#
template_values["keys"] = [[{"name":cle, "occu":val} for cle,val in DicoChamp.items()],
[{"table":vals} for cles,vals in DicoOccuTable.items()]]
#
document.render(template_values)
document.save('output/' + nomTable.replace('`','') + '.docx')
As a result the two lines for the table are created but nothing is written within...
I would like to add that it's only been 1 week that I work on Python, so I feel that I don't manage properly the different objects here.
If you have any suggestion to help me, I would appreciate it !
I put here the loop to create the dictionnaries, it may help you to understand why I coded it wrong :)
for c in ChampList:
with open("db_reference.sql", "r") as f:
listTable = []
line = f.readlines()
for l in line:
if 'CREATE TABLE' in l:
begin = True
linecreateTable = l
x = linecreateTable.split()
nomTable = x[2]
elif c in l and begin == True:
listTable.append(nomTable)
elif ') ENGINE=MyISAM DEFAULT CHARSET=latin1;' in l:
begin = False
nbreOccu=len(listTable)
Tables = "\n".join(listTable)
DicoChamp.update({c:nbreOccu})
DicoOccuTable.update({c:Tables})
# DicoChamp = {c:nbreOccu}
template_values = {}
Thank You very much !
Finally I found a solution for this problem. Here it is.
Instead of using 2 dictionnaries I created 1 dictionnary with this strucuture :
Dico = { Champ : [Occu , Tables] }
The full code for creating the table is detailed below :
from docxtpl import DocxTemplate
document = DocxTemplate("template.docx")
template_values = {}
Context = {}
for c in ChampList:
listTable = []
nbreOccu = 0
OccuTables = []
with open("db_reference.sql", "r") as g:
listTable = []
ligne = g.readlines()
for li in ligne:
if 'CREATE TABLE' in li:
begin = True
linecreateTable2 = li
y = linecreateTable2.split()
nomTable2 = y[2]
elif c in li and begin == True:
listTable.append(nomTable2)
elif ') ENGINE=MyISAM DEFAULT CHARSET=latin1;' in li:
begin = False
elif '/*!40101 SET COLLATION_CONNECTION=#OLD_COLLATION_CONNECTION */;' in li:
nbreOccu=len(listTable)
inter = "\n".join(listTable)
OccuTables.append(nbreOccu)
OccuTables.append(inter)
ChampNumPropre = c.replace('`','')
Context.update({ChampNumPropre:OccuTables})
else:
continue
template_values["keys"] = [{"label":cle, "cols":val} for cle,val in Context.items()]
#
document.render(template_values)
document.save('output/' + nomTable.replace('`','') + '.docx')
And I used a table with the following structure :
I hope you will find your answers here and good luck !
I am trying to implement a "user-friendly" portfolio optimization program in Python.
Since I am still a beginner I did not quite manage to realize it.
The only thing the program should use as input are the stock codes.
I tried to create a mwe below:
import numpy as np
import yfinance as yf
import pandas as pd
def daily_returns(price):
price = price.to_numpy()
shift_1 = price[1:]
shift_2 = price[:-1]
return (shift_1 - shift_2)/shift_1
def annual_returns(price):
price = price.to_numpy()
start = price[0]
end = price[len(price)-1]
return (end-start)/start
def adjusting(price):
adj = len(price)
diff = adj - adjvalue
if diff != 0:
price_new = price[:-diff]
else: price_new = price
return price_new
#Minimal Reproducible Example
#getting user input
names = input('Stock codes:')
names = names.split()
a = len(names)
msft = yf.Ticker(names[0])
aapl = yf.Ticker(names[1])
#import data
hist_msft = msft.history(interval='1d',start='2020-01-01',end='2020-12-31')
hist_msft = pd.DataFrame(hist_msft,columns=['Close'])
#hist_msft = hist_msft.to_numpy()
hist_aapl = aapl.history(interval='1d',start='2020-01-01',end='2020-12-31')
hist_aapl = pd.DataFrame(hist_aapl,columns=['Close'])
#hist_aapl = hist_aapl.to_numpy()
#daily returns
aapl_daily_returns = daily_returns(hist_aapl)
aapl_daily_returns = np.ravel(aapl_daily_returns)
msft_daily_returns = daily_returns(hist_msft)
msft_daily_returns = np.ravel(msft_daily_returns)
#adjusting for different trading periods
adjvalue = min(len(aapl_daily_returns),len(msft_daily_returns))
aapl_adj = adjusting(aapl_daily_returns)
msft_adj = adjusting(msft_daily_returns)
#annual returns
aapl_ann_returns = annual_returns(hist_aapl)
msft_ann_returns = annual_returns(hist_msft)
#inputs for optimization
cov_mat = np.cov([aapl_adj,msft_adj])*252
ann_returns = np.concatenate((aapl_ann_returns,msft_ann_returns))
Now I just want the code to work with a various, unknown number of inputs. I tried reading a lot about global variables or tried to figure it out with dictionaries but couldn't really achieve any progress.
I think using the for loop can solve your problem!
...
names = input('Stock codes:')
names = names.split()
for name in names:
#analyze here
#I don't know anything about stocks so I wont write anything here
...
I am trying to make a form where if I input a medicine's name, it will show the solution of the medicine serially. But it is kind of limited bythe way I'm making it like more lines I'll code more spaces they will get to have the number of feedback. It would be great if you could help me to make something short but have the infinity process like loop.
df = pd.DataFrame({'FEVER':['NAPA_PLUS','JERIN','PARASITAMOL'],
'GASTRIC':['SECLO40','SECLO20','ANTACID'],
'WATERINESS':['ORSALINE','TESTY_SALINE','HOME_MADE_SALINE']})
def word_list(text):
return list(filter(None, re.split('\W+', text)))
session = raw_input("INPUT THE NAME OF THE MEDICINES ONE BY ONE BY KEEPING SPACE:")
feedback = session
print(word_list(feedback))
dff = pd.DataFrame({'itemlist':[feedback]})
dff['1'] = dff['itemlist'].astype(str).str.split().str[0]
dff['2'] = dff['itemlist'].astype(str).str.split().str[1]
dff['3'] = dff['itemlist'].astype(str).str.split().str[2]
dff['4'] = dff['itemlist'].astype(str).str.split().str[3]
dff['5'] = dff['itemlist'].astype(str).str.split().str[4]
for pts1 in dff['1']:
pts1 = df.columns[df.isin([pts1]).any()]
for pts2 in dff['2']:
pts2 = df.columns[df.isin([pts2]).any()]
for pts3 in dff['3']:
pts3 = df.columns[df.isin([pts3]).any()]
for pts4 in dff['4']:
pts4 = df.columns[df.isin([pts4]).any()]
for pts5 in dff['5']:
pts5 = df.columns[df.isin([pts5]).any()]
This wraps your repeated code into two loops:
...
dff = pd.DataFrame({'itemlist':[feedback]})
limit = 5
for i in xrange(limit):
name = str(i+1)
dff[name] = dff['itemlist'].astype(str).str.split().str[i]
for pts in dff[name]:
pts = df.columns[df.isin([pts]).any()]
Is it possible to use a for loop to search through the text of tags that correspond to a certain phrase. I've been trying to create this loop but isn't hasn't been working. Any help is appreciated thanks! Here is my code:
def parse_page(self, response):
titles2 = response.xpath('//div[#id = "mainColumn"]/h1/text()').extract_first()
year = response.xpath('//div[#id = "mainColumn"]/h1/span/text()').extract()[0].strip()
aud = response.xpath('//div[#id="scorePanel"]/div[2]')
a_score = aud.xpath('./div[1]/a/div/div[2]/div[1]/span/text()').extract()
a_count = aud.xpath('./div[2]/div[2]/text()').extract()
c_score = response.xpath('//a[#id = "tomato_meter_link"]/span/span[1]/text()').extract()[0].strip()
c_count = response.xpath('//div[#id = "scoreStats"]/div[3]/span[2]/text()').extract()[0].strip()
info = response.xpath('//div[#class="panel-body content_body"]/ul')
mp_rating = info.xpath('./li[1]/div[2]/text()').extract()[0].strip()
genre = info.xpath('./li[2]/div[2]/a/text()').extract_first()
date = info.xpath('./li[5]/div[2]/time/text()').extract_first()
box = response.xpath('//section[#class = "panel panel-rt panel-box "]/div')
actor1 = box.xpath('./div/div[1]/div/a/span/text()').extract()
actor2 = box.xpath('./div/div[2]/div/a/span/text()').extract()
actor3 = box.xpath('./div/div[3]/div/a/span/text()').extract_first()
for x in info.xpath('//li'):
if info.xpath("./li[x]/div[1][contains(text(), 'Box Office: ')/text()]]
box_office = info.xpath('./li[x]/div[2]/text()')
else if info.xpath('./li[x]/div[1]/text()').extract[0] == "Runtime: "):
runtime = info.xpath('./li[x]/div[2]/time/text()')
Your for loop is completely wrong:
1. You're using info. but searching from the root
for x in info.xpath('.//li'):
2. x is a HTML node element and you can use it this way:
if x.xpath("./div[1][contains(., 'Box Office: ')]"):
box_office = x.xpath('./div[2]/text()').extract_first()
I think you might need re() or re_first() to match the certain phrase.
For example:
elif info.xpath('./li[x]/div[1]/text()').re_first('Runtime:') == "Runtime: "):
runtime = info.xpath('./li[x]/div[2]/time/text()')
And you need to modify your for loop, cuz the variable x in it is actually a Selector but not a number, so it's not right to use it like this: li[x].
gangabass in the last answer made a good point on this.