QuantLib cpibond bond example in Python - python

I am trying to get the official C++ cpibond example working in Python. The original example is here: https://github.com/lballabio/quantlib/blob/master/QuantLib/test-suite/inflationcpibond.cpp and for scala here: https://github.com/lballabio/quantlib/blob/master/QuantLib-SWIG/Scala/examples/CPIBond.scala
When I run what I have attempted I get this error:
RuntimeError: 1st iteration: failed at 1st alive instrument, maturity
September 1st, 2010, reference date September 1st, 2009: 2nd leg:
Missing UK RPI fixing for September 1st, 2009
Here is my attempt:
import QuantLib as ql
calendar = ql.UnitedKingdom()
dayCounter = ql.ActualActual();
convention = ql.ModifiedFollowing
today = ql.Date(20, 11, 2009)
evaluationDate = calendar.adjust(today)
ql.Settings.instance().setEvaluationDate(evaluationDate)
yTS = ql.YieldTermStructureHandle(ql.FlatForward(evaluationDate, 0.05, dayCounter))
from_date = ql.Date(20, ql.July, 2007);
to_date = ql.Date(20, ql.November, 2009);
tenor = ql.Period(1, ql.Months)
rpiSchedule = ql.Schedule(from_date, to_date, tenor, calendar,
convention, convention,
ql.DateGeneration.Backward, False)
cpiTS = ql.RelinkableZeroInflationTermStructureHandle()
inflationIndex = ql.UKRPI(False, cpiTS)
fixData = [206.1, 207.3, 208.0, 208.9, 209.7, 210.9,
209.8, 211.4, 212.1, 214.0, 215.1, 216.8,
216.5, 217.2, 218.4, 217.7, 216,
212.9, 210.1, 211.4, 211.3, 211.5,
212.8, 213.4, 213.4, 213.4, 214.4,213.4, 214.4]
dte_fixings=[dtes for dtes in rpiSchedule]
print len(dte_fixings)
print len(fixData)
#must be the same length
inflationIndex.addFixings(dte_fixings, fixData)
observationLag = ql.Period(2, ql.Months)
zciisData =[( ql.Date(25, ql.November, 2010), 3.0495 ),
( ql.Date(25, ql.November, 2011), 2.93 ),
( ql.Date(26, ql.November, 2012), 2.9795 ),
( ql.Date(25, ql.November, 2013), 3.029 ),
( ql.Date(25, ql.November, 2014), 3.1425 ),
( ql.Date(25, ql.November, 2015), 3.211 ),
( ql.Date(25, ql.November, 2016), 3.2675 ),
( ql.Date(25, ql.November, 2017), 3.3625 ),
( ql.Date(25, ql.November, 2018), 3.405 ),
( ql.Date(25, ql.November, 2019), 3.48 ),
( ql.Date(25, ql.November, 2021), 3.576 ),
( ql.Date(25, ql.November, 2024), 3.649 ),
( ql.Date(26, ql.November, 2029), 3.751 ),
( ql.Date(27, ql.November, 2034), 3.77225),
( ql.Date(25, ql.November, 2039), 3.77 ),
( ql.Date(25, ql.November, 2049), 3.734 ),
( ql.Date(25, ql.November, 2059), 3.714 )]
lRates=[rtes/100.0 for rtes in zip(*zciisData)[1]]
baseZeroRate = lRates[0]
zeroSwapHelpers = [ql.ZeroCouponInflationSwapHelper(a[1]/100,observationLag,
a[0], calendar, convention, dayCounter, inflationIndex) for a in zciisData]
cpiTS.linkTo(ql.PiecewiseZeroInflation(
evaluationDate, calendar, dayCounter, observationLag,
inflationIndex.frequency(), inflationIndex.interpolated(),
baseZeroRate,
yTS, zeroSwapHelpers, 1.0e-12, ql.Linear()))
notional = 1000000
fixedRates = [0.1]
fixedDayCounter = ql.Actual365Fixed()
fixedPaymentConvention = ql.ModifiedFollowing
fixedPaymentCalendar = ql.UnitedKingdom()
contractObservationLag = ql.Period(3, ql.Months)
observationInterpolation = ql.CPI.Flat
settlementDays = 3
growthOnly = True
baseCPI = 206.1
startDate = ql.Date(2, 10, 2007)
endDate = ql.Date(2, 10, 2052)
fixedSchedule = ql.Schedule(startDate, endDate,
ql.Period(6, ql.Months), fixedPaymentCalendar,
ql.Unadjusted,
ql.Unadjusted,
ql.DateGeneration.Backward, False)
bond = ql.CPIBond(settlementDays, notional, growthOnly,
baseCPI, contractObservationLag,
inflationIndex, observationInterpolation,
fixedSchedule, fixedRates, fixedDayCounter,
fixedPaymentConvention)
bondEngine=ql.DiscountingBondEngine(yTS)
bond.setPricingEngine(bondEngine)
print bond.NPV()
print bond.cleanPrice()
Most of my problem is that I am finding it difficult to get to grips with how the objects fit together.

got the above example working:
import QuantLib as ql
import datetime as dt
calendar = ql.UnitedKingdom()
dayCounter = ql.ActualActual();
convention = ql.ModifiedFollowing
lag = 3
today = ql.Date(5,3,2008)
evaluationDate = calendar.adjust(today)
issue_date = calendar.advance(evaluationDate,-1, ql.Years)
maturity_date = ql.Date(2,9,2052)
fixing_date = calendar.advance(evaluationDate,-lag, ql.Months)
ql.Settings.instance().setEvaluationDate(evaluationDate)
yTS = ql.YieldTermStructureHandle(ql.FlatForward(evaluationDate, 0.05, dayCounter))
tenor = ql.Period(1, ql.Months)
from_date = ql.Date(20, ql.July, 2007);
to_date = ql.Date(20, ql.November, 2009);
rpiSchedule = ql.Schedule(from_date, to_date, tenor, calendar,
convention, convention,
ql.DateGeneration.Backward, False)
# this is going to be the holder for the inflation curve.
cpiTS = ql.RelinkableZeroInflationTermStructureHandle()
inflationIndex = ql.UKRPI(False, cpiTS)
fixData = [206.1, 207.3, 208.0, 208.9, 209.7, 210.9,
209.8, 211.4, 212.1, 214.0, 215.1, 216.8,
216.5, 217.2, 218.4, 217.7, 216,
212.9, 210.1, 211.4, 211.3, 211.5,
212.8, 213.4, 213.4, 213.4, 214.4]
dte_fixings=[dtes for dtes in rpiSchedule]
print len(fixData)
print len(dte_fixings[:len(fixData)])
#must be the same length
#inflationIndex.addFixings(dte_fixings[:len(fixData)], fixData)
#Current CPI level
#last observed rate
fixing_rate = 214.4
inflationIndex.addFixing(fixing_date, fixing_rate)
observationLag = ql.Period(lag, ql.Months)
zciisData =[( ql.Date(25, ql.November, 2010), 3.0495 ),
( ql.Date(25, ql.November, 2011), 2.93 ),
( ql.Date(26, ql.November, 2012), 2.9795 ),
( ql.Date(25, ql.November, 2013), 3.029 ),
( ql.Date(25, ql.November, 2014), 3.1425 ),
( ql.Date(25, ql.November, 2015), 3.211 ),
( ql.Date(25, ql.November, 2016), 3.2675 ),
( ql.Date(25, ql.November, 2017), 3.3625 ),
( ql.Date(25, ql.November, 2018), 3.405 ),
( ql.Date(25, ql.November, 2019), 3.48 ),
( ql.Date(25, ql.November, 2021), 3.576 ),
( ql.Date(25, ql.November, 2024), 3.649 ),
( ql.Date(26, ql.November, 2029), 3.751 ),
( ql.Date(27, ql.November, 2034), 3.77225),
( ql.Date(25, ql.November, 2039), 3.77 ),
( ql.Date(25, ql.November, 2049), 3.734 ),
( ql.Date(25, ql.November, 2059), 3.714 )]
#lRates=[rtes/100.0 for rtes in zip(*zciisData)[1]]
#baseZeroRate = lRates[0]
# One bootstrap helper per zero-coupon inflation swap quote. The quotes in
# zciisData are in percent, so they are divided by 100 before being passed in.
zeroSwapHelpers = [
    ql.ZeroCouponInflationSwapHelper(rate / 100.0, observationLag,
                                     date, calendar, convention, dayCounter,
                                     inflationIndex)
    for date, rate in zciisData
]
# Base zero rate: the first quote, expressed in the same decimal units as the
# helper rates (passing the raw percent figure overstates it by a factor of 100).
baseZeroRate = zciisData[0][1] / 100.0
# the derived inflation curve
jj = ql.PiecewiseZeroInflation(
    evaluationDate, calendar, dayCounter, observationLag,
    inflationIndex.frequency(), inflationIndex.interpolated(),
    baseZeroRate,
    yTS, zeroSwapHelpers, 1.0e-12, ql.Linear())
# Point the relinkable handle (the one inflationIndex was built with) at the curve.
cpiTS.linkTo(jj)
notional = 1000000
fixedRates = [0.1]
fixedDayCounter = ql.Actual365Fixed()
fixedPaymentConvention = ql.ModifiedFollowing
fixedPaymentCalendar = ql.UnitedKingdom()
contractObservationLag = ql.Period(3, ql.Months)
observationInterpolation = ql.CPI.Flat
settlementDays = 3
growthOnly = False
baseCPI = 206.1
fixedSchedule = ql.Schedule(issue_date,
maturity_date,
ql.Period(ql.Semiannual),
fixedPaymentCalendar,
ql.Unadjusted,
ql.Unadjusted,
ql.DateGeneration.Backward,
False)
bond = ql.CPIBond(settlementDays,
notional,
growthOnly,
baseCPI,
contractObservationLag,
inflationIndex,
observationInterpolation,
fixedSchedule,
fixedRates,
fixedDayCounter,
fixedPaymentConvention)
#bond2= ql.QuantLib.C
bondEngine=ql.DiscountingBondEngine(yTS)
bond.setPricingEngine(bondEngine)
print bond.NPV()
print bond.cleanPrice()
compounding = ql.Compounded
yield_rate = bond.bondYield(fixedDayCounter,compounding,ql.Semiannual)
y_curve = ql.InterestRate(yield_rate,fixedDayCounter,compounding,ql.Semiannual)
##Collate results
print "Clean Price:", bond.cleanPrice()
print "Dirty Price:", bond.dirtyPrice()
print "Notional:", bond.notional()
print "Yield:", yield_rate
print "Accrued Amount:", bond.accruedAmount()
print "Settlement Value:", bond.settlementValue()
#suspect there's more to this for TIPS
print "Duration:", ql.BondFunctions.duration(bond,y_curve)
print "Convexity:", ql.BondFunctions.convexity(bond,y_curve)
print "Bps:", ql.BondFunctions.bps(bond,y_curve)
print "Basis Point Value:", ql.BondFunctions.basisPointValue(bond,y_curve)
print "Yield Value Basis Point:", ql.BondFunctions.yieldValueBasisPoint(bond,y_curve)
print "NPV:", bond.NPV()
# get the cash flows:
#cf_list=[(cf.amount(),cf.date()) for cf in bond.cashflows()]
def to_datetime(d):
    """Build a ``datetime.datetime`` (midnight) from a date-like object.

    *d* must expose ``year()``, ``month()`` and ``dayOfMonth()`` methods; their
    return values are passed straight to ``dt.datetime``.
    """
    parts = (d.year(), d.month(), d.dayOfMonth())
    return dt.datetime(*parts)
# For every cash flow of the bond print: payment date, amount, the inflation
# curve's zero rate at that date and the nominal curve's zero rate at that date.
for cf in bond.cashflows():
    try:
        amt = cf.amount()
        rte = jj.zeroRate(cf.date())
        zc = yTS.zeroRate(cf.date(), fixedDayCounter, compounding, ql.Semiannual).rate()
    except Exception:
        # Best effort: if any of the three lookups fails, report zeros for this
        # cash flow. Catching Exception (not a bare except) keeps Ctrl-C and
        # SystemExit working.
        amt = 0
        rte = 0
        zc = 0
    # %-formatting gives the same space-separated line under Python 2 and 3.
    print("%s %s %s %s" % (to_datetime(cf.date()), amt, rte, zc))
The issue it seems was that the inflationIndex object needed one date instead of multiple index points. My assumption was it would pull out the latest valid point.
The way the pricer works is that the real coupons are increased by the inflation rate curve term structure, zciisData. The result therefore becomes a nominal future cash flow. To price then the bondpricer simply discounts these by the nominal term structure. I added some additional code to print the determined cash flows and the "growth factor" and then the discount rate.

Related

For Loops Help in python

Need your help with this?
I am trying to match the list newtracks (names of songs in a folder) with the lists filezto and album from a spreadsheet.
If the name of a song in the folder matches an entry in filezto and has an album of Nasb, copy it to the folders in the list ordfldrtosrt; if it is not a Nasb album, copy the song to the folders in the list ntsubfldrs.
For some reason, when run, the if statement does not sync them up in order to print that they match and copy the files to the folders. I have tried different options but cannot make it work. Thanks in advance.
albm = ["NASB", "NASB", "NASB", "NASB", "NTSB", "NTSB", "NTSB", "NTSB"]
newtracks = [
"Track1.mp3",
"Track2.mp3",
"Track3.mp3",
"Track4.mp3",
"Track5.mp3",
"Track5.mp3",
]
filezto = [
"Track1.mp3",
"Track2.mp3",
"Track3.mp3",
"Track4.mp3",
"Track5.mp3",
"Track5.mp3",
]
ordfldrtosrt = ["1-GEN-GENE", "2 - EXO - Exos", "3 - LEV - Lev", "4 - NUM - Numb"]
ntsubfldrs = ["1-MAT-Matt", "2 - MRK - Mar", "3 - LUK - LI", "4 - JHN - Jo"]
# NOTE(review): indentation was lost in this paste; presumably the second `for`
# is nested in the first and the if/else is the body of the second.
for album in albm:
# The generator below yields every combination (cartesian product) of one entry
# from each of the four lists.
# NOTE(review): the loop targets reuse the names of the lists being iterated,
# so once the loop has run, `newtracks`, `filezto`, `ordfldrtosrt` and
# `ntsubfldrs` are bound to single strings instead of the original lists.
for newtracks, filezto, ordfldrtosrt, ntsubfldrs in (
(ntk, ft, otm, nts)
for ntk in newtracks
for ft in filezto
for otm in ordfldrtosrt
for nts in ntsubfldrs
):
# NOTE(review): `albm` contains "NASB"; this case-sensitive comparison with
# "Nasb" can never be true, so only the else branch runs.
if album == "Nasb" and newtracks == filezto:
print("Yes", newtracks, " files to", filezto)
# shutil.copy('C:\\Users\\mrdrj\\Desktop\\SJ\\NASB\\'+newtracks, 'D:\\Desktop\\pythonProject\\Nasb\\'+ ordfldrtosrt+'\\'+newtracks)
else:
print("No", newtracks, " files to", filezto)
# shutil.copy('C:\\Users\\mrdrj\\Desktop\\SJ\\NASB\\'+newtracks, 'D:\\Desktop\\pythonProject\\Nasb\\'+ ntsubfldrs +'\\'+newtracks)
From your last comment, I am guessing that you have a list of (album, track) pairs from your spreadsheet and you are trying to match that up with a list of tracks from one of the albums in a folder:
folder = [
"Track1.mp3",
"Track2.mp3",
"Track3.mp3",
"Track4.mp3",
"Track5.mp3",
"Track5.mp3",
]
spreadsheet = [
( "NASB", "Track1.mp3" ),
( "NASB", "Track2.mp3" ),
( "NASB", "Track3.mp3" ),
( "NASB", "Track4.mp3" ),
( "NASB", "Track5.mp3" ),
( "NASB", "Track5.mp3" ),
( "NTSB", "Track1.mp3" ),
( "NTSB", "Track2.mp3" ),
( "NTSB", "Track3.mp3" ),
( "NTSB", "Track4.mp3" ),
( "NTSB", "Track5.mp3" ),
( "NTSB", "Track5.mp3" )
]
# For each spreadsheet row, report 'Yes' when it is a NASB track that also
# exists in the folder listing, otherwise 'No'.
for album, track in spreadsheet:
    verdict = 'Yes' if album == 'NASB' and track in folder else 'No'
    print('%s %s' % (verdict, track))
which gives you the output:
Yes Track1.mp3
Yes Track2.mp3
Yes Track3.mp3
Yes Track4.mp3
Yes Track5.mp3
Yes Track5.mp3
No Track1.mp3
No Track2.mp3
No Track3.mp3
No Track4.mp3
No Track5.mp3
No Track5.mp3

python last working day of month (with CustomBusinessDay)?

I'd like to calculate the last working day before or after a specific date (taking holidays into account, not just weekends).
import datetime as dt
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday, \
USMartinLutherKingJr, USPresidentsDay, GoodFriday, USMemorialDay, \
USLaborDay, USThanksgivingDay
# Fixed-date holidays; ``nearest_workday`` moves a date that falls on a weekend
# to the closest weekday.
_NEW_YEARS_DAY = Holiday('NewYearsDay', month=1, day=1, observance=nearest_workday)
_INDEPENDENCE_DAY = Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday)
_CHRISTMAS = Holiday('Christmas', month=12, day=25, observance=nearest_workday)


class USTradingCalendar(AbstractHolidayCalendar):
    """Holiday calendar with nine rules: three fixed-date holidays defined here
    plus six of pandas' predefined US holiday rules."""

    # Listed in calendar order, January through December.
    rules = [
        _NEW_YEARS_DAY,
        USMartinLutherKingJr,
        USPresidentsDay,
        GoodFriday,
        USMemorialDay,
        _INDEPENDENCE_DAY,
        USLaborDay,
        USThanksgivingDay,
        _CHRISTMAS,
    ]
def get_trading_close_holidays(fromyear, toyear):
    """Return the USTradingCalendar holidays for a span of years.

    The window runs from 31 December of the year before *fromyear* through
    31 December of *toyear* (presumably so a New Year's Day observed on the
    preceding 31 December is still included - TODO confirm).
    """
    window_start = dt.datetime(fromyear - 1, 12, 31)
    window_end = dt.datetime(toyear, 12, 31)
    return USTradingCalendar().holidays(window_start, window_end)
print(get_trading_close_holidays(2018,2018))
>> DatetimeIndex(['2018-01-01', '2018-01-15', '2018-02-19', '2018-03-30', '2018-05-28', '2018-07-04', '2018-09-03', '2018-11-22', '2018-12-25'], dtype='datetime64[ns]', freq=None)
import datetime as dt
from pandas.tseries.holiday import USFederalHolidayCalendar
bday_us = CustomBusinessDay(calendar=get_trading_close_holidays(2000,2050))
d = dt.datetime(2018, 3, 31)
d - bday_us
>> Timestamp('2018-03-30 00:00:00')
This falls on Good Friday, which is a holiday (as shown above), so the result should be one day earlier: 2018-03-29.
What's the issue?
I was able to reproduce the problem and after some testing I've narrowed it down to using a DatetimeIndex as the input of the calendar parameter in CustomBusinessDay.
You can skip that and use the calendar instance directly:
import datetime as dt
import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday, \
USMartinLutherKingJr, USPresidentsDay, GoodFriday, USMemorialDay, \
USLaborDay, USThanksgivingDay
from pandas.tseries.offsets import CustomBusinessDay, BDay
# Fixed-date holidays; ``nearest_workday`` moves a date that falls on a weekend
# to the closest weekday.
_NEW_YEARS_DAY = Holiday('NewYearsDay', month=1, day=1, observance=nearest_workday)
_INDEPENDENCE_DAY = Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday)
_CHRISTMAS = Holiday('Christmas', month=12, day=25, observance=nearest_workday)


class USTradingCalendar(AbstractHolidayCalendar):
    """Holiday calendar with nine rules: three fixed-date holidays defined here
    plus six of pandas' predefined US holiday rules."""

    # Listed in calendar order, January through December.
    rules = [
        _NEW_YEARS_DAY,
        USMartinLutherKingJr,
        USPresidentsDay,
        GoodFriday,
        USMemorialDay,
        _INDEPENDENCE_DAY,
        USLaborDay,
        USThanksgivingDay,
        _CHRISTMAS,
    ]
bday_us = CustomBusinessDay(calendar=USTradingCalendar())
d = dt.datetime(2018, 3, 31)
c = d - bday_us
print(c)
The output:
2018-03-29 00:00:00

How to filter two datetime indices?

I have two datetime indices - one being a date_range of business days and the other being a list of holidays.
I filter the holiday list by a start and end date. But now I need to join them and drop any duplicates (holidays and trading days both exist).
Finally I need to convert the daterange into a list of formatted strings ie: yyyy_mm_dd that I can iterate through later.
Here is my code so far:
import datetime
import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday, \
USMartinLutherKingJr, USPresidentsDay, GoodFriday, USMemorialDay, \
USLaborDay, USThanksgivingDay
# Fixed-date holidays; ``nearest_workday`` moves a date that falls on a weekend
# to the closest weekday.
_NEW_YEARS_DAY = Holiday('NewYearsDay', month=1, day=1, observance=nearest_workday)
_INDEPENDENCE_DAY = Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday)
_CHRISTMAS = Holiday('Christmas', month=12, day=25, observance=nearest_workday)


class USTradingCalendar(AbstractHolidayCalendar):
    """Holiday calendar with nine rules: three fixed-date holidays defined here
    plus six of pandas' predefined US holiday rules."""

    # Listed in calendar order, January through December.
    rules = [
        _NEW_YEARS_DAY,
        USMartinLutherKingJr,
        USPresidentsDay,
        GoodFriday,
        USMemorialDay,
        _INDEPENDENCE_DAY,
        USLaborDay,
        USThanksgivingDay,
        _CHRISTMAS,
    ]
def get_trading_close_holidays(year):
    """Return the USTradingCalendar holidays for *year*.

    The window runs from 31 December of the previous year through
    31 December of *year*.
    """
    window_start = datetime.datetime(year - 1, 12, 31)
    window_end = datetime.datetime(year, 12, 31)
    return USTradingCalendar().holidays(window_start, window_end)
start_date = "2017_07_01"
end_date = "2017_08_31"
start_date = datetime.datetime.strptime(start_date,"%Y_%m_%d").date()
end_date = datetime.datetime.strptime(end_date,"%Y_%m_%d").date()
date_range = pd.bdate_range(start = start_date, end = end_date, name =
"trading_days")
holidays = get_trading_close_holidays(start_date.year)
holidays = holidays.where((holidays.date > start_date) &
(holidays.date < end_date))
holidays = holidays.dropna(how = 'any')
date_range = date_range.where(~(date_range.trading_days.isin(holidays)))
Consider filtering by boolean condition:
# Keep only the business days that are not holidays. isin() tests every date for
# membership, so it works for any number of holidays (including none); an
# element-wise != between the two arrays only lines up when there is exactly
# one holiday in the window.
date_range = date_range[~date_range.isin(holidays)]
print(date_range) # ONE HOLIDAY 2017-07-04 DOES NOT APPEAR
# DatetimeIndex(['2017-07-03', '2017-07-05', '2017-07-06', '2017-07-07',
# '2017-07-10', '2017-07-11', '2017-07-12', '2017-07-13',
# '2017-07-14', '2017-07-17', '2017-07-18', '2017-07-19',
# '2017-07-20', '2017-07-21', '2017-07-24', '2017-07-25',
# '2017-07-26', '2017-07-27', '2017-07-28', '2017-07-31',
# '2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04',
# '2017-08-07', '2017-08-08', '2017-08-09', '2017-08-10',
# '2017-08-11', '2017-08-14', '2017-08-15', '2017-08-16',
# '2017-08-17', '2017-08-18', '2017-08-21', '2017-08-22',
# '2017-08-23', '2017-08-24', '2017-08-25', '2017-08-28',
# '2017-08-29', '2017-08-30', '2017-08-31'],
# dtype='datetime64[ns]', name='trading_days', freq=None)
And use astype() to convert the datetime index to a string-type array, then tolist() for the list conversion:
strdates = date_range.date.astype('str').tolist()
print(strdates)
# ['2017-07-03', '2017-07-05', '2017-07-06', '2017-07-07', '2017-07-10',
# '2017-07-11', '2017-07-12', '2017-07-13', '2017-07-14', '2017-07-17',
# '2017-07-18', '2017-07-19', '2017-07-20', '2017-07-21', '2017-07-24',
# '2017-07-25', '2017-07-26', '2017-07-27', '2017-07-28', '2017-07-31',
# '2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04', '2017-08-07',
# '2017-08-08', '2017-08-09', '2017-08-10', '2017-08-11', '2017-08-14',
# '2017-08-15', '2017-08-16', '2017-08-17', '2017-08-18', '2017-08-21',
# '2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25', '2017-08-28',
# '2017-08-29', '2017-08-30', '2017-08-31']

Parse currency into numbers in Python

I just learnt from Format numbers as currency in Python that the Python module babel provides babel.numbers.format_currency to format numbers as currency. For instance,
from babel.numbers import format_currency
s = format_currency(123456.789, 'USD', locale='en_US') # u'$123,456.79'
s = format_currency(123456.789, 'EUR', locale='fr_FR') # u'123\xa0456,79\xa0\u20ac'
How about the reverse, from currency to numbers, such as $123,456,789.00 --> 123456789? babel provides babel.numbers.parse_number to parse local numbers, but I didn't find anything like parse_currency. So, what is the ideal way to parse local currency into numbers?
I went through Python: removing characters except digits from string.
# Way 1
import string
all=string.maketrans('','')
nodigs=all.translate(all, string.digits)
s = '$123,456.79'
n = s.translate(all, nodigs) # 12345679, lost `.`
# Way 2
import re
n = re.sub("\D", "", s) # 12345679
It doesn't take care the decimal separator ..
Remove all non-numeric characters, except for ., from a string (refer to here),
import re
# Way 1:
s = '$123,456.79'
n = re.sub("[^0-9|.]", "", s) # 123456.79
# Way 2:
non_decimal = re.compile(r'[^\d.]+')
s = '$123,456.79'
n = non_decimal.sub('', s) # 123456.79
It does process the decimal separator ..
But the above solutions don't work when coming to, for instance,
from babel.numbers import format_currency
s = format_currency(123456.789, 'EUR', locale='fr_FR') # u'123\xa0456,79\xa0\u20ac'
new_s = s.encode('utf-8') # 123 456,79 €
As you can see, the format of currency varies. What is the ideal way to parse currency into numbers in a general way?
Below is a general currency parser that doesn't rely on the babel library.
import numpy as np
import re
def currency_parser(cur_str):
    """Parse a formatted currency string into a number rounded to 2 decimals.

    No locale information is used. A ``.`` or ``,`` within the last three
    characters of the cleaned string is taken as the decimal mark; every
    earlier ``.`` or ``,`` is treated as a thousands separator.

    Args:
        cur_str: Text such as ``'EUR433,432.53'``, ``'2.447,93 EUR'`` or
            ``'-.9 CZK'``.

    Returns:
        The amount, as returned by ``np.round(value, 2)``.

    Raises:
        ValueError: If what remains after cleaning is not a number
            (for example an empty string).
    """
    # Keep only digits, '-', '.' and ','; currency codes, symbols and spaces go.
    cleaned = re.sub(r"[^-0-9.,]", "", cur_str)
    # A trailing separator is punctuation (e.g. the dot of "kr."), never part
    # of the number; left in place it would shift the 3-character decimal
    # window and silently produce a wrong value.
    cleaned = cleaned.rstrip(".,")
    # Remove any 000s separators (either , or .) before the last 3 characters.
    head, tail = cleaned[:-3], cleaned[-3:]
    # A decimal comma in the tail becomes a decimal point for float().
    normalized = re.sub(r"[.,]", "", head) + tail.replace(",", ".")
    return np.round(float(normalized), 2)
Here is a pytest script that tests the function:
import numpy as np
import pytest
import re
def currency_parser(cur_str):
    """Parse a formatted currency string into a number rounded to 2 decimals.

    No locale information is used. A ``.`` or ``,`` within the last three
    characters of the cleaned string is taken as the decimal mark; every
    earlier ``.`` or ``,`` is treated as a thousands separator.

    Args:
        cur_str: Text such as ``'EUR433,432.53'``, ``'2.447,93 EUR'`` or
            ``'-.9 CZK'``.

    Returns:
        The amount, as returned by ``np.round(value, 2)``.

    Raises:
        ValueError: If what remains after cleaning is not a number
            (for example an empty string).
    """
    # Keep only digits, '-', '.' and ','; currency codes, symbols and spaces go.
    cleaned = re.sub(r"[^-0-9.,]", "", cur_str)
    # A trailing separator is punctuation (e.g. the dot of "kr."), never part
    # of the number; left in place it would shift the 3-character decimal
    # window and silently produce a wrong value.
    cleaned = cleaned.rstrip(".,")
    # Remove any 000s separators (either , or .) before the last 3 characters.
    head, tail = cleaned[:-3], cleaned[-3:]
    # A decimal comma in the tail becomes a decimal point for float().
    normalized = re.sub(r"[.,]", "", head) + tail.replace(",", ".")
    return np.round(float(normalized), 2)
# Each case pairs a raw currency string with the value currency_parser should
# return for it.
@pytest.mark.parametrize('currency_str, expected', [
    ('.3', 0.30),
    ('1', 1.00),
    ('1.3', 1.30),
    ('43,324', 43324.00),
    ('3,424', 3424.00),
    ('-0.00', 0.00),
    ('EUR433,432.53', 433432.53),
    ('25.675,26 EUR', 25675.26),
    ('2.447,93 EUR', 2447.93),
    ('-540,89EUR', -540.89),
    ('67.6 EUR', 67.60),
    ('30.998,63 CHF', 30998.63),
    ('0,00 CHF', 0.00),
    ('159.750,00 DKK', 159750.00),
    ('£ 2.237,85', 2237.85),
    ('£ 2,237.85', 2237.85),
    ('-1.876,85 SEK', -1876.85),
    ('59294325.3', 59294325.30),
    ('8,53 NOK', 8.53),
    ('0,09 NOK', 0.09),
    ('-.9 CZK', -0.9),
    ('35.255,40 PLN', 35255.40),
    ('-PLN123.456,78', -123456.78),
    ('US$123.456,79', 123456.79),
    ('-PLN123.456,78', -123456.78),
    ('PLN123.456,79', 123456.79),
    ('IDR123.457', 123457),
    ('JP¥123.457', 123457),
    ('-JP\xc2\xa5123.457', -123457),
    ('CN\xc2\xa5123.456,79', 123456.79),
    ('-CN\xc2\xa5123.456,78', -123456.78),
])
def test_currency_parse(currency_str, expected):
    """Check that currency_parser returns *expected* for *currency_str*."""
    assert currency_parser(currency_str) == expected
Using babel
The babel documentation notes that the number parsing is not fully implemented yet, but they have done a lot of work to get currency info into the library. You can use get_currency_name() and get_currency_symbol() to get currency details, and also all other get_... functions to get the normal number details (decimal point, minus sign, etc.).
Using that information you can exclude from a currency string the currency details (name, sign) and groupings (e.g. , in the US). Then you change the decimal details into the ones used by the C locale (- for minus, and . for the decimal point).
This results in this code (i added an object to keep some of the data, which may come handy in further processing):
import re, os
from babel import numbers as n
from babel.core import default_locale
class AmountInfo(object):
    """Plain record bundling a currency's name and symbol with a parsed value."""

    def __init__(self, name, symbol, value):
        # Stored exactly as given; no validation or conversion is performed.
        self.name, self.symbol, self.value = name, symbol, value
def parse_currency(value, cur):
    """Strip currency decoration from *value* and normalise its number text.

    No locale is passed to the babel lookups, so babel's default locale
    determines the decimal, group, plus and minus symbols as well as the
    currency name and symbol.

    Args:
        value: Formatted currency string to clean.
        cur: Currency code handed to ``get_currency_name`` and
            ``get_currency_symbol`` (e.g. ``'USD'``).

    Returns:
        An ``AmountInfo`` holding the currency name, the currency symbol and
        the cleaned number. The number is kept as a string that uses ``-`` for
        minus and ``.`` as the decimal point.
    """
    decp = n.get_decimal_symbol()
    plus = n.get_plus_sign_symbol()
    minus = n.get_minus_sign_symbol()
    group = n.get_group_symbol()
    name = n.get_currency_name(cur)
    symbol = n.get_currency_symbol(cur)
    # remove the pieces of information that shall be obvious; the group symbol
    # must go before the decimal mark is rewritten, because it may itself be '.'
    for token in (plus, name, symbol, group):
        value = value.replace(token, '')
    # change the minus sign to a LOCALE=C minus
    value = value.replace(minus, '-')
    # and change the decimal mark to a LOCALE=C decimal point
    value = value.replace(decp, '.')
    # just in case remove extraneous spaces
    value = re.sub(r'\s+', '', value)
    return AmountInfo(name, symbol, value)
#cur_loc = os.environ['LC_ALL']
cur_loc = default_locale()
print('locale:', cur_loc)
# (amount, currency code) pairs; each amount is formatted for cur_loc and kept
# next to its code so the formatted text can be parsed back afterwards.
_samples = [
    (123456.789, 'USD'),
    (-123456.78, 'PLN'),
    (123456.789, 'PLN'),
    (123456.789, 'IDR'),
    (123456.789, 'JPY'),
    (-123456.78, 'JPY'),
    (123456.789, 'CNY'),
    (-123456.78, 'CNY'),
]
test = [(n.format_currency(amount, code, locale=cur_loc), code)
        for amount, code in _samples]
# Show each formatted string, the value parsed back from it, and the currency
# name and symbol that were used to clean it.
for formatted, code in test:
    print('As currency :', code, ':', formatted.encode('utf-8'))
    info = parse_currency(formatted, code)
    print('As value :', code, ':', info.value)
    name_bytes = info.name.encode('utf-8')
    symbol_bytes = info.symbol.encode('utf-8')
    print('Extra info :', name_bytes, symbol_bytes)
The output looks promising (in US locale):
$ export LC_ALL=en_US
$ ./cur.py
locale: en_US
As currency : USD : b'$123,456.79'
As value : USD : 123456.79
Extra info : b'US Dollar' b'$'
As currency : PLN : b'-z\xc5\x82123,456.78'
As value : PLN : -123456.78
Extra info : b'Polish Zloty' b'z\xc5\x82'
As currency : PLN : b'z\xc5\x82123,456.79'
As value : PLN : 123456.79
Extra info : b'Polish Zloty' b'z\xc5\x82'
As currency : IDR : b'Rp123,457'
As value : IDR : 123457
Extra info : b'Indonesian Rupiah' b'Rp'
As currency : JPY : b'\xc2\xa5123,457'
As value : JPY : 123457
Extra info : b'Japanese Yen' b'\xc2\xa5'
As currency : JPY : b'-\xc2\xa5123,457'
As value : JPY : -123457
Extra info : b'Japanese Yen' b'\xc2\xa5'
As currency : CNY : b'CN\xc2\xa5123,456.79'
As value : CNY : 123456.79
Extra info : b'Chinese Yuan' b'CN\xc2\xa5'
As currency : CNY : b'-CN\xc2\xa5123,456.78'
As value : CNY : -123456.78
Extra info : b'Chinese Yuan' b'CN\xc2\xa5'
And it still works in different locales (Brazil is notable for using the comma as a decimal mark):
$ export LC_ALL=pt_BR
$ ./cur.py
locale: pt_BR
As currency : USD : b'US$123.456,79'
As value : USD : 123456.79
Extra info : b'D\xc3\xb3lar americano' b'US$'
As currency : PLN : b'-PLN123.456,78'
As value : PLN : -123456.78
Extra info : b'Zloti polon\xc3\xaas' b'PLN'
As currency : PLN : b'PLN123.456,79'
As value : PLN : 123456.79
Extra info : b'Zloti polon\xc3\xaas' b'PLN'
As currency : IDR : b'IDR123.457'
As value : IDR : 123457
Extra info : b'Rupia indon\xc3\xa9sia' b'IDR'
As currency : JPY : b'JP\xc2\xa5123.457'
As value : JPY : 123457
Extra info : b'Iene japon\xc3\xaas' b'JP\xc2\xa5'
As currency : JPY : b'-JP\xc2\xa5123.457'
As value : JPY : -123457
Extra info : b'Iene japon\xc3\xaas' b'JP\xc2\xa5'
As currency : CNY : b'CN\xc2\xa5123.456,79'
As value : CNY : 123456.79
Extra info : b'Yuan chin\xc3\xaas' b'CN\xc2\xa5'
As currency : CNY : b'-CN\xc2\xa5123.456,78'
As value : CNY : -123456.78
Extra info : b'Yuan chin\xc3\xaas' b'CN\xc2\xa5'
It is worth pointing out that babel has some encoding problems. That is because the locale files (in locale-data) use different encodings themselves. If you're working with currencies you're familiar with, that should not be a problem. But if you try unfamiliar currencies you might run into problems (I just learned that Poland uses iso-8859-2, not iso-8859-1).

I am unable to find a reg-exp to find the items (marked below in bold italics) in a txt file

------------------------------------------------------------------------------------- Suds ( https://fedorahosted.org/suds/ ) version: 0.3.9 (beta) build: R658-20100210
Service ( ndfdXML ) tns="http://www.weather.gov/forecasts/xml/DWMLgen/wsdl/ndfdXML.wsdl" Prefixes (2)
ns0 = "http://schemas.xmlsoap.org/soap/encoding/"
ns1 = "http://www.weather.gov/forecasts/xml/DWMLgen/schema/DWML.xsd" Ports (1):
(ndfdXMLPort)
Methods (12):
***CornerPoints***(ns1:sectorType sector, )
***GmlLatLonList***(ns1:listLatLonType listLatLon, xs:dateTime requestedTime, ns1:featureTypeType featureType, ns1:weatherParametersType weatherParameters, )
***GmlTimeSeries***(ns1:listLatLonType listLatLon, xs:dateTime startTime, xs:dateTime endTime, ns1:compTypeType compType, ns1:featureTypeType featureType, xs:string propertyName, )
***LatLonListCityNames***(ns1:displayLevelType displayLevel, )
***LatLonListLine***(xs:decimal endPoint1Lat, xs:decimal endPoint1Lon, xs:decimal endPoint2Lat, xs:decimal endPoint2Lon, )
***LatLonListSquare***(xs:decimal centerPointLat, xs:decimal centerPointLon, xs:decimal distanceLat, xs:decimal distanceLon, xs:decimal resolution, )
***LatLonListSubgrid***(xs:decimal lowerLeftLatitude, xs:decimal lowerLeftLongitude, xs:decimal upperRightLatitude, xs:decimal upperRightLongitude, xs:decimal resolution, )
***LatLonListZipCode***(ns1:zipCodeListType zipCodeList, )
**NDFDgen**(xs:decimal latitude, xs:decimal longitude, ns1:productType product, xs:dateTime startTime, xs:dateTime endTime, ns1:weatherParametersType weatherParameters, )
***NDFDgenByDay***(xs:decimal latitude, xs:decimal longitude, xs:date startDate, xs:integer numDays, ns1:formatType format, )
***NDFDgenByDayLatLonList***(ns1:listLatLonType listLatLon, xs:date startDate, xs:integer numDays, ns1:formatType format, )
***NDFDgenLatLonList***(ns1:listLatLonType listLatLon, ns1:productType product, xs:dateTime startTime, xs:dateTime endTime, ns1:weatherParametersType weatherParameters, )
Types (60):
ns0:Array
ns0:ENTITIES
ns0:ENTITY
ns0:ID
ns0:IDREF
ns0:IDREFS
ns0:NCName
ns0:NMTOKEN
ns0:NMTOKENS
ns0:NOTATION
ns0:Name
ns0:QName
ns0:Struct
ns0:anyURI
ns0:arrayCoordinate
ns0:base64
ns0:base64Binary
ns0:boolean
ns0:byte
ns1:compTypeType
ns0:date
ns0:dateTime
ns0:decimal
ns1:displayLevelType
ns0:double
ns0:duration
ns1:featureTypeType
ns0:float
ns1:formatType
ns0:gDay
ns0:gMonth
ns0:gMonthDay
ns0:gYear
ns0:gYearMonth
ns0:hexBinary
ns0:int
ns0:integer
ns0:language
ns1:latLonPairType
ns1:listCityNamesType
ns1:listLatLonType
ns0:long
ns0:negativeInteger
ns0:nonNegativeInteger
ns0:nonPositiveInteger
ns0:normalizedString
ns0:positiveInteger
ns1:productType
ns1:sectorType
ns0:short
ns0:string
ns0:time
ns0:token
ns0:unsignedByte
ns0:unsignedInt
ns0:unsignedLong
ns0:unsignedShort
ns1:weatherParametersType
ns1:zipCodeListType
ns1:zipCodeType
-------------------------------------------------------------------------------------------
import re
text='''Service ( ndfdXML ) tns="http://www.weather.gov/forecasts/xml/DWMLgen/wsdl/ndfdXML.wsdl" Prefixes (2) ns0 = "http://schemas.xmlsoap.org/soap/encoding/" ns1 = "http://www.weather.gov/forecasts/xml/DWMLgen/schema/DWML.xsd" Ports (1): (ndfdXMLPort) Methods (12): CornerPoints(ns1:sectorType sector, ) GmlLatLonList(ns1:listLatLonType listLatLon, xs:dateTime requestedTime, ns1:featureTypeType featureType, ns1:weatherParametersType weatherParameters, ) GmlTimeSeries(ns1:listLatLonType listLatLon, xs:dateTime startTime, xs:dateTime endTime, ns1:compTypeType compType, ns1:featureTypeType featureType, xs:string propertyName, ) LatLonListCityNames(ns1:displayLevelType displayLevel, ) LatLonListLine(xs:decimal endPoint1Lat, xs:decimal endPoint1Lon, xs:decimal endPoint2Lat, xs:decimal endPoint2Lon, ) LatLonListSquare(xs:decimal centerPointLat, xs:decimal centerPointLon, xs:decimal distanceLat, xs:decimal distanceLon, xs:decimal resolution, ) LatLonListSubgrid(xs:decimal lowerLeftLatitude, xs:decimal lowerLeftLongitude, xs:decimal upperRightLatitude, xs:decimal upperRightLongitude, xs:decimal resolution, ) LatLonListZipCode(ns1:zipCodeListType zipCodeList, ) NDFDgen(xs:decimal latitude, xs:decimal longitude, ns1:productType product, xs:dateTime startTime, xs:dateTime endTime, ns1:weatherParametersType weatherParameters, ) NDFDgenByDay(xs:decimal latitude, xs:decimal longitude, xs:date startDate, xs:integer numDays, ns1:formatType format, ) NDFDgenByDayLatLonList(ns1:listLatLonType listLatLon, xs:date startDate, xs:integer numDays, ns1:formatType format, ) NDFDgenLatLonList(ns1:listLatLonType listLatLon, ns1:productType product, xs:dateTime startTime, xs:dateTime endTime, ns1:weatherParametersType weatherParameters, ) Types (60): ns0:Array ns0:ENTITIES ns0:ENTITY ns0:ID ns0:IDREF ns0:IDREFS ns0:NCName ns0:NMTOKEN ns0:NMTOKENS ns0:NOTATION ns0:Name ns0:QName ns0:Struct ns0:anyURI ns0:arrayCoordinate ns0:base64 ns0:base64Binary ns0:boolean ns0:byte ns1:compTypeType ns0:date 
ns0:dateTime ns0:decimal ns1:displayLevelType ns0:double ns0:duration ns1:featureTypeType ns0:float ns1:formatType ns0:gDay ns0:gMonth ns0:gMonthDay ns0:gYear ns0:gYearMonth ns0:hexBinary ns0:int ns0:integer ns0:language ns1:latLonPairType ns1:listCityNamesType ns1:listLatLonType ns0:long ns0:negativeInteger ns0:nonNegativeInteger ns0:nonPositiveInteger ns0:normalizedString ns0:positiveInteger ns1:productType ns1:sectorType ns0:short ns0:string ns0:time ns0:token ns0:unsignedByte ns0:unsignedInt ns0:unsignedLong ns0:unsignedShort ns1:weatherParametersType ns1:zipCodeListType'''
for match in re.findall(r"[a-zA-Z]+?\s*\([a-z]*?[0-9]*?:",text):
print match.split("(")[0]
Do you want a regex that finds all of them, or one for each? What data do you need back (I assume the contents of the () and object name)?
Guessing from what you have, I assume something like this could work:
((CornerPoints|LatLonListZipCode|other|names)\(.*\))
That will match one of the names in the first half, then a (data) group after it (you may want to make it non-greedy).
If you edit the question or comment with more info, I'll edit the answer.
@peachykeen: yes, it needs to be non-greedy; otherwise the .* races all the way to the very last ')' in the string. You also need match groups, preferably named, in order to refer to what you matched:
/(?P<key>name1|name2|...)(?P<value>\(.*?\))/

Categories