Calculate Base Effect Based On Month To Month Changes In Pandas
Let's say we have a sample dataframe df which is CPI data having date and MoM columns, I need to calculate base_effect and new_price_effect based on them: date MoM base_
Solution 1:
Try:
from dateutil.relativedelta import relativedelta
import pandas as pd
from numpy import nan
from datetime import datetime
data = [{'date': '2019-01', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-02', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-03', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-04', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-05', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-06', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-07', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-08', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-09', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-10', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-11', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2019-12', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
{'date': '2020-01',
'MoM': 1.015,
'base_effect': 1.115668347,
'new_price_effect': 1.015},
{'date': '2020-02',
'MoM': 1.015,
'base_effect': 1.104622125,
'new_price_effect': 1.030225},
{'date': '2020-03',
'MoM': 1.015,
'base_effect': 1.093685273,
'new_price_effect': 1.045678375},
{'date': '2020-04',
'MoM': 1.015,
'base_effect': 1.082856706,
'new_price_effect': 1.061363551},
{'date': '2020-05',
'MoM': 1.015,
'base_effect': 1.072135352,
'new_price_effect': 1.077284004},
{'date': '2020-06',
'MoM': 1.015,
'base_effect': 1.061520151,
'new_price_effect': 1.093443264},
{'date': '2020-07',
'MoM': 1.015,
'base_effect': 1.05101005,
'new_price_effect': 1.109844913},
{'date': '2020-08',
'MoM': 1.015,
'base_effect': 1.04060401,
'new_price_effect': 1.126492587},
{'date': '2020-09',
'MoM': 1.015,
'base_effect': 1.030301,
'new_price_effect': 1.143389975},
{'date': '2020-10',
'MoM': 1.015,
'base_effect': 1.0201,
'new_price_effect': 1.160540825},
{'date': '2020-11',
'MoM': 1.015,
'base_effect': 1.01,
'new_price_effect': 1.177948937},
{'date': '2020-12',
'MoM': 1.015,
'base_effect': 1.0,
'new_price_effect': 1.195618171}]
df = pd.DataFrame.from_dict(data)
df['date'] = pd.to_datetime(df['date'])
print(df)
def base_effect(x):
required_interval = pd.date_range(x['date'] - relativedelta(months=11), freq='MS',
periods=12 - x['date'].month)
df_sub = df[df.date.isin(required_interval)]
return df_sub['MoM'].product(min_count=1)
def new_price_effect(x):
required_interval = pd.date_range(datetime(x['date'].year, 1, 1), freq='MS', periods=x['date'].month)
df_sub = df[df.date.isin(required_interval)]
return df_sub['MoM'].product(min_count=1)
df['base_effect'] = df.apply(base_effect, axis=1)
df['new_price_effect'] = df.apply(new_price_effect, axis=1)
print(df)
If you want to use it with other columns try:
def base_effect(x,col_name):
required_interval = pd.date_range(x['date'] - relativedelta(months=11), freq='MS',
periods=12 - x['date'].month)
df_sub = df[df.date.isin(required_interval)]
return df_sub[col_name].product(min_count=1)
def new_price_effect(x, col_name):
required_interval = pd.date_range(datetime(x['date'].year, 1, 1), freq='MS', periods=x['date'].month)
df_sub = df[df.date.isin(required_interval)]
return df_sub[col_name].product(min_count=1)
df['base_effect'] = df.apply(base_effect, col_name='MoM', axis=1)
df['new_price_effect'] = df.apply(new_price_effect, col_name='MoM', axis=1)
print(df)
As the updated data have end dates for months, isin
wont work with MS
frequency. You can update your functions as follows:
def base_effect(x,col_name):
required_interval = list(pd.date_range(x['date'] - relativedelta(months=11), freq='MS',
periods=12 - x['date'].month))
if required_interval:
df_sub = df[(df.date>=required_interval[0]) & (df.date<=required_interval[-1])]
return df_sub[col_name].product(min_count=1)
else:
return np.nan
Solution 2:
For the updated data, I need to floor a date to the first date of that month:
from pandas.tseries.offsets import MonthBegin
from dateutil.relativedelta import relativedelta
import pandas as pd
from numpy import nan
from datetime import datetime
data = [{'date': '2020-1-31', 'MoM': 1.014},
{'date': '2020-2-29', 'MoM': 1.008},
{'date': '2020-3-31', 'MoM': 0.988},
{'date': '2020-4-30', 'MoM': 0.991},
{'date': '2020-5-31', 'MoM': 0.992},
{'date': '2020-6-30', 'MoM': 0.999339},
{'date': '2020-7-31', 'MoM': 1.006159},
{'date': '2020-8-31', 'MoM': 1.00401},
{'date': '2020-9-30', 'MoM': 1.002325},
{'date': '2020-10-31', 'MoM': 0.997},
{'date': '2020-11-30', 'MoM': 0.9940000000000001},
{'date': '2020-12-31', 'MoM': 1.0070000000000001},
{'date': '2021-1-31', 'MoM': 1.01},
{'date': '2021-2-28', 'MoM': 1.006},
{'date': '2021-3-31', 'MoM': 0.995},
{'date': '2021-4-30', 'MoM': 0.997},
{'date': '2021-5-31', 'MoM': 0.998},
{'date': '2021-6-30', 'MoM': 0.996},
{'date': '2021-7-31', 'MoM': 1.003},
{'date': '2021-8-31', 'MoM': 1.001}]
df = pd.DataFrame.from_dict(data)
df['date'] = pd.to_datetime(df['date']) - MonthBegin(1)
def base_effect(x):
required_interval = pd.date_range(x['date'] - relativedelta(months=11), freq='MS',
periods=12 - x['date'].month)
df_sub = df[df.date.isin(required_interval)]
return df_sub['MoM'].product(min_count=1)
def new_price_effect(x):
required_interval = pd.date_range(datetime(x['date'].year, 1, 1), freq='MS', periods=x['date'].month)
df_sub = df[df.date.isin(required_interval)]
return df_sub['MoM'].product(min_count=1)
df['base_effect'] = df.apply(base_effect, axis=1)
df['new_price_effect'] = df.apply(new_price_effect, axis=1)
print(df)
Out:
date MoM base_effect new_price_effect
0 2020-01-01 1.014000 NaN 1.014000
1 2020-02-01 1.008000 NaN 1.022112
2 2020-03-01 0.988000 NaN 1.009847
3 2020-04-01 0.991000 NaN 1.000758
4 2020-05-01 0.992000 NaN 0.992752
5 2020-06-01 0.999339 NaN 0.992096
6 2020-07-01 1.006159 NaN 0.998206
7 2020-08-01 1.004010 NaN 1.002209
8 2020-09-01 1.002325 NaN 1.004539
9 2020-10-01 0.997000 NaN 1.001525
10 2020-11-01 0.994000 NaN 0.995516
11 2020-12-01 1.007000 NaN 1.002485
12 2021-01-01 1.010000 0.988644 1.010000
13 2021-02-01 1.006000 0.980797 1.016060
14 2021-03-01 0.995000 0.992710 1.010980
15 2021-04-01 0.997000 1.001726 1.007947
16 2021-05-01 0.998000 1.009804 1.005931
17 2021-06-01 0.996000 1.010472 1.001907
18 2021-07-01 1.003000 1.004286 1.004913
19 2021-08-01 1.001000 1.000275 1.005918
Post a Comment for "Calculate Base Effect Based On Month To Month Changes In Pandas"