Skip to content Skip to sidebar Skip to footer

Calculate Base Effect Based On Month To Month Changes In Pandas

Let's say we have a sample dataframe df which is CPI data having date and MoM columns, I need to calculate base_effect and new_price_effect based on them: date MoM base_

Solution 1:

Try:

from dateutil.relativedelta import relativedelta
import pandas as pd
from numpy import nan
from datetime import datetime

data = [{'date': '2019-01', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-02', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-03', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-04', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-05', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-06', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-07', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-08', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-09', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-10', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-11', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-12', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2020-01',
         'MoM': 1.015,
         'base_effect': 1.115668347,
         'new_price_effect': 1.015},
        {'date': '2020-02',
         'MoM': 1.015,
         'base_effect': 1.104622125,
         'new_price_effect': 1.030225},
        {'date': '2020-03',
         'MoM': 1.015,
         'base_effect': 1.093685273,
         'new_price_effect': 1.045678375},
        {'date': '2020-04',
         'MoM': 1.015,
         'base_effect': 1.082856706,
         'new_price_effect': 1.061363551},
        {'date': '2020-05',
         'MoM': 1.015,
         'base_effect': 1.072135352,
         'new_price_effect': 1.077284004},
        {'date': '2020-06',
         'MoM': 1.015,
         'base_effect': 1.061520151,
         'new_price_effect': 1.093443264},
        {'date': '2020-07',
         'MoM': 1.015,
         'base_effect': 1.05101005,
         'new_price_effect': 1.109844913},
        {'date': '2020-08',
         'MoM': 1.015,
         'base_effect': 1.04060401,
         'new_price_effect': 1.126492587},
        {'date': '2020-09',
         'MoM': 1.015,
         'base_effect': 1.030301,
         'new_price_effect': 1.143389975},
        {'date': '2020-10',
         'MoM': 1.015,
         'base_effect': 1.0201,
         'new_price_effect': 1.160540825},
        {'date': '2020-11',
         'MoM': 1.015,
         'base_effect': 1.01,
         'new_price_effect': 1.177948937},
        {'date': '2020-12',
         'MoM': 1.015,
         'base_effect': 1.0,
         'new_price_effect': 1.195618171}]

df = pd.DataFrame.from_dict(data)
df['date'] = pd.to_datetime(df['date'])
print(df)


def base_effect(x):
    required_interval = pd.date_range(x['date'] - relativedelta(months=11), freq='MS',
                                      periods=12 - x['date'].month)
    df_sub = df[df.date.isin(required_interval)]
    return df_sub['MoM'].product(min_count=1)


def new_price_effect(x):
    required_interval = pd.date_range(datetime(x['date'].year, 1, 1), freq='MS', periods=x['date'].month)
    df_sub = df[df.date.isin(required_interval)]
    return df_sub['MoM'].product(min_count=1)

df['base_effect'] = df.apply(base_effect, axis=1)
df['new_price_effect'] = df.apply(new_price_effect, axis=1)
print(df)

If you want to use it with other columns try:

def base_effect(x,col_name):
    required_interval = pd.date_range(x['date'] - relativedelta(months=11), freq='MS',
                                      periods=12 - x['date'].month)
    df_sub = df[df.date.isin(required_interval)]
    return df_sub[col_name].product(min_count=1)


def new_price_effect(x, col_name):
    required_interval = pd.date_range(datetime(x['date'].year, 1, 1), freq='MS', periods=x['date'].month)
    df_sub = df[df.date.isin(required_interval)]
    return df_sub[col_name].product(min_count=1)

df['base_effect'] = df.apply(base_effect, col_name='MoM', axis=1)
df['new_price_effect'] = df.apply(new_price_effect, col_name='MoM', axis=1)
print(df)

As the updated data have end dates for months, isin wont work with MS frequency. You can update your functions as follows:

def base_effect(x,col_name):
    required_interval = list(pd.date_range(x['date'] - relativedelta(months=11), freq='MS',
                                      periods=12 - x['date'].month))
    if required_interval:
        df_sub = df[(df.date>=required_interval[0]) & (df.date<=required_interval[-1])]

        return df_sub[col_name].product(min_count=1)
    else:
        return np.nan

Solution 2:

For the updated data, I need to floor a date to the first date of that month:

from pandas.tseries.offsets import MonthBegin
from dateutil.relativedelta import relativedelta
import pandas as pd
from numpy import nan
from datetime import datetime

data = [{'date': '2020-1-31', 'MoM': 1.014},
 {'date': '2020-2-29', 'MoM': 1.008},
 {'date': '2020-3-31', 'MoM': 0.988},
 {'date': '2020-4-30', 'MoM': 0.991},
 {'date': '2020-5-31', 'MoM': 0.992},
 {'date': '2020-6-30', 'MoM': 0.999339},
 {'date': '2020-7-31', 'MoM': 1.006159},
 {'date': '2020-8-31', 'MoM': 1.00401},
 {'date': '2020-9-30', 'MoM': 1.002325},
 {'date': '2020-10-31', 'MoM': 0.997},
 {'date': '2020-11-30', 'MoM': 0.9940000000000001},
 {'date': '2020-12-31', 'MoM': 1.0070000000000001},
 {'date': '2021-1-31', 'MoM': 1.01},
 {'date': '2021-2-28', 'MoM': 1.006},
 {'date': '2021-3-31', 'MoM': 0.995},
 {'date': '2021-4-30', 'MoM': 0.997},
 {'date': '2021-5-31', 'MoM': 0.998},
 {'date': '2021-6-30', 'MoM': 0.996},
 {'date': '2021-7-31', 'MoM': 1.003},
 {'date': '2021-8-31', 'MoM': 1.001}]

df = pd.DataFrame.from_dict(data)
df['date'] = pd.to_datetime(df['date']) - MonthBegin(1)


def base_effect(x):
    required_interval = pd.date_range(x['date'] - relativedelta(months=11), freq='MS',
                                      periods=12 - x['date'].month)
    df_sub = df[df.date.isin(required_interval)]
    return df_sub['MoM'].product(min_count=1)


def new_price_effect(x):
    required_interval = pd.date_range(datetime(x['date'].year, 1, 1), freq='MS', periods=x['date'].month)
    df_sub = df[df.date.isin(required_interval)]
    return df_sub['MoM'].product(min_count=1)

df['base_effect'] = df.apply(base_effect, axis=1)
df['new_price_effect'] = df.apply(new_price_effect, axis=1)
print(df)

Out:

         date       MoM  base_effect  new_price_effect
0  2020-01-01  1.014000          NaN          1.014000
1  2020-02-01  1.008000          NaN          1.022112
2  2020-03-01  0.988000          NaN          1.009847
3  2020-04-01  0.991000          NaN          1.000758
4  2020-05-01  0.992000          NaN          0.992752
5  2020-06-01  0.999339          NaN          0.992096
6  2020-07-01  1.006159          NaN          0.998206
7  2020-08-01  1.004010          NaN          1.002209
8  2020-09-01  1.002325          NaN          1.004539
9  2020-10-01  0.997000          NaN          1.001525
10 2020-11-01  0.994000          NaN          0.995516
11 2020-12-01  1.007000          NaN          1.002485
12 2021-01-01  1.010000     0.988644          1.010000
13 2021-02-01  1.006000     0.980797          1.016060
14 2021-03-01  0.995000     0.992710          1.010980
15 2021-04-01  0.997000     1.001726          1.007947
16 2021-05-01  0.998000     1.009804          1.005931
17 2021-06-01  0.996000     1.010472          1.001907
18 2021-07-01  1.003000     1.004286          1.004913
19 2021-08-01  1.001000     1.000275          1.005918

Post a Comment for "Calculate Base Effect Based On Month To Month Changes In Pandas"