Skip to content Skip to sidebar Skip to footer

Python Pandas Fill Dataframe With Missing Values

I have this dataframe as an example import pandas as pd #create dataframe df = pd.DataFrame([['DE', 'Table',201705,201705, 1000], ['DE', 'Table',201705,201704, 1000],\

Solution 1:

Here is my solution. I believe someone will provide a better one, so let us wait for it.

# Per-ISO span of weeks: earliest 'Created Week' and latest 'Billed Week'.
# Aggregations are given by name ('min'/'max') rather than as np.min/np.max:
# this needs no numpy import and avoids the pandas deprecation of passing
# numpy callables to agg. The keys are listed in the column order shown in
# the output table below (Created Week, then Billed Week).
df1 = df.groupby('ISO').agg({'Created Week': 'min', 'Billed Week': 'max'})
# Keep the group key available as a regular column in addition to the index.
df1['ISO'] = df1.index

     Created Week  Billed Week ISO
ISO                               
AT         201704       201708  AT
DE         201701       201705  DE

# Expand each ISO's span (earliest 'Created Week' .. latest 'Billed Week')
# into one row per week, then left-join the real data onto that scaffold.
# NOTE(review): week codes are treated as consecutive integers, so a span
# crossing a year boundary would generate non-existent weeks -- confirm
# the data never does that.
ISO = []
BilledWeek = []
CreateWeek = []
# Columns are read by label: `.ix` was removed in pandas 1.0, and positional
# access would silently depend on the column order of df1.
for iso, first_week, last_week in zip(
        df1['ISO'], df1['Created Week'], df1['Billed Week']):
    weeks = range(first_week, last_week + 1)
    CreateWeek.extend(weeks)
    BilledWeek.extend([last_week] * len(weeks))
    ISO.extend([iso] * len(weeks))

DF = pd.DataFrame({'BilledWeek': BilledWeek, 'CreateWeek': CreateWeek, 'ISO': ISO})
Target = DF.merge(
    df,
    left_on=['BilledWeek', 'CreateWeek', 'ISO'],
    right_on=['Billed Week', 'Created Week', 'ISO'],
    how='left',
)
# Weeks with no matching source row get zero billings. Assign the result
# back instead of calling fillna(inplace=True) on an attribute-accessed
# column, which would act on a temporary under pandas Copy-on-Write.
Target['Billings'] = Target['Billings'].fillna(0)
# The right-hand join keys duplicate BilledWeek/CreateWeek; drop them.
Target = Target.drop(['Billed Week', 'Created Week'], axis=1)
# Carry the last seen Product forward into the gap rows of each ISO.
Target['Product'] = Target.groupby('ISO')['Product'].ffill()

Out[75]: 
   BilledWeek  CreateWeek ISO Product  Billings
0      201708      201704  AT   Table    1000.0
1      201708      201705  AT   Table    1000.0
2      201708      201706  AT   Table    1000.0
3      201708      201707  AT   Table       0.0
4      201708      201708  AT   Table    1000.0
5      201705      201701  DE   Table    1000.0
6      201705      201702  DE   Table    1000.0
7      201705      201703  DE   Table       0.0
8      201705      201704  DE   Table    1000.0
9      201705      201705  DE   Table    1000.0

Solution 2:

def seqfix(x):
    """Return a copy of *x* with every missing 'Created Week' filled in.

    The frame is reindexed over the full integer range from its smallest
    to its largest 'Created Week'. Inserted rows get ``Billings`` of 0 and
    take all other column values from the nearest preceding row.
    'Created Week' ends up as the first column of the result.
    """
    created = x['Created Week']
    every_week = range(min(created), max(created) + 1)

    filled = x.set_index('Created Week').reindex(every_week)
    # Zero the billings first so the forward fill below cannot copy a real
    # billing amount into a week that had no row.
    filled['Billings'] = filled['Billings'].fillna(0)
    return filled.ffill().reset_index()

# Fill the week gaps separately for every (ISO, Billed Week) group.
# The groups are iterated explicitly instead of using
# groupby(...).apply(seqfix): seqfix forward-fills the grouping columns
# themselves, and having apply() operate on grouping columns is deprecated
# since pandas 2.2. Iteration always hands seqfix the full sub-frame.
df = pd.concat(
    [seqfix(group) for _, group in df.groupby(['ISO', 'Billed Week'])],
    ignore_index=True,
)
# The rows inserted by the reindex held NaN, which upcasts integer columns
# to float; convert the numeric columns back to int.
df[['Billed Week', 'Billings']] = df[['Billed Week', 'Billings']].astype(int)
# seqfix returns 'Created Week' first; put the columns in display order.
df = df[['ISO', 'Product', 'Billed Week', 'Created Week', 'Billings']]

print(df)

  ISO Product  Billed Week  Created Week  Billings
0  AT   Table       201708        201704      1000
1  AT   Table       201708        201705      1000
2  AT   Table       201708        201706      1000
3  AT   Table       201708        201707         0
4  AT   Table       201708        201708      1000
5  DE   Table       201705        201701      1000
6  DE   Table       201705        201702      1000
7  DE   Table       201705        201703         0
8  DE   Table       201705        201704      1000
9  DE   Table       201705        201705      1000

Solution 3:

Build a MultiIndex with all the gaps in Created Weeks filled and then reindex the original DF.

# For every 'Billed Week' group, list one index tuple per week between the
# group's smallest and largest 'Created Week'.
# The groups are iterated directly rather than through groupby().apply():
# the tuples read the grouping column 'Billed Week', and having apply()
# operate on grouping columns is deprecated since pandas 2.2.
# NOTE(review): taking min() of ISO and Product assumes each 'Billed Week'
# belongs to a single ISO/Product pair -- confirm against the real data.
idx = []
for _, group in df.groupby(['Billed Week']):
    iso = group['ISO'].min()
    product = group['Product'].min()
    billed_week = group['Billed Week'].min()
    first_created = group['Created Week'].min()
    last_created = group['Created Week'].max()
    idx.append([
        (iso, product, billed_week, created_week)
        for created_week in range(first_created, last_created + 1)
    ])

# Flatten with a comprehension; sum(idx, []) re-copies the accumulated list
# for every group.
multi_idx = pd.MultiIndex.from_tuples(
    [entry for group_entries in idx for entry in group_entries],
    names=['ISO', 'Product', 'Billed Week', 'Created Week'],
)

# Reindexing onto the complete index creates the missing rows as NaN;
# fillna(0) then turns those missing billings into 0.
(df.set_index(['ISO', 'Product', 'Billed Week', 'Created Week'])
     .reindex(multi_idx)
     .reset_index()
     .fillna(0)
)

Out[671]: 
  ISO Product  Billed Week  Created Week  Billings
0  DE   Table       201705        201701    1000.0
1  DE   Table       201705        201702    1000.0
2  DE   Table       201705        201703       0.0
3  DE   Table       201705        201704    1000.0
4  DE   Table       201705        201705    1000.0
5  AT   Table       201708        201704    1000.0
6  AT   Table       201708        201705    1000.0
7  AT   Table       201708        201706    1000.0
8  AT   Table       201708        201707       0.0
9  AT   Table       201708        201708    1000.0

Post a Comment for "Python Pandas Fill Dataframe With Missing Values"