You can use `groupby` with column names rewritten in a custom format, because `resample` with `6M` returns 2000 (which looks like a bug):
# Average monthly columns into half-year buckets labelled '<year>$<half>'.
# Expects `df` (defined earlier) to hold six descriptive columns followed by
# one column per month whose label parses as a date (e.g. '1996-04').

# Preview the column layout: the first seven labels plus the last one.
c = df.columns.tolist()
cols = c[:7]
cols.append(c[-1])
print(cols)
# ['RegionID', 'RegionName', 'State', 'Metro', 'CountyName', 'SizeRank', '1996-04', '2016-11']

# Move every non-date column into the index so only the monthly columns remain.
tdf = df.set_index(df.columns.tolist()[:6])
tdf.columns = pd.to_datetime(tdf.columns).to_period('M')

# Select the monthly columns strictly after 1997-12 and strictly before 2000.
cols = tdf.columns
sel_cols = cols[(cols > '1997-12') & (cols < '2000')]
print(sel_cols)
# PeriodIndex(['1998-01', '1998-02', '1998-03', '1998-04', '1998-05', '1998-06',
#              '1998-07', '1998-08', '1998-09', '1998-10', '1998-11', '1998-12',
#              '1999-01', '1999-02', '1999-03', '1999-04', '1999-05', '1999-06',
#              '1999-07', '1999-08', '1999-09', '1999-10', '1999-11', '1999-12'],
#             dtype='period[M]', freq='M')

# Relabel each month as '<year>$<half>': quarters 1-2 map to half 1,
# quarters 3-4 map to half 2. Duplicate labels are intentional; they are the
# grouping keys used below.
a = tdf[sel_cols].rename(
    columns=lambda x: f'{x.year}${1 if x.quarter <= 2 else 2}'
)
print(a.columns)
# Index(['1998$1', '1998$1', '1998$1', '1998$1', '1998$1', '1998$1',
#        '1998$2', '1998$2', '1998$2', '1998$2', '1998$2', '1998$2',
#        '1999$1', '1999$1', '1999$1', '1999$1', '1999$1', '1999$1',
#        '1999$2', '1999$2', '1999$2', '1999$2', '1999$2', '1999$2'],
#       dtype='object')

# Average the columns that share a label. `groupby(..., axis=1)` is deprecated
# in recent pandas, so transpose, group on the (now row) labels, and transpose
# back; the result is the same per-row mean of each half-year.
mdf = a.T.groupby(level=0).mean().T.reset_index()
print(mdf.head())
#    RegionID    RegionName State                           Metro  \
# 0      6181      New York    NY                        New York
# 1     12447   Los Angeles    CA  Los Angeles-Long Beach-Anaheim
# 2     17426       Chicago    IL                         Chicago
# 3     13271  Philadelphia    PA                    Philadelphia
# 4     40326       Phoenix    AZ                         Phoenix
#
#      CountyName  SizeRank         1998$1         1998$2         1999$1  \
# 0        Queens         1            NaN            NaN            NaN
# 1   Los Angeles         2  169183.333333  179166.666667  189116.666667
# 2          Cook         3  117700.000000  121666.666667  125550.000000
# 3  Philadelphia         4   50550.000000   50650.000000   51150.000000
# 4      Maricopa         5   97583.333333  101083.333333  104816.666667
#
#           1999$2
# 0            NaN
# 1  198466.666667
# 2  133000.000000
# 3   51866.666667
# 4  108566.666667