You can use `lreshape`:
# each key is an output column; its list names the wide columns stacked into it
groups = {'c': ['c1', 'c2'], 'd': ['d1', 'd2']}
df = pd.lreshape(dff, groups)
print(df)
a b c d
0 1 1 3 2
1 2 3 3 8
2 3 1 3 2
3 1 1 4 3
4 2 3 1 4
5 3 1 1 1
Or `wide_to_long`:
# expose the row index as an 'index' column so it can be passed as the id (i=)
dff = dff.reset_index()
# c1/c2 -> 'c' and d1/d2 -> 'd'; the numeric suffix ends up in index level 'B'
stacked = pd.wide_to_long(dff, stubnames=['c', 'd'], i='index', j='B')
# discard the ('index', 'B') index and keep only the data columns, in order
a = stacked.reset_index(drop=True).reindex(columns=['a', 'b', 'c', 'd'])
print(a)
a b c d
0 1 1 3 2
1 2 3 3 8
2 3 1 3 2
3 1 1 4 3
4 2 3 1 4
5 3 1 1 1
EDIT: If you want to keep the `in`/`out` part of the column names, use `melt`, `str.extract`, `assign`, and finally `sort_values`:
# melt the c* and d* columns separately, keeping a and b as identifiers
id_cols = ['a', 'b']
a = dff1.melt(id_vars=id_cols, value_vars=['cin', 'cout'],
              var_name='in/out', value_name='c')
b = dff1.melt(id_vars=id_cols, value_vars=['din', 'dout'],
              var_name='in/out', value_name='d')
# reduce the original column names ('cin', 'dout', ...) to just 'in' / 'out'
for frame in (a, b):
    frame['in/out'] = frame['in/out'].str.extract('(in|out)', expand=False)
print(a)
a b in/out c
0 1 1 in 3
1 2 3 in 3
2 3 1 in 3
3 1 1 out 4
4 2 3 out 1
5 3 1 out 1
# b is the melt of din/dout: columns a, b, in/out plus the values in 'd'
print (b)
a b in/out d
0 1 1 in 2
1 2 3 in 8
2 3 1 in 2
3 1 1 out 3
4 2 3 out 4
5 3 1 out 1
# a and b were melted the same way, so b['d'] lines up with a by index
with_d = a.assign(d=b['d'])
c = with_d.sort_values(['a', 'b'])
# equivalent alternative:
# c = pd.merge(a, b).sort_values(['a', 'b'])
print(c)
a b in/out c d
0 1 1 in 3 2
3 1 1 out 4 3
1 2 3 in 3 8
4 2 3 out 1 4
2 3 1 in 3 2
5 3 1 out 1 1
Solution rewritten for pandas 0.15.0:
# pandas 0.15.0 compatible version of the melt / extract / merge approach:
#  - DataFrame.melt does not exist yet, so the top-level pd.melt is used
#  - str.extract has no `expand` argument; one capture group yields a Series
#  - DataFrame.sort_values was only added in 0.17.0, so sort with
#    DataFrame.sort here (on pandas >= 0.17 use .sort_values(['a','b']))
a = pd.melt(dff1, id_vars=['a','b'], value_vars=['cin','cout'], value_name='c', var_name='in/out')
b = pd.melt(dff1, id_vars=['a','b'], value_vars=['din','dout'], value_name='d', var_name='in/out')
# reduce the original column names ('cin', 'dout', ...) to just 'in' / 'out'
a['in/out'] = a['in/out'].str.extract('(in|out)')
b['in/out'] = b['in/out'].str.extract('(in|out)')
# merge joins on the shared columns a, b and in/out
c = pd.merge(a, b).sort(['a','b'])
Another solution, based on Wen's removed answer, is to replace the string suffixes with numbers, then use `wide_to_long`, and finally `map` the numbers back:
# suffixes found in the column names; list position becomes the numeric code
L = ['in', 'out']
# code -> suffix, used to map the codes back to labels after reshaping
d = dict(enumerate(L))
# suffix -> code as a string, used to rewrite the column names
d1 = {suffix: str(code) for code, suffix in enumerate(L)}
print(d)
{0: 'in', 1: 'out'}
# d1 is d inverted, with the codes as strings so it can be used for text replacement
print (d1)
{'out': '1', 'in': '0'}
# swap the 'in' / 'out' text in the column names for the numeric codes in d1
dff1.columns = dff1.columns.to_series().replace(d1, regex=True)
# stack the c* and d* columns; the numeric suffix lands in 'in/out'
long_df = pd.wide_to_long(dff1, stubnames=['c', 'd'], i=['a', 'b'], j='in/out')
a = long_df.reset_index()
# turn the numeric codes back into the 'in' / 'out' labels
a['in/out'] = a['in/out'].astype(int).map(d)
a = a[['a', 'b', 'c', 'd', 'in/out']]
print(a)
a b c d in/out
0 1 1 3 2 in
1 1 1 4 3 out
2 2 3 3 8 in
3 2 3 1 4 out
4 3 1 3 2 in
5 3 1 1 1 out
EDIT:
To reverse the process, use:
# pivot the 'in/out' level into the columns, giving (value, suffix) pairs
wide = df.set_index(['a', 'b', 'in/out']).unstack()
# flatten each (value, suffix) pair into one name, e.g. ('c', 'in') -> 'cin'
wide.columns = wide.columns.map(''.join)
# bring a and b back as ordinary columns
df = wide.reset_index()
print(df)
a b cin cout din dout
0 1 1 3 4 2 3
1 2 3 3 1 8 4
2 3 1 3 1 2 1