All possible permutations of a Pandas DataFrame column's values, listed in the same column

I asked a similar question about PostgreSQL, but I realized that this kind of task is really hard to do in Postgres, and I think Python / pandas will make it a lot easier, although I still can't come up with a solution.

Now I have a Pandas Dataframe that looks like this:

# Sample data: every planid (A, B, C) offers two candidate x values.
df = pd.DataFrame(
    dict(
        planid=['A', 'A', 'B', 'B', 'C', 'C'],
        x=['a1', 'a2', 'b1', 'b2', 'c1', 'c2'],
    )
)

df


   planid   x
0   A       a1
1   A       a2
2   B       b1
3   B       b2
4   C       c1
5   C       c2

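Here, each planid has two possible x values. I want every combination that takes exactly one x value from each planid. For this data there are 8 such combinations: {(a1, b1, c1), (a1, b2, c1), (a1, b1, c2), (a1, b2, c2), (a2, b1, c1), (a2, b2, c1), (a2, b1, c2), (a2, b2, c2)}.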

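The result should be a DataFrame with the columns planid and x plus a new column, permutation_counter, that identifies which combination each row belongs to. For the data above, the output would look like this: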

       planid   x  permutation_counter
    0   A       a1     1
    1   B       b1     1
    2   C       c1     1 
    3   A       a1     2
    4   B       b2     2
    5   C       c1     2
    6   A       a1     3
    7   B       b1     3
    8   C       c2     3
    9   A       a1     4
    10  B       b2     4
    11  C       c2     4
    12  A       a2     5
    13  B       b1     5
    14  C       c1     5
    15  A       a2     6
    16  B       b2     6
    17  C       c1     6
    18  A       a2     7
    19  B       b1     7
    20  C       c2     7
    21  A       a2     8
    22  B       b2     8
    23  C       c2     8

!

+4
2

. , , :)

# Group the x values by planid once, so that the level names and the per-plan
# value lists both follow the same (sorted) group order. Taking the names from
# df.planid.unique() instead would mislabel the levels whenever the planid
# values do not first appear in sorted order.
grouped = list(df.groupby('planid'))
plan_ids = [plan for plan, _ in grouped]
x_choices = [subdf['x'] for _, subdf in grouped]

# Cartesian product of the per-plan choices: after reset_index() there is one
# row per combination and one column per planid; stack() then turns every row
# into one (combination, planid, x) record per plan.
df2 = pd.DataFrame(index=pd.MultiIndex.from_product(x_choices, names=plan_ids)).reset_index().stack().reset_index()

df2.columns = ['permutation_counter', 'planid', 'x']
df2['permutation_counter'] += 1  # number the combinations from 1 instead of 0

# print() call form: the original statement form is a SyntaxError on Python 3.
print(df2[['planid', 'x', 'permutation_counter']])

   planid   x  permutation_counter
0       A  a1                    1
1       B  b1                    1
2       C  c1                    1
3       A  a1                    2
4       B  b1                    2
5       C  c2                    2
6       A  a1                    3
7       B  b2                    3
8       C  c1                    3
9       A  a1                    4
10      B  b2                    4
11      C  c2                    4
12      A  a2                    5
13      B  b1                    5
14      C  c1                    5
15      A  a2                    6
16      B  b1                    6
17      C  c2                    6
18      A  a2                    7
19      B  b2                    7
20      C  c1                    7
21      A  a2                    8
22      B  b2                    8
23      C  c2                    8
+2

@Happy001 , , , :

import numpy as np
import pandas as pd
import itertools

# Candidate x values for each plan, in the same order as the plan ids below.
# (The first list previously read ['a1', 'b2'], which put 'b2' under plan A
# and never produced 'a2'.)
plan_ids = list('ABC')
choices = [['a1', 'a2'], ['b1', 'b2'], ['c1', 'c2']]

# Every way of picking exactly one value per plan.
combos = list(itertools.product(*choices))

# Flatten the combinations into one long column: a1, b1, c1, a1, b1, c2, ...
x = list(itertools.chain.from_iterable(combos))

# The plan ids repeat once per combination; the counter numbers the
# combinations from 1 and is repeated once per plan within a combination.
df = pd.DataFrame({'planid': np.tile(plan_ids, len(combos)),
                   'x': x,
                   'p_count': np.repeat(np.arange(1, len(combos) + 1), len(plan_ids))})

Result:

    p_count planid   x
0         1      A  a1
1         1      B  b1
2         1      C  c1
3         2      A  a1
4         2      B  b1
5         2      C  c2

...

21        8      A  b2
22        8      B  b2
23        8      C  c2
+2

Source: https://habr.com/ru/post/1629647/


All Articles