Insert a row into data with multiple indices

Question

Insert a row into data with multiple indices

I have a multi-index frame (multindex in rows) and I want to insert a row with a specific row number. This question has been addressed before ( How to insert a row in a Pandas DataFrame multi-index? ), But I have a slightly different problem that I want to solve

In fact. I solved it, but it is so ugly that I want to ask if there is a better way to do this.

Since there is no insert_row function in Pandas, I thought of inserting a row by first copying my data frame to a specific row to a new frame using a slice, then adding the row I want to insert, and then adding the rest of the original data frame. In fact, this approach works. Here is an example

First I create an empty data frame with

pipeinfo_index = pd.MultiIndex.from_tuples([tuple([None, None])], names=["Label", "Side"])
pipeinfoDF = pd.DataFrame(index=pipeinfo_index, columns=[])
pipeinfoDF.drop(np.nan, level='Label', inplace=True)
for i in range(4):
    label = "label{}".format(i)
    pipeinfoDF.ix[(label, "A"),"COL1"] = i
    pipeinfoDF.ix[(label, "B"),"COL1"] = 2*i+1

which looks like

             COL1
Label  Side      
label0 A        0
       B        1
label1 A        1
       B        3
label2 A        2
       B        5
label3 A        3

label1 label2, ,

             COL1
Label  Side      
label0 A        0
       B        1
label1 A        1
       B        3
oker5  A        10
       B        30
label2 A        2
       B        5
label3 A        3

,

# copy first half to temporary data frame
part1= pipeinfoDF.ix[:"label1"].copy()
# copy second half to temporary data frame
part2= pipeinfoDF.ix["label2":].copy()

# append new row to insert to first part
part1.ix[("oker5", "B"),"COL1"] = 10
part1.ix[("oker5", "A"),"COL2"] = 30

# append second half of row to new data frame
for label, side in part2.index:
     print("adding {} {}".format(label, side))
     part1.ix[(label, side),:] = part2.ix[(label, side),:]

# copy the new data frame to the initial data frame
pipeinfoDF = part1.copy()

; pipeinfoDF, , . : ( ), . ,

part3= pipeinfoDF2.loc[:"oker5"].copy()

:

Traceback (most recent call last):
  File "C:/Apps/JetBrains/PyCharm Community Edition 4.5.4/jre/jre/bin/DataEelco/.PyCharm/config/scratches/scratch", line 96, in <module>
    part3= pipeinfoDF2.loc[:"oker5"].copy()
  File "C:\Apps\Anaconda\Anaconda2\envs\py34\lib\site-packages\pandas\core\indexing.py", line 1189, in __getitem__
    return self._getitem_axis(key, axis=0)
  File "C:\Apps\Anaconda\Anaconda2\envs\py34\lib\site-packages\pandas\core\indexing.py", line 1304, in _getitem_axis
    return self._get_slice_axis(key, axis=axis)
  File "C:\Apps\Anaconda\Anaconda2\envs\py34\lib\site-packages\pandas\core\indexing.py", line 1211, in _get_slice_axis
    slice_obj.step)
  File "C:\Apps\Anaconda\Anaconda2\envs\py34\lib\site-packages\pandas\core\index.py", line 2340, in slice_indexer
    start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
  File "C:\Apps\Anaconda\Anaconda2\envs\py34\lib\site-packages\pandas\core\index.py", line 4990, in slice_locs
    return super(MultiIndex, self).slice_locs(start, end, step, kind=kind)
  File "C:\Apps\Anaconda\Anaconda2\envs\py34\lib\site-packages\pandas\core\index.py", line 2490, in slice_locs
    end_slice = self.get_slice_bound(end, 'right', kind)
  File "C:\Apps\Anaconda\Anaconda2\envs\py34\lib\site-packages\pandas\core\index.py", line 4961, in get_slice_bound
    return self._partial_tup_index(label, side=side)
  File "C:\Apps\Anaconda\Anaconda2\envs\py34\lib\site-packages\pandas\core\index.py", line 4996, in _partial_tup_index
    (len(tup), self.lexsort_depth))
KeyError: 'Key length (1) was greater than MultiIndex lexsort depth (0)'

, , ,

print(part1)

             COL1
Label  Side      
label0 A        0
       B        1
label1 A        1
       B        3

print(part1.index)

MultiIndex(levels=[['label0', 'label1', 'label2', 'label3'], ['A', 'B']],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
           names=['Label', 'Side'])

, 1, dataframe, , multiindex - . , , part2df, ( )

, , , . , , part1 part2, ,

def get_slice(dataframe, start_from_label=None, end_at_label=None):
    # create a empty data frame and initialise with rows copy from the dataframe starting from
    # start_from_label and ending at end_at_label
    mi = pd.MultiIndex.from_tuples([tuple([None, None])], names=dataframe.index.names)

    df_new = pd.DataFrame(index=mi, columns=[])
    df_new.drop(np.nan, level='Label', inplace=True)

    insert = False
    for label, df in dataframe.groupby(level=0):
        side_list = df.index.get_level_values('Side')
        if start_from_label is None or label == start_from_label:
            insert = True
        if insert:
            for side in side_list:
                for col in dataframe.columns:
                    df_new.ix[(label, side),col] = dataframe.ix[(label, side),col]

        if end_at_label is not None and label == end_at_label:
            break

    return df_new

, ,

# part1= pipeinfoDF.ix[:"label1"].copy()
part1 = get_slice(pipeinfoDF, end_at_label="label1")

# part2= pipeinfoDF.ix["label2":].copy()
part2 = get_slice(pipeinfoDF, start_from_label="label2")

. , 1 2 . ,

MultiIndex(levels=[['label0', 'label1'], ['A', 'B']],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
           names=['Label', 'Side'])

, :

part3= pipeinfoDF2.loc[:"oker5"].copy()
print(part3)

             COL1
Label  Side      
label0 A        0
       B        1
label1 A        1
       B        3
oker5  B       30
       A       10

, KeyError, ,

: ? , , , , , . , - .

Eelco

. , . Side - A-B, , . , / . A/B B/A. dropna.

:

for i in range(4):
    label = "label{}".format(i)
    if i!=2:
        l1 = "A"
        l2 = "B"
    else:
        l1 = "B"
        l2 = "A"

    pipeinfoDF.ix[(label, l1),"COL1"] = i
    pipeinfoDF.ix[(label, l2),"COL1"] = 2*i+1
    pipeinfoDF.ix[(label, l1),"COL2"] = 10*i
    pipeinfoDF.ix[(label, l2),"COL2"] = 10*(2*i+1)

             COL1  COL2
Label  Side            
label0 A        0     0
       B        1    10
label1 A        1    10
       B        3    30
label2 B        2    20
       A        5    50
label3 A        3    30
       B        7    70

uncack B, A, unack/stack A/B. , .

, , . , , :-)

:

# create the lines to add 
newdata = pd.DataFrame(index=pipeinfo_index, columns=[])
newdata.ix[('oker8', "B"), "COL1"] = 10
newdata.ix[('oker8', "A"lot, it ), "COL1"] = 30
newdata.ix[('oker8', "B"), "COL2"] = 108
newdata.ix[('oker8', "A"), "COL2"] = 300
newdata2 = pd.DataFrame(index=pipeinfo_index, columns=[])
newdata2.ix[('oker9', "A"), "COL1"] = 20
newdata2.ix[('oker9', "B"), "COL1"] = 50
newdata2.ix[('oker9', "A"), "COL2"] = 2lot, it 023
newdata2.ix[('oker9', "B"), "COL2"] = 5320

#get the indices to add the row
inx1=np.where(pipeinfoDF.reset_index().Label.values=='label1'); inx1=inx1[0][0]+2
inx2=np.where(pipeinfoDF.reset_index().Label.values=='label2'); inx2=inx2[0][0]

#insert the first data row
pipeinfoDF = pd.concat([pipeinfoDF.ix[:inx1], newdata, pipeinfoDF.ix[inx2:]])
pipeinfoDF.drop(np.nan, level='Label', inplace=True)

             COL1  COL2
Label  Side            
label0 A        0     0
       B        1    10
label1 A        1    10
       B        3    30
oker8  B       10   108
       A       30   300
label2 B        2    20
       A        5    50
label3 A        3    30
       B        7    70

inx1=np.where(pipeinfoDF.reset_index().Label.values=='label2'); inx1=inx1[0][0]+2
inx2=np.where(pipeinfoDF.reset_index().Label.values=='label3'); inx2=inx2[0][0]

pipeinfoDF = pd.concat([pipeinfoDF.ix[:inx1], newdata2, pipeinfoDF.ix[inx2:]])
pipeinfoDF.drop(np.nan, level='Label', inplace=True)
print(pipeinfoDF)

             COL1  COL2
Label  Side            
label0 A        0     0
       B        1    10
label1 A        1    10
       B        3    30
oker8  B       10   108
       A       30   300
label2 B        2    20
       A        5    50
oker9  A       20  2023
       B       50  5320
label3 A        3    30
       B        7    70

, . , , - , , . , ,

(pipeinfoDF.ix [: 'label2'])

KeyError, script, , :

            COL1  COL2
Label  Side            
label0 A        0     0
       B        1    10
label1 A        1    10
       B        3    30
label2 B        2    20
       A        5    50

, . , , ? , : !

Eelco

+4

python pandas insert

Eelco van Vliet 07 . '15 13:25

2

, Index Multiindex. df, dfs, . df .

import pandas as pd
import numpy as np

pipeinfo_index = pd.MultiIndex.from_tuples([tuple([None, None])], names=["Label", "Side"])
pipeinfoDF = pd.DataFrame(index=pipeinfo_index, columns=[])
pipeinfoDF.drop(np.nan, level='Label', inplace=True)
for i in range(4):
    label = "label{}".format(i)
    pipeinfoDF.ix[(label, "A"),"COL1"] = i
    pipeinfoDF.ix[(label, "B"),"COL1"] = 2*i+1

print pipeinfoDF
#
#             COL1
#Label  Side
#label0 A        0
#       B        1
#label1 A        1
#       B        3
#label2 A        2
#       B        5
#label3 A        3
#       B        7

#pivot df - Multiindex convert to Index
pipeinfoDF = pipeinfoDF.unstack()

#create df for insert - one row in 1. df or 2 row in 2. df
line = pd.DataFrame( [[10, 30]], index=['oker5'], columns=pipeinfoDF.columns)
line1 = pd.DataFrame( [[20, 50], [40,60]], index=['hron','ipel'], columns=pipeinfoDF.columns)

#concat all df
pipeinfoDF = pd.concat([pipeinfoDF.ix[:'label1'], line, line1, pipeinfoDF.ix['label2':]])

#stack data back for Multiindex
pipeinfoDF = pipeinfoDF.stack()

print pipeinfoDF
#
#             COL1
#Label  Side
#label0 A        0
#       B        1
#label1 A        1
#       B        3
#oker5  A       10
#       B       30
#hron   A       20
#       B       50
#ipel   A       40
#       B       60
#label2 A        2
#       B        5
#label3 A        3
#       B        7

0

jezrael 07 . '15 21:34

rebeling · Accepted Answer · 2015-09-07T23:03:58+0000

, . drop , KeyError.

# create new dataframe based on pipeinfo_index
newdata = pd.DataFrame(index=pipeinfo_index, columns=[])
newdata.ix[('oaker', "A"), "COL1"] = 10
newdata.ix[('oaker', "B"), "COL1"] = 30

idx = getindexof('label1')

pipeinfoDF = pd.concat([pipeinfoDF.ix[:idx], newdata]) #, pipeinfoDF.ix[idx:]])
# drop the NaN and recieve a new object
pipeinfoDF.drop(np.nan, level='Label', inplace=True)

pipeinfoDF.index
MultiIndex(levels=[[u'label0', u'label1', u'oaker'], [u'A', u'B']],
       labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
       names=[u'Label', u'Side'])

Insert a row into data with multiple indices

More articles: