How to find rows from one list based on a list of indexes from another list with some conditions in Python?

I am new to Python and am trying to get better at writing good Python code. I have two lists: bb, a list of (word, tag) tuples, and x, which stores the indices of the tuples in bb whose tag is the string 'IN' and which are surrounded on both sides by at least one tuple whose tag contains "NN".

What I want from the code below is this: for each index in x, collect the continuous run of tuples whose tag starts with "NN" on both sides of that 'IN' tuple in the bb list.

I tried the code below, but it is not efficient enough. Could someone please help me make it more efficient?

     bb = [('The', 'RB'),
     ('company', 'NN'),
     ('whose', 'NNS'),
     ('stock', 'IN'),
     ('has', 'NNP'),
     ('been', 'NNS'),
     ('on', 'NNP'),
     ('tear', 'VBJ'),
     ('this', 'VB'),
     ('week', 'NNS'),
     ('already', 'NN'),
     ('sells', 'IN'),
     ('its', 'NNP'),
     ('graphics', 'NNS'),
     ('processing', 'VB'),
     ('units', 'VBJ'),
     ('biggest', 'NNS'),
     ('cloud', 'NN'),
     ('companies', 'IN'),
     ('just', 'NNP'),
     ('that', 'IN')]

def solvr(bb):
    """Collect "NN"-tagged words around the last qualifying 'IN' entry of bb.

    Each element of bb is indexed as element[0] (the word) and element[1]
    (the tag).  An index qualifies when its tag equals 'IN' and the tags of
    the elements directly before and after it both contain "NN".

    Returns a flat list built for the LAST qualifying index only: the words
    of every earlier element whose tag contains "NN" (not just the adjacent
    run), followed by the words of the unbroken run of elements after it
    whose tags start with "NN".

    NOTE(review): if no index qualifies, k, front and back are never
    assigned and the loop over back fails with an unbound-local error.
    """
    # Step 1: indices of 'IN' entries that have an "NN" tag on both sides.
    # range(len(bb)-1) never visits the last element, so bb[i+1] always
    # exists; for i == 0, bb[i-1] is bb[-1], i.e. the last element.
    x = []
    for i in range(len(bb)-1):
        if bb[i][1] == 'IN':
            if 'NN' in (bb[i-1][1]) and 'NN' in (bb[i+1][1]):
                x.append(i)
    #===============================

    # Step 2: k, front and back are reassigned for every index in x, so only
    # the values computed for the last index survive this loop.
    for i in range(len(bb)-1):
        if i in x:
            k=[]
            front = bb[i+1:]
            # v is -i, so bb[:-v] is bb[:i]: everything before index i.
            v = 0-i
            back = bb[:-v]
    #======================

    # Step 3: scans ALL of back rather than stopping at the first non-"NN"
    # tag, which is why words from earlier groups end up in the result.
    for i in back:
        if 'NN' in i[1]:
            k.append(i[0])
            # Builds a throwaway list of empty lists; it has no effect on k.
            [[] for i in k] 
    #================================


    # Step 4: append the words after the 'IN' entry until the first tag that
    # does not start with "NN".  j is unused; front is indexed through i.
    for i, j in enumerate(front):
        if front[i][1][:2] == 'NN':
            k.append(front[i][0])
        else:
            break
    return(k)

>> solvr(bb)

output:

['company',
 'whose',
 'has',
 'been',
 'on',
 'week',
 'already',
 'its',
 'graphics',
 'biggest',
 'cloud',
 'just']

However, the expected output groups the words around each "IN", like this:

 [['company', 'whose', 'stock', 'has', 'been', 'on'],
 ['week', 'already', 'sells', 'its', 'graphics'],
 ['biggest', 'cloud', 'companies', 'just']]

, - - .

+4
4

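This looks like a good fit for itertools.groupby, which groups adjacent elements of an iterable according to a key function that you supply.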

:

import itertools

# Key each (word, tag) tuple by whether the first two characters of its tag
# are 'IN' or 'NN'; groupby collects consecutive tuples sharing the same key.
groups = itertools.groupby(bb, lambda x: x[1][:2] in ['IN', 'NN'])
# Keep only the runs whose key is True, materialising each run as a list.
result = [list(b) for a,b in groups if a]
# Keep only the runs that contain an 'IN' tag, reduced to their words.
result = [[w[0] for w in b] for b in result if 'IN' in [w[1] for w in b]]

print(result)

[['company', 'whose', 'stock', 'has', 'been', 'on'], 
 ['week', 'already', 'sells', 'its', 'graphics'], 
 ['biggest', 'cloud', 'companies', 'just', 'that']]

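Here, groupby walks through bb, tests each element against the condition (the tag is "IN" or starts with "NN"), and splits adjacent elements into runs labelled False or True (whether the condition holds). For example, printing the groups gives: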

# Key each (word, tag) tuple by whether the first two characters of its tag
# are 'IN' or 'NN'; groupby collects consecutive tuples sharing the same key.
groups = itertools.groupby(bb, lambda x: x[1][:2] in ['IN', 'NN']) 

# Materialise every run so that the (key, members) pairs can be printed.
print([(a,list(b)) for a,b in groups])

[(False, [('The', 'RB')]),
 (True,
  [('company', 'NN'),
   ('whose', 'NNS'),
   ('stock', 'IN'),
   ('has', 'NNP'),
   ('been', 'NNS'),
   ('on', 'NNP')]),
 (False, [('tear', 'VBJ'), ('this', 'VB')]),
 (True,
  [('week', 'NNS'),
   ('already', 'NN'),
   ('sells', 'IN'),
   ('its', 'NNP'),
   ('graphics', 'NNS')]),
 (False, [('processing', 'VB'), ('units', 'VBJ')]),
 (True,
  [('biggest', 'NNS'),
   ('cloud', 'NN'),
   ('companies', 'IN'),
   ('just', 'NNP'),
   ('that', 'IN')])]

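As you can see, adjacent elements are grouped by the result of the condition. The first list comprehension then keeps only the groups whose key is True (those that meet the condition), and the second keeps only the groups that contain an 'IN' tag, reducing each one to its words.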

If you prefer, all of this can be written as a single (less readable) expression:

# Single-expression form: group consecutive tuples whose tag starts with 'IN'
# or 'NN', keep the True runs that contain an 'IN' tag, and reduce each run
# to its words.
[[w[0] for w in b] for b in [list(b) for a,b in itertools.groupby(bb, lambda x: x[1][:2] in ['IN', 'NN'])  if a] if 'IN' in [w[1] for w in b]]

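If you also want to drop an "IN" that is not surrounded by 'NN' words on both sides, you can extend the approach as follows: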

Build groups and result as before:

# Group consecutive tuples by whether the first two characters of the tag are
# 'IN' or 'NN', then keep only the runs whose key is True.
groups = itertools.groupby(bb, lambda x: x[1][:2] in ['IN', 'NN']) 
result = [list(b) for a,b in groups if a]

Apply groupby again inside each group, this time keyed on whether the tag is 'IN':

# Split every run into consecutive sub-groups keyed by whether the tag equals
# 'IN'; each sub-group is stored as a (key, members) pair.
result = [[(a,list(b)) for a,b in itertools.groupby(r, lambda x: x[1] == 'IN')] for r in result]

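Then, within each entry of result, drop the sub-groups whose groupby boolean is True (the POS tag is 'IN') when they are the first or last sub-group (index 0 or -1):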

# Keep every sub-group whose key is False (tags other than 'IN'); keep a
# sub-group whose key is True only when it is neither the first nor the last
# sub-group of its run.  The keys themselves are dropped here.
result = [[b for i,(a,b) in enumerate(r) if (a and i not in [0,len(r)-1]) or not a] for r in result]

Finally, flatten the sub-groups and drop the POS tags, keeping only the words:

# Flatten the sub-groups of each run and keep only the word (element 0) of
# every tuple.
result = [[w[0] for sub in r for w in sub] for r in result]

print(result)

[['company', 'whose', 'stock', 'has', 'been', 'on'],
 ['week', 'already', 'sells', 'its', 'graphics'],
 ['biggest', 'cloud', 'companies', 'just']]
+3

( ):

def solve(bb):
    """Return the "NN"-tagged words whose run of "NN" tags borders an "IN" tag.

    A word is kept when its tag contains "NN" and, after skipping over
    neighbouring tags that also contain "NN", the first other tag reached
    contains "IN" -- looking either backwards or forwards from the word.
    The forward look never inspects the final element of bb, so an "IN" in
    the very last position does not qualify the words in front of it.

    Args:
        bb: sequence of items indexed as item[0] (word) and item[1] (tag).

    Returns:
        A flat list of the selected words, in their original order.

    The answer is computed in two linear passes instead of recursing over
    list slices, so long runs of "NN" tags cannot exhaust the recursion
    limit and the cost grows linearly with len(bb).
    """
    tags = [item[1] for item in bb]
    count = len(tags)

    # in_before[i]: the nearest tag to the left of i that lacks "NN"
    # contains "IN" (False when there is no such tag).
    in_before = []
    flag = False
    for tag in tags:
        in_before.append(flag)
        if "NN" not in tag:
            flag = "IN" in tag

    # in_after[i]: same question looking right, but the last element of bb
    # is deliberately left out of the scan (hence count - 2).
    in_after = [False] * count
    flag = False
    for i in range(count - 2, -1, -1):
        if "NN" not in tags[i]:
            flag = "IN" in tags[i]
        in_after[i] = flag

    return [
        item[0]
        for item, tag, before, after in zip(bb, tags, in_before, in_after)
        if "NN" in tag and (before or after)
    ]

():

def solve(bb):
    """Return the "NN"-tagged words whose run of "NN" tags borders an "IN" tag.

    Lambda-based variant: s(lst) skips the leading items whose tag contains
    "NN" and reports whether the first remaining tag contains "IN" (False
    when the list runs out).  A word is kept when its own tag contains "NN"
    and s succeeds either on the reversed items before it or on the items
    from it up to, but excluding, the last element of bb.
    """
    s = lambda lst: bool(lst) and (
        "IN" in lst[0][1] if "NN" not in lst[0][1] else s(lst[1:])
    )
    picked = []
    for i, item in enumerate(bb):
        if "NN" not in item[1]:
            continue
        if s(bb[:i][::-1]) or s(bb[i:-1]):
            picked.append(item[0])
    return picked
+1

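Note that hits may overlap: a sequence such as [N*, N*, IN, N*, N*, IN, N*] yields [[N*, N*, IN, N*, N*], [N*, N*, IN, N*]]. The smallest possible hit has three tokens (N*, IN, N*). The function below is a generator that keeps a buffer of the preceding N* words while it walks through the list.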

def solvr(bb):
    """Yield every word group shaped like N* ... 'IN' N* ... found in bb.

    bb is a sequence of (word, tag) pairs.  A group is produced for each
    pair tagged exactly 'IN' that is directly preceded by at least one pair
    whose tag starts with 'N' and directly followed by one.  The group holds
    the preceding run of N-tagged words, the 'IN' word, and the following
    run of N-tagged words.  The last pair of bb is never tested as 'IN'.
    """
    # Words of the unbroken run of N-tagged pairs seen just before this one.
    preceding = []

    for pos, ((word, tag), (_, following_tag)) in enumerate(zip(bb, bb[1:])):
        if tag.startswith('N'):
            preceding.append(word)
            continue

        # A hit needs at least three tokens: N*, IN, N*.
        if tag == 'IN' and following_tag.startswith('N') and preceding:
            group = list(preceding)
            group.append(word)
            nxt = pos + 1
            while nxt < len(bb) and bb[nxt][1].startswith('N'):
                group.append(bb[nxt][0])
                nxt += 1
            yield group

        # Any tag that does not start with 'N' ends the current run.
        preceding = []
print(list(solvr(bb)))
+1

"":

>>> bb = [('The', 'RB'), ('company', 'NN'), ('whose', 'NNS'), ('stock', 'IN'), ('has', 'NNP'), ('been', 'NNS'), ('on', 'NNP'), ('tear', 'VBJ'), ('this', 'VB'), ('week', 'NNS'), ('already', 'NN'), ('sells', 'IN'), ('its', 'NNP'), ('graphics', 'NNS'), ('processing', 'VB'), ('units', 'VBJ'), ('biggest', 'NNS'), ('cloud', 'NN'), ('companies', 'IN'), ('just', 'NNP'), ('that', 'IN')]

>>> filter(None, map(str.strip, ' '.join([word if pos.startswith('NN') or pos == 'IN'else '|' for word, pos in bb]).split('|')))
['company whose stock has been on', 'week already sells its graphics', 'biggest cloud companies just that']

, :

# Collect runs of consecutive NN*/IN words from bb as space-joined strings.
answer = []
tmp = []
for word, pos in bb:
    if pos == 'IN' or pos.startswith('NN'):
        tmp.append(word)
        continue
    # Any other tag closes the run that was being collected, if there is one.
    if tmp:
        answer.append(' '.join(tmp))
        tmp = []

# Flush the final run when bb ends on an NN*/IN word.
if tmp:
    answer.append(' '.join(tmp))

bb. @bunji itertools.groupby

+1

Source: https://habr.com/ru/post/1676998/


All Articles