Find the "best" full subgraphs

While optimizing the performance of my application, I encountered a huge performance bottleneck in a few lines of code (Python).

I have N tokens. Each token has a value assigned to it. Some of the tokens contradict (for example, tokens 8 and 12 cannot "live together"). My task is to find the k-best marker groups. The value of a token group is simply the sum of the token values ​​in it.

Naive algorithm (which I implemented ...):

  • find all 2 ^ N permutations of tokens-groups of tokens
  • Exclude groups of tokens that have contradictions in them
  • Calculate the value of all remaining token groups
  • Sort token groups by value
  • Select top K-marker-groups

Real numbers in the world - I need the top 10 tokens from a group of 20 tokens (for which I calculated 1,000,000 permutations (!)), Narrowed to 3,500 consistent groups of tokens. It took 5 seconds on my laptop ...

I am sure that I can somehow optimize steps 1 + 2 by creating only consistent groups of tokens.

I’m also sure that I can somehow magically find the best group of tokens in one search and find a way to cross marker groups, reducing the value, thus finding only the top 10 that I'm looking for ...

my actual code is:

all_possibilities = sum((list(itertools.combinations(token_list, i)) for i in xrange(len(token_list)+1)), [])
all_possibilities = [list(option) for option in all_possibilities if self._no_contradiction(option)] 
all_possibilities = [(option, self._probability(option)) for option in all_possibilities]
all_possibilities.sort(key = lambda result: -result[1]) # sort by descending probability

Please, help?

Tal.

+3
source share
5 answers

A really easy way to get all consistent marker groups:

#!/usr/bin/env python

token_list = ['a', 'b', 'c']

contradictions = {
    'a': set(['b']),
    'b': set(['a']),
    'c': set()
}

result = []

while token_list:
    token = token_list.pop()
    new = [set([token])]
    for r in result:
        if token not in contradictions or not r & contradictions[token]:
            new.append(r | set([token]))
    result.extend(new)

print result
+2
source

1 + 2 : ( , ). :

  • result, , conflicting ,
  • result ( ) .

, :

token_list = ['a', 'b', 'c']

contradictions = {
    'a': set(['b']),
    'b': set(['a']),
    'c': set()
}

class Generator(object):
    def __init__(self, token_list, contradictions):
        self.list = token_list
        self.contradictions = contradictions
        self.max_start = len(self.list) - 1

    def add_no(self, start, result, conflicting):
        if start < self.max_start:
            for g in self.gen(start + 1, result, conflicting):
                yield g
        else:
            yield result[:]

    def add_yes(self, token, start, result, conflicting):
        result.append(token)
        new_conflicting = conflicting | self.contradictions[token]
        for g in self.add_no(start, result, new_conflicting):
            yield g
        result.pop()

    def gen(self, start, result, conflicting):
        token = self.list[start]
        if token not in conflicting:
            for g in self.add_yes(token, start, result, conflicting):
                yield g
        for g in self.add_no(start, result, conflicting):
            yield g

    def go(self):
        return self.gen(0, [], set())

:

g = Generator(token_list, contradictions)
for x in g.go():
    print x

, (- Python), .

+3

O(n (log n)) O(n + m) n m

NP- , , "" , , 1 ( ).

, ; , , . , .

  • , ​​ [start, end) (.. , ). , .
  • . ( , ) . - .
  • (, - ) , , . , , , J, , J
  • J, : , , (), J, J. , , J, .

- , () , .

  • J token[J], , , , , token.end, K K < J token[K].end <= token[J].start: K (, , ).
  • , token[J], token[J-1].
  • token[-1] token[-1].end = 0 0 .

, . , ( ) - O (n log (n)) , , - O (log (n)) - n ; O (n log (n)). O (n), , - , , , , - log n, , : , - , O(n + m). n , .

, "", , .

? , ; , , , .

k-

top-K, , , - , K - , . , , , token[J], union k- . O(n log(n) + n k log(k)), .

+3

" " :

import itertools

# tokens in decreasing order of value (must all be > 0)
toks = 12, 11, 8, 7, 6, 2, 1

# contradictions (dict highestvaltok -> set of incompatible ones)
cont = {12: set([11, 8, 7, 2]),
    11: set([8, 7, 6]),
         7: set([2]),
     2: set([1]),
       }

rec_calls = 0

def bestgroup(toks, contdict, arein=(), contset=()):
  """Recursively compute the highest-valued non-contradictory subset of toks."""
  global rec_calls
  toks = list(toks)
  while toks:
    # find the top token compatible w/the ones in `arein`
    toptok = toks.pop(0)
    if toptok in contset:
      continue
    # try to extend with and without this toptok
    without_top = bestgroup(toks, contdict, arein, contset)
    contset = set(contset).union(c for c in contdict.get(toptok, ()))
    newarein = arein + (toptok,)
    with_top = bestgroup(toks, contdict, newarein, contset)
    rec_calls += 1
    if sum(with_top) > sum(without_top):
      return with_top
    else:
      return without_top
  return arein

def noncongroups(toks, contdict):
  """Count possible, non-contradictory subsets of toks."""
  tot = 0
  for l in range(1, len(toks) + 1):
    for c in itertools.combinations(toks, l):
      if any(cont[k].intersection(c) for k in c if k in contdict): continue
      tot += 1
  return tot


print bestgroup(toks, cont)
print 'calls: %d (vs %d of %d)' % (rec_calls, noncongroups(toks, cont), 2**len(toks))

, , () , ( , - noncongroups, , , , ; -).

" ", - ( , , , , - ;-) ( ). , , ( , , , , - , , , ).

+2

, , , .

, , t , , .

def solve(tokens, contradictions):
   if not tokens:
      yield set()
   else:
      tokens = set(tokens)
      t = tokens.pop()
      for solution in solve(tokens - contradictions[t], contradictions):
         yield solution | set([t])
      if contradictions[t] & tokens:
         for solution in solve(tokens, contradictions):
            if contradictions[t] & solution:
               yield solution

, ( memoization) .

0

Source: https://habr.com/ru/post/1752084/


All Articles