Counting the number of matches between two lists, grouped by the condition used to split the source list

I have lists of floats with some hidden information about the “level” encoded in the scale of the float, and I can separate the “levels” of the floats like this:

import math
import numpy as np

all_scores = [
    1.0369411057174144e+22, 2.7997409854370188e+23, 1.296176382146768e+23,
    6.7401171871631936e+22, 6.7401171871631936e+22, 2.022035156148958e+24,
    8.65845823274041e+23, 1.6435516525621017e+24, 2.307193960221247e+24,
    1.285806971089594e+24,
    9603539.08653573, 17489013.841076534, 11806185.6660164,
    16057293.564414097, 8546268.728385007, 53788629.47091801,
    31828243.07349571, 51740168.15200098, 53788629.47091801,
    22334836.315934014,
    4354.0, 7474.0, 4354.0, 4030.0, 6859.0,
    8635.0, 7474.0, 8635.0, 9623.0, 8479.0,
]

# The level is encoded in the magnitude of a score: anything above e^50 is
# "easy", anything above e^10 (but not above e^50) is "med", the rest "hard".
easy_cutoff = math.exp(50)
med_cutoff = math.exp(10)

easy = [s for s in all_scores if s > easy_cutoff]
med = [s for s in all_scores if s > med_cutoff and not s > easy_cutoff]
hard = [s for s in all_scores if not s > med_cutoff]

print([easy, med, hard])

[output]:

[[1.0369411057174144e+22, 2.7997409854370188e+23, 1.296176382146768e+23, 6.7401171871631936e+22, 6.7401171871631936e+22, 2.022035156148958e+24, 8.65845823274041e+23, 1.6435516525621017e+24, 2.307193960221247e+24, 1.285806971089594e+24], [9603539.08653573, 17489013.841076534, 11806185.6660164, 16057293.564414097, 8546268.728385007, 53788629.47091801, 31828243.07349571, 51740168.15200098, 53788629.47091801, 22334836.315934014], [4354.0, 7474.0, 4354.0, 4030.0, 6859.0, 8635.0, 7474.0, 8635.0, 9623.0, 8479.0]]

And I have another list that will match the list all_scores:

input_scores = [0.0, 2.7997409854370188e+23, 0.0, 6.7401171871631936e+22, 0.0, 0.0, 8.6584582327404103e+23, 0.0, 2.3071939602212471e+24, 0.0, 0.0, 17489013.841076534, 11806185.6660164, 0.0, 8546268.728385007, 0.0, 31828243.073495708, 51740168.152000979, 0.0, 22334836.315934014, 4354.0, 7474.0, 4354.0, 4030.0, 0.0, 8635.0, 0.0, 0.0, 0.0, 8479.0]

I need to check how many of the easy, med and hard scores match across all the scores. I can get a boolean for whether there is a match at each position of the flattened list all_scores like this:

# Compare the two lists position by position: True where the scores agree.
matches = [given == known for given, known in zip(input_scores, all_scores)]
print(matches)

[output]:

[False, True, False, True, False, False, True, False, True, False, False, True, True, False, True, False, True, True, False, True, True, True, True, True, False, True, False, False, False, True]

Is there a way to find out how many easy / med / hard matches there are, and the sum of the matched scores at each level?

I tried this and it works:

# 1 where the scores agree at a position, 0 where they differ.
matches = [int(given == known) for given, known in zip(input_scores, all_scores)]

# `matches` is laid out as easy, then med, then hard; these are the slice
# bounds of each level (the last slice is open-ended).
bounds = [0, len(easy), len(easy) + len(med), None]

for level, (start, stop) in zip((easy, med, hard), zip(bounds, bounds[1:])):
    window = matches[start:stop]
    # number of matches, size of the level, sum of the matched scores
    print(sum(window), len(level), sum(np.array(level) * window))

[output]:

4 10 3.52041505391e+24
6 10 143744715.777
6 10 37326.0

But there must be a less verbose way to achieve the same result.

+4
6

You can use np.digitize and np.bincount:

categories = 'hard', 'med', 'easy'

# Bin every score at the cut points e^10 and e^50. With right=True a score
# equal to a cut point lands in the lower bin.
cut_points = np.exp((10, 50))
cat_map = np.digitize(all_scores, cut_points, right=True)
# cat_map now holds 0 for 'hard', 1 for 'med' and 2 for 'easy'

# Positions where the two score lists disagree go into an extra bin (3).
# At least one operand has to be an array for the comparison to be
# element-wise.
mismatch = np.asanyarray(all_scores) != input_scores
cat_map[mismatch] = 3

# Number of entries per bin, then the per-bin total of all_scores.
numbers = np.bincount(cat_map)
sums = np.bincount(cat_map, weights=all_scores)

# Report one line per bin.
labels = categories + ('unmatched',)
for label, count, total in zip(labels, numbers, sums):
    print('{:12}  {:2d}  {:6.4g}'.format(label, count, total))

# output:
#
# hard           6  3.733e+04
# med            6  1.437e+08
# easy           4  3.52e+24
# unmatched     14  5.159e+24
+1

... Counter!

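A Counter is a dict subclass, and calling .update() on it adds the new counts to the existing ones instead of replacing them. For example: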

from collections import Counter

# Start with a count of 2 for 'a'
counter = Counter({'a': 2})
# update() adds the given counts to the existing ones (2 + 3)
counter.update({'a': 3})
counter['a']
> 5

, :

from collections import Counter

# One Counter per statistic (number of matches, level size, sum of matched
# scores), each keyed by level name and starting at zero.
matches, counts, scores = (
    Counter(easy=0, med=0, hard=0) for _ in range(3)
)

easy_cutoff = math.exp(50)
med_cutoff = math.exp(10)

for known, given in zip(all_scores, input_scores):
    # The level is decided by the magnitude of the known score.
    if known > easy_cutoff:
        level = 'easy'
    elif known > med_cutoff:
        level = 'med'
    else:
        level = 'hard'

    hit = known == given
    matches[level] += hit
    counts[level] += 1
    scores[level] += known if hit else 0

for cat in ('easy', 'med', 'hard'):
    print(matches[cat], counts[cat], scores[cat])
+3

dict:

k = ('easy', 'meduim', 'hard')
param = dict.fromkeys(k, 0)
outlist = []

# `matches` is read in consecutive chunks of 10 entries, one chunk per level
# name in `k`; each chunk contributes a one-entry dict with its match count.
for index, start in enumerate(range(0, len(matches), 10)):
    chunk = matches[start:start + 10]
    outlist.append({k[index]: sum(chunk)})

print(outlist)
[{'easy': 4}, {'meduim': 6}, {'hard': 6}]
+1

Alternatively, you can use np.in1d:

# we need numpy arrays
easy = np.array(easy)
med = np.array(med)
hard = np.array(hard)

for level in [easy, med, hard]:
    matches = level[np.where(np.in1d(level, input_scores))]
    print(len(matches), len(level), np.sum(matches))

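Note, however, that the result differs from yours, because values are matched regardless of their position. For example, duplicated values such as 7474.0 and 4354.0 in the hard array are counted once per occurrence. The same happens with 6.7401171871631936e+22.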

5 10 3.58781622578e+24
6 10 143744715.777
8 10 53435.0

, , , (, , ).


EDIT: First keep only the values of input_scores that occur in all_scores. Then, for each level, select the matching values, again with np.in1d:

# keep only the input scores whose value occurs in all_scores
# NOTE(review): this indexing needs input_scores to be a numpy array, not a
# plain list; confirm it has been converted before this point
scores = input_scores[np.where(np.in1d(input_scores, all_scores))]
for level in [easy, med, hard]:
    # of those, keep the ones whose value belongs to this level
    matches = scores[np.where(np.in1d(scores, level))]
    # number of matches, size of the level, sum of the matched scores
    print(len(matches), len(level), np.sum(matches))

. :

4 10 3.52041505391e+24
6 10 143744715.777
6 10 37326.0

EDIT 2: np.where is not needed here; the boolean mask returned by np.in1d can be used for indexing directly.

# keep only the input scores whose value occurs in all_scores; the boolean
# mask from np.in1d is used directly as the index
# NOTE(review): this indexing needs input_scores to be a numpy array, not a
# plain list; confirm it has been converted before this point
scores = input_scores[np.in1d(input_scores, all_scores)]
for level in [easy, med, hard]:
    # of those, keep the ones whose value belongs to this level
    matches = scores[np.in1d(scores, level)]
    # number of matches, size of the level, sum of the matched scores
    print(len(matches), len(level), np.sum(matches))

, .


EDIT 3: A complete example. With numpy arrays, the split into easy / med / hard can also be done with boolean masks:

import math
import numpy as np

all_scores = np.array([
    1.0369411057174144e+22, 2.7997409854370188e+23, 1.296176382146768e+23,
    6.7401171871631936e+22, 6.7401171871631936e+22, 2.022035156148958e+24,
    8.65845823274041e+23, 1.6435516525621017e+24, 2.307193960221247e+24,
    1.285806971089594e+24,
    9603539.08653573, 17489013.841076534, 11806185.6660164,
    16057293.564414097, 8546268.728385007, 53788629.47091801,
    31828243.07349571, 51740168.15200098, 53788629.47091801,
    22334836.315934014,
    4354.0, 7474.0, 4354.0, 4030.0, 6859.0,
    8635.0, 7474.0, 8635.0, 9623.0, 8479.0,
])

input_scores = np.array([
    0.0, 2.7997409854370188e+23, 0.0, 6.7401171871631936e+22, 0.0,
    0.0, 8.6584582327404103e+23, 0.0, 2.3071939602212471e+24, 0.0,
    0.0, 17489013.841076534, 11806185.6660164, 0.0, 8546268.728385007,
    0.0, 31828243.073495708, 51740168.152000979, 0.0, 22334836.315934014,
    4354.0, 7474.0, 4354.0, 4030.0, 0.0,
    8635.0, 0.0, 0.0, 0.0, 8479.0,
])


def split_levels(scores, med_cutoff=math.exp(10), easy_cutoff=math.exp(50)):
    """Split *scores* into ``(easy, med, hard)`` arrays by magnitude.

    Mirrors the if/elif/else classification of the question: a score above
    ``easy_cutoff`` is easy, a score above ``med_cutoff`` (but not above
    ``easy_cutoff``) is med, and everything else is hard. Every score ends up
    in exactly one level, including scores exactly equal to a cutoff.

    Args:
        scores: 1-D sequence or array of scores.
        med_cutoff: lower bound (exclusive) of the med level.
        easy_cutoff: lower bound (exclusive) of the easy level.

    Returns:
        Tuple of three numpy arrays ``(easy, med, hard)``; the original
        order of the scores is kept within each array.
    """
    scores = np.asarray(scores)
    above_med = scores > med_cutoff
    above_easy = scores > easy_cutoff
    # `&` / `~` are the element-wise boolean "and" / "not" for masks
    return scores[above_easy], scores[above_med & ~above_easy], scores[~above_med]


easy, med, hard = split_levels(all_scores)

# Keep only the input scores whose value occurs in all_scores (this drops the
# 0.0 placeholders), then pick out the ones belonging to each level.
scores = input_scores[np.isin(input_scores, all_scores)]
for level in [easy, med, hard]:
    matches = scores[np.isin(scores, level)]
    # number of matches, size of the level, sum of the matched scores
    print(len(matches), len(level), np.sum(matches))
+1

dicts :

from collections import defaultdict

scores = {}  # Maps each score value to the name of its category
values = defaultdict(int)  # Number of scores seen per category

# Category thresholds, computed once instead of on every iteration
easy_cutoff = math.exp(50)
med_cutoff = math.exp(10)

for i in all_scores:
    if i > easy_cutoff:
        values["easy"] += 1
        scores[i] = "easy"
    elif i > med_cutoff:
        values["medium"] += 1
        scores[i] = "medium"
    else:
        values["hard"] += 1
        scores[i] = "hard"

input_scores = [0.0, 2.7997409854370188e+23, 0.0, 6.7401171871631936e+22, 0.0, 0.0, 8.6584582327404103e+23, 0.0, 2.3071939602212471e+24, 0.0, 0.0, 17489013.841076534, 11806185.6660164, 0.0, 8546268.728385007, 0.0, 31828243.073495708, 51740168.152000979, 0.0, 22334836.315934014, 4354.0, 7474.0, 4354.0, 4030.0, 0.0, 8635.0, 0.0, 0.0, 0.0, 8479.0]

# Find the categories of your inputs; inputs whose value is not a known
# score (e.g. the 0.0 placeholders) are skipped. The lookup is by value,
# not by position.
r = [(scores[i], i) for i in input_scores if i in scores]

# Group the matched inputs by category
res = defaultdict(list)
for k, v in r:
    res[k].append(v)

# category, number of matches, size of the category, sum of matched scores
for k, v in res.items():
    print(k, len(v), values[k], sum(v))



>>> medium 6 10 143744715.777
hard 6 10 37326.0
easy 4 10 3.52041505391e+24
+1

, , ( ). , Paul Panzer .:)

    def MatchesLenghtSums(L, lst):
        """
        Summarise how the elements of lst match each of three level lists.

        L must hold exactly three sequences (easy, medium, hard). For each
        of them, the elements of lst that are contained in that sequence are
        collected (membership is tested by value).

        Returns a single formatted string that reports, per level, the
        number of matching elements, the length of the level and the sum of
        the matching elements.
        """
        easy, medium, hard = L

        def hits_in(level):
            # elements of lst, in order, whose value occurs in `level`
            return [item for item in lst if item in level]

        easy_hits = hits_in(easy)
        medium_hits = hits_in(medium)
        hard_hits = hits_in(hard)

        template = "(Easy Matches {} Lenght {} sum {}) (Medium Matches {} Length {} sum {}) (Hard Matches {} Lenght {} sum {})"
        return template.format(
            len(easy_hits), len(easy), sum(easy_hits),
            len(medium_hits), len(medium), sum(medium_hits),
            len(hard_hits), len(hard), sum(hard_hits),
        )

# Bundle the three level lists and compare them against the input scores
L = [easy, med, hard]
lst = input_scores
MatchesLenghtSums(L, lst)

>>>'(Easy Matches 4 Lenght 10 sum 3.520415053910622e+24) (Medium Matches 6 Length 10 sum 143744715.77690864) (Hard Matches 6 Lenght 10 sum 37326.0)'
0

Source: https://habr.com/ru/post/1672778/


All Articles