Counting the number of values that fall within each interval

Is there an efficient way in Python to count how many numbers in an array fall within certain intervals? The number of intervals that I will use can become quite large.

as:

mylist = [4,4,1,18,2,15,6,14,2,16,2,17,12,3,12,4,15,5,17]

some function(mylist, startpoints):
   # startpoints = [0,10,20]
   count values in range [0,9]
   count values in range [10-19]

output = [10,9]
+3
source share
4 answers

You will have to iterate over the list at least once.

The solution below works with any sequence / interval that implements comparison (<, >, etc.) and uses bisect to find the correct position among the intervals, so it is very fast.

It will work with floats, text, or any other. Just pass a sequence and a list of intervals.

from collections import defaultdict
from bisect import bisect_left

def count_intervals(sequence, intervals):
    """Count how many items of *sequence* fall under each boundary.

    *intervals* is an iterable of boundary values; it does not need to be
    sorted and is not modified. Each item is counted under the smallest
    boundary that is greater than or equal to it (``bisect_left``
    semantics, so an item equal to a boundary lands in that boundary's
    bucket). Items larger than every boundary are counted under ``None``.

    Returns a ``defaultdict(int)`` mapping boundary -> count; only buckets
    that received at least one item are present.
    """
    count = defaultdict(int)
    # Sort a private copy so the caller's list is left untouched; this also
    # lets tuples and other iterables be passed as boundaries.
    boundaries = sorted(intervals)
    overflow = len(boundaries)
    for item in sequence:
        pos = bisect_left(boundaries, item)
        if pos == overflow:
            # Item is greater than the largest boundary.
            count[None] += 1
        else:
            count[boundaries[pos]] += 1
    return count

data = [4,4,1,18,2,15,6,14,2,16,2,17,12,3,12,4,15,5,17]
# Parenthesized form so the call is valid on Python 3 as well as Python 2.
print(count_intervals(data, [10, 20]))

This prints:

defaultdict(<type 'int'>, {10: 10, 20: 9})

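This means that 10 values fall in the bucket bounded by 10 and 9 values fall in the bucket bounded by 20 (a value equal to a boundary is counted in that boundary's bucket).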

+4

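If the numbers are integers, as in your example, representing the intervals as frozensets may be the fastest option (worth a try). If the intervals are not guaranteed to be mutually exclusive, then: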

# One frozenset per interval gives O(1) membership tests for integer values.
intervals = [frozenset(range(10)), frozenset(range(10, 20))]
counts = [0] * len(intervals)

for n in mylist:
  # Check every interval: a value is counted once for each interval
  # that contains it.
  for i, inter in enumerate(intervals):
    if n in inter:
      counts[i] += 1

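If the intervals are mutually exclusive, this code can be sped up a little by adding a break right after the increment. For mutually exclusive intervals of integers >= 0 there is an even more attractive option: first prepare an auxiliary index (for example, from your startpoints) and then use it as a lookup table: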

# indices[v] is the bucket number for integer value v: the number of start
# points that are <= v, minus one. Using >= (not >) keeps a value equal to a
# start point in the bucket that begins there, and maps 0 to bucket 0
# instead of -1.
indices = [sum(i >= x for x in startpoints) - 1 for i in range(max(startpoints))]

# N start points delimit N - 1 buckets.
counts = [0] * (len(startpoints) - 1)
for n in mylist:
  # Values outside the lookup table are ignored.
  if 0 <= n < len(indices):
    counts[indices[n]] += 1

This can be adjusted if the values can be < 0 (everything then needs to be offset by -min(startpoints)).

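If the "numbers" can be arbitrary floats (or decimal.Decimal values, etc.), not just integers, the possibilities for optimization are more limited. Is that the case...?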

+1

Another approach is to use NumPy's histogram function.

import numpy as np
mylist = [4,4,1,18,2,15,6,14,2,16,2,17,12,3,12,4,15,5,17]
# Bin edges [0, 10, 20] give the half-open bin [0, 10) and the final bin
# [10, 20]; np.histogram closes only the last bin on the right. Edges of
# [0, 9, 19] would count a value of 9 in the second bin.
counts, bin_edges = np.histogram(mylist, bins=[0, 10, 20])
print(counts)
+1

You can also use value_counts() together with pd.cut() to do this.

import pandas as pd
mylist = [4,4,1,18,2,15,6,14,2,16,2,17,12,3,12,4,15,5,17]
# pd.cut assigns each value to a right-closed bin: (0, 10] and (10, 20].
# The result is wrapped in a Series because Series.value_counts accepts
# sort=False, which keeps the counts in bin order rather than sorted by
# frequency.
split_mylist = pd.Series(pd.cut(mylist, [0, 10, 20])).value_counts(sort=False)
print(split_mylist)

Output:

(0, 10]     10
(10, 20]     9
dtype: int64

To get the counts as a plain list, call tolist() on the result:

# tolist() converts the counts held in split_mylist into a plain Python list.
split_mylist = split_mylist.tolist()
print(split_mylist)

Output: [10, 9]

0

Source: https://habr.com/ru/post/1747907/