How can I search a C# dictionary using a range of keys?

I have a dictionary with data similar to this (the dictionary will contain about 100 thousand entries):

[1] -> 5
[7] -> 50
[30] -> 3
[1000] -> 1
[100000] -> 35

I also have a list of ranges (about 1000)

MyRanges
    Range
        LowerBoundInclusive -> 0
        UpperBoundExclusive -> 10
        Total
    Range
        LowerBoundInclusive -> 10
        UpperBoundExclusive -> 50
        Total
    Range
        LowerBoundInclusive -> 100
        UpperBoundExclusive -> 1000
        Total
    Range
        LowerBoundInclusive -> 1000
        UpperBoundExclusive -> 10000
        Total
    Range (the "other" range)
        LowerBoundInclusive -> null
        UpperBoundExclusive -> null
        Total

I need to calculate the total of the dictionary values whose keys fall within each of these ranges. For example, the range 0-10 would have a total of 55 (5 + 50). The ranges can be very large, so I know it doesn't make sense to look up the dictionary for every value between the two bounds. My guess is that I should get the list of keys from the dictionary, sort it, then loop through the ranges and search for all the keys within each range. Is this the right way to do it? Is there an easier way?

. . , . , , - , .

+4
4

- :

// Associate each value with the range that contains its key.
// Entries whose key matches no bounded range are grouped under the null key,
// because FirstOrDefault yields null when nothing matches (assumes the range
// type is a class). A range with a null bound never matches here: a comparison
// against a null bound evaluates to false.
// Note: every entry scans the range list, so this costs O(entries * ranges).
var lookup = dictionary.ToLookup(
    kvp => ranges.FirstOrDefault(r => r.LowerBoundInclusive <= kvp.Key
                              && r.UpperBoundExclusive > kvp.Key),
    kvp => kvp.Value);

// Compute the total of values for each range
foreach (var r in ranges)
{
    // The "other" range (both bounds null) receives everything that fell into
    // no bounded range; without this it would always end up with a total of 0.
    bool isOthers = r.LowerBoundInclusive == null && r.UpperBoundExclusive == null;

    // Indexing a lookup with a key it does not contain returns an empty
    // sequence, so ranges without any matching entry simply sum to 0.
    r.Total = (isOthers ? lookup[null] : lookup[r]).Sum();
}

( : , "" )

, , ... , .

:

// Single pass over the entries (sorted by key) and the ranges (sorted by lower
// bound), accumulating each range's Total.
// We need finer control over the enumeration than foreach gives us,
// so we drive the enumerator directly instead.
using (var dictEnumerator = dictionary.OrderBy(e => e.Key).GetEnumerator())
{
    // Becomes false once the dictionary is exhausted (immediately if it is empty).
    // We deliberately keep iterating over the ranges afterwards so that every
    // range, including the "others" range, still gets its Total assigned.
    bool hasEntry = dictEnumerator.MoveNext();

    long othersTotal = 0; // total for items that don't fall in any range

    // The ranges need to be in ascending order.
    // We want the "others" range (null bounds) at the end, so that othersTotal
    // already contains every skipped entry when we reach it.
    foreach (var range in ranges.OrderBy(r => r.LowerBoundInclusive ?? int.MaxValue))
    {
        bool isOthers = range.LowerBoundInclusive == null && range.UpperBoundExclusive == null;

        // The "others" range starts from the precalculated total of previous
        // items that didn't fall in any other range; all other ranges start at 0.
        range.Total = isOthers ? othersTotal : 0;

        int lower = range.LowerBoundInclusive ?? int.MinValue;
        // A null upper bound means "unbounded"; keeping it nullable avoids
        // excluding a key equal to int.MaxValue.
        int? upper = range.UpperBoundExclusive;

        // Keys that are below the current range don't belong to any range
        // (or they would have been included in the previous range).
        while (hasEntry && dictEnumerator.Current.Key < lower)
        {
            othersTotal += dictEnumerator.Current.Value;
            hasEntry = dictEnumerator.MoveNext();
        }

        // While the key is in the range, we keep adding the values.
        // The loop above guarantees Key >= lower here, so only the upper
        // bound needs checking.
        while (hasEntry && (upper == null || dictEnumerator.Current.Key < upper.Value))
        {
            range.Total += dictEnumerator.Current.Value;
            hasEntry = dictEnumerator.MoveNext();
        }

        // The current entry (if any) is now outside the range,
        // so carry on to the next range.
    }
}

(, , , "" )

, , , , .

, , OrderBy on ranges.

+4

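You can use List<T>.BinarySearch. Each search is O(log n), so the whole computation is O(q log n), where n is the number of keys and q is the number of ranges: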

// `data` is a sorted List<int> of the keys.
// For each range, counts how many keys lie in [range.Start, range.End]
// (both ends are treated as inclusive by the index arithmetic below).

foreach (var range in ranges)                             // O(q)
{
    // BinarySearch returns the index of the match, or, when there is no match,
    // the bitwise complement of the index of the first larger element
    // (or of data.Count if there is none).
    int lowerBoundIndex = data.BinarySearch(range.Start); // O(logn)
    // No exact match: start at the first key greater than range.Start.
    lowerBoundIndex = lowerBoundIndex < 0
        ? ~lowerBoundIndex
        : lowerBoundIndex;

    int upperBoundIndex = data.BinarySearch(range.End);   // O(logn)
    // No exact match: end at the last key smaller than range.End
    // (this is -1 when every key is larger than range.End).
    upperBoundIndex = upperBoundIndex < 0
        ? ~upperBoundIndex - 1
        : upperBoundIndex;

    // An empty range shows up as upperBoundIndex < lowerBoundIndex.
    var count = (upperBoundIndex >= lowerBoundIndex)
        ? (upperBoundIndex - lowerBoundIndex + 1)
        : 0;

    // print/store count for range
}

O(q*l), q - ( ), l - . , , .

, 100k , p.s.w.g .

+2

, .

, , . , (N + Q) * Log N, N - , Q - , .

: . , node. :

  • | 0 → 0 ( )
  • | 1 → 5 - 5
  • | 7 → 55 - 50 + 5
  • | 30 → 58 - 3 + 50 + 5
  • | 1000 → 59 - 1 + 3 + 50 + 5
  • | 100000 → 94 - 35 + 1 + 3 + 50 + 5

(.. {1, 7, 30, 1000, 100000} list) , , , .

, {0, 10}, :

  • 0, 0
  • 10, 55 7 ( 10)
  • 0 55 55.

11, 1000 :

  • 11, 7 55
  • 1000, 1000 59
  • 59-55 = 4 .
+2

. , ; , , . , , , . , :

// Merge-style walk over the sorted keys and the ranges sorted by lower bound.
// Counts the keys that fall inside a range (bounds treated as inclusive) and
// collects the keys that fall inside none.
// NOTE(review): the "not found" bookkeeping assumes the ranges do not overlap — confirm.
List<KeyType> keys = dict.Keys.OrderBy(k => k).ToList();
List<RangeType> ranges = rangeList.OrderBy(r => r.LowerBound).ToList();

var notFoundKeys = new List<KeyType>();
var iKey = 0;
var iRange = 0;
var count = 0;
// do a merge
while (iKey < keys.Count && iRange < ranges.Count)
{
    if (keys[iKey] < ranges[iRange].LowerBound)
    {
        // key is smaller than current range lower bound, and it was already
        // past every earlier range, so it is not in any range
        notFoundKeys.Add(keys[iKey]);
        // move to next key
        ++iKey;
    }
    else if (keys[iKey] > ranges[iRange].UpperBound)
    {
        // key is larger than current range upper bound
        // move to next range (the same key is re-examined against it)
        ++iRange;
    }
    else
    {
        // key is within this range
        ++count;
        // add key to list of keys in this range
        ++iKey;
    }
}
// If there are leftover keys, they lie beyond the last range:
// add them to the list of keys not found in a range
while (iKey < keys.Count)
{
    notFoundKeys.Add(keys[iKey]);
    ++iKey;
}

, .

O (n), n - .

, 100 000 , . , , . . , .

, , . - O (q log n), q - . log2 (100000) 16,6. , 1000 33 200 - , , .

:

// For each range, adds to `count` the number of keys in
// [range.LowerBound, range.UpperBound]. `keys` must be sorted.
foreach (var range in ranges)
{
    int firstIndex = keys.BinarySearch(range.LowerBound);

    // BinarySearch returns a negative number when the value is not found:
    // the bitwise complement of the index of the first larger element
    // (or of keys.Count if there is none). Complementing it again gives the
    // index of the first key that is >= LowerBound.
    if (firstIndex < 0)
    {
        firstIndex = ~firstIndex;
    }

    int lastIndex = keys.BinarySearch(range.UpperBound);

    // Not found: step back to the last key that is <= UpperBound
    // (this is -1 when every key is larger than UpperBound).
    if (lastIndex < 0)
    {
        lastIndex = ~lastIndex - 1;
    }

    // Compare the indices rather than the elements: firstIndex may equal
    // keys.Count and lastIndex may be -1, so indexing keys here could throw.
    // firstIndex <= lastIndex holds exactly when at least one key is in range.
    if (firstIndex <= lastIndex)
    {
        count += 1 + (lastIndex - firstIndex);
    }
}

List.BinarySearch , . , , , .

, , , , . .

BinarySearch overload, . , , 0-50 27, 27 51-100 . , .

, , , , , - . , # ( List<T>.BinarySearch), , , 10 , , . , , , 5-10 . , , .

, . , . - , , , , . , , . , - 3000 , n/(2*log2(n)) 3,012.

Again, since you are talking about relatively small numbers, any algorithm will most likely work well for you. If you call this hundreds or thousands of times per second, then you will want to do a detailed analysis and run timings with representative data and varying numbers of ranges. If you call it only rarely, then just implement something that works and worry about optimization only if it becomes a performance problem.

+1
source

Source: https://habr.com/ru/post/1543624/


All Articles