What is the most pythonic way to sort date sequences?

I have a list of strings, each representing a month of a year (not sorted, not sequential): ['1/2013', '7/2013', '2/2013', '3/2013', '4/2014', '12/2013', '10/2013', '11/2013', '1/2014', '2/2014']

I am looking for a Pythonic method to sort all of them and split each sequential sequence, as shown below:

[ ['1/2013', '2/2013', '3/2013', '4/2013'], 
  ['7/2013'], 
  ['10/2013', '11/2013', '12/2013', '1/2014', '2/2014'] 
]

Any ideas?

+4
source share
5 answers

Based on an example from the docs that shows how to find a run of consecutive numbers using itertools.groupby():

from itertools import groupby
from pprint import pprint

def month_number(date):
    """Map a 'month/year' string to one integer that grows by 1 per month.

    The value is year * 12 + month, so December of one year and January
    of the next differ by exactly one.
    """
    parts = date.split('/')
    month, year = parts
    return 12 * int(year) + int(month)

# Sort chronologically, then group on (position - month number): inside a run
# of consecutive months both values grow by one per step, so the difference
# stays constant and groupby() keeps the run together.
# The key indexes into the (index, date) pair instead of unpacking it in the
# lambda signature, because tuple parameters are a syntax error on Python 3.
L = [[date for _, date in run]
     for _, run in groupby(enumerate(sorted(months, key=month_number)),
                           key=lambda pair: pair[0] - month_number(pair[1]))]
pprint(L)

The key to the solution is taking the difference with the range generated by enumerate(), so that all consecutive months end up in the same group (run).

Output

[['1/2013', '2/2013', '3/2013'],
 ['7/2013'],
 ['10/2013', '11/2013', '12/2013', '1/2014', '2/2014'],
 ['4/2014']]
+4
source

, : ['1/2013', '2/2017'], .. .

from datetime import datetime
from dateutil.relativedelta import relativedelta

def areAdjacent(old, new):
    """Return whether `new` equals `old` shifted forward by one month."""
    one_month_later = old + relativedelta(months=1)
    return one_month_later == new

def parseDate(s):
    """Parse a 'month/year' string (e.g. '7/2013') into a datetime."""
    month_year_format = '%m/%Y'
    return datetime.strptime(s, month_year_format)

def generateGroups(seq):
    """Yield lists of the input strings, one list per run of adjacent months.

    The strings are ordered by their parsed date first; a new list is started
    whenever areAdjacent() reports that a date does not follow the previous one.
    """
    ordered = sorted((parseDate(s), s) for s in seq)
    run = []
    previous = None
    for moment, text in ordered:
        gap = run and previous is not None and not areAdjacent(previous, moment)
        if gap:
            # the current run is finished; hand it out and start a fresh one
            yield run
            run = []
        run.append(text)
        previous = moment
    if run:
        yield run

Output:

[['1/2013', '2/2013', '3/2013'], 
 ['7/2013'],
 ['10/2013', '11/2013', '12/2013', '1/2014', '2/2014'],
 ['4/2014']]
+2

To sort, pass sorted() a key function that parses each string with Python's datetime, lambda d: datetime.strptime(d, '%m/%Y'), and apply it to the list L:

>>> from datetime import datetime
>>> sorted(L, key = lambda d: datetime.strptime(d, '%m/%Y'))
['1/2013', '2/2013', '3/2013', '7/2013', '10/2013', 
 '11/2013', '12/2013', '1/2014', '2/2014', '4/2014'] # indented by hand

" /" " ", script ( ), L, ( ):

def is_cm(d1, d2):
    """ is consecutive month pair?

        d1, d2: 'month/year' strings such as '12/2012'.

        returns: True only when d2 is the calendar month immediately after
        d1 (December -> January of the next year included). Every other
        pair -- d2 earlier than d1, the same month, or a larger gap --
        gives False, so the result is always a bool and never None.

        raises: ValueError (from strptime) if a string is not 'month/year'.
    """
    d1 = datetime.strptime(d1, '%m/%Y')
    d2 = datetime.strptime(d2, '%m/%Y')

    # Counting months as year * 12 + month turns the year rollover into an
    # ordinary "+1", so one comparison covers both cases.
    return (d2.year * 12 + d2.month) - (d1.year * 12 + d1.month) == 1

:

>>> is_cm('1/2012', '2/2012')
True # yes, consecutive
>>> is_cm('12/2012', '1/2013')
True # yes, consecutive
>>> is_cm('1/2015', '12/2012') # None --> # not consecutive
>>> is_cm('12/2012', '2/2013')
False # not consecutive

:

def result(dl):
    """
    Sort 'month/year' strings and split them into runs of consecutive months.

    dl: dates list - an iterable of 'month/year' strings
    type: list of strings

    returns: list of lists of strings; an empty list for empty input
    """
    # Sort list chronologically:
    s_dl = sorted(dl, key=lambda d: datetime.strptime(d, '%m/%Y'))
    if not s_dl:
        # nothing to group; avoids indexing into an empty list below
        return []
    r_dl = []  # list to be returned
    # split list into list of lists
    t_dl = [s_dl[0]]  # run currently being built
    for d in s_dl[1:]:
        if is_cm(t_dl[-1], d):  # d continues the current run
            t_dl.append(d)
        else:  # gap found: close the current run and start a new one
            r_dl.append(t_dl)
            t_dl = [d]
    # The run still open when the loop ends must be kept too; without this
    # the last group would be dropped from the output.
    r_dl.append(t_dl)
    return r_dl

result(L)

from datetime import datetime, , , , .

@9000 , script check @codepad.

+1

( ) :

# Only the year of the first entry is examined: walk its twelve months in
# order and collect those present in `dates` into runs.
year = dates[0].split('/')[1]
result = []
current = []
for month in range(1, 13):
    label = "%i/%s" % (month, year)
    if label not in dates:
        # a missing month ends the current run
        current = []
        continue
    if not current:
        # first month of a new run: register the list now, keep filling it below
        result.append(current)
    current.append(label)
0

, itertools , . zip. @moe .

def parseAsPair(piece):
  """Turn a 'month/year' string such as '7/2014' into the tuple (2014, 7)."""
  month_text, year_text = piece.split('/')
  year = int(year_text)
  month = int(month_text)
  return (year, month)

def goesAfter(earlier, later):
  """Returns True iff later is the month right after earlier.

  Both arguments are (year, month) pairs.
  """
  (year_a, month_a), (year_b, month_b) = earlier, later
  if year_a != year_b:
    # different years: only December -> January of the following year counts
    return year_b == year_a + 1 and month_a == 12 and month_b == 1
  # same year: must be the very next month
  return month_b == month_a + 1

def groupSequentially(months):
  """Sort 'month/year' strings and split them into spans of consecutive months.

  Returns a list of lists of the original strings; an empty input gives [].
  """
  if not months:
    return []
  ordered = sorted(months, key=parseAsPair)
  spans = [[ordered[0]]]  # the first month always opens the first span
  for previous, following in zip(ordered, ordered[1:]):
    if goesAfter(parseAsPair(previous), parseAsPair(following)):
      spans[-1].append(following)  # still inside the current span
    else:
      spans.append([following])  # gap: open a new span
  return spans

Example:

# Sample input taken from the question: unsorted, with gaps.
months = ['1/2013', '7/2013', '2/2013', '3/2013', '4/2014', '12/2013',
          '10/2013', '11/2013', '1/2014', '2/2014']

# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
print(groupSequentially(months))  # output wrapped manually

[['1/2013', '2/2013', '3/2013'], 
 ['7/2013'], 
 ['10/2013', '11/2013', '12/2013', '1/2014', '2/2014'], 
 ['4/2014']]

We could save a little performance and cognitive load if we mapped parseAsPair over the list up front. Then every call to parseAsPair could be removed from groupSequentially, but we would have to convert the result back to strings.

0
source

Source: https://habr.com/ru/post/1536535/


All Articles