This is actually a fairly limited format, especially compared to the whole of ISO 8601. Using a regular expression is basically the same as using strptime plus handling the timezone offset yourself (which strptime does not do).
import datetime
import re

# Verbose-mode pattern for CCYY-MM-DDThh:mm:ss[.f{1,6}][Z|(+|-)hh:mm].
# A raw string keeps "\." a regex escape rather than a (deprecated) string
# escape, and "\Z" (unlike "$") refuses a trailing newline.
_TIMESTAMP_RE = re.compile(
    r"""
    ^
    (?P<year>-?[0-9]{4}) - (?P<month>[0-9]{2}) - (?P<day>[0-9]{2})
    T
    (?P<hour>[0-9]{2}) : (?P<minute>[0-9]{2}) : (?P<second>[0-9]{2})
    (?P<microsecond>\.[0-9]{1,6})?
    (?P<tz>
        Z
      | (?P<tz_sign>[-+]) (?P<tz_hr>[0-9]{2}) : (?P<tz_min>[0-9]{2})
    )?
    \Z
    """,
    re.X,
)


def parse_timestamp(s):
    """Parse a ``CCYY-MM-DDThh:mm:ss[.ffffff][Z|+hh:mm|-hh:mm]`` timestamp.

    Returns ``(datetime, tz offset in minutes)`` on success, or
    ``(None, None)`` when *s* does not match the format or names a date/time
    that ``datetime.datetime`` rejects (for example hour 25 or a negative
    year).

    The returned datetime is naive and holds the fields exactly as written;
    the offset is reported separately and is NOT applied to it. A missing
    offset and ``Z`` are both reported as 0.
    """
    m = _TIMESTAMP_RE.match(s)
    if m is None:
        return None, None

    if m.group("tz") in ("Z", None):
        tz = 0
    else:
        # The sign applies to hours and minutes together: "-05:30" is -330
        # minutes. Negating the combined magnitude also handles "-00:30",
        # where int("-00") would lose the sign.
        tz = int(m.group("tz_hr")) * 60 + int(m.group("tz_min"))
        if m.group("tz_sign") == "-":
            tz = -tz

    fraction = m.group("microsecond")
    if fraction is None:
        microsecond = 0
    else:
        # Drop the leading "." and right-pad to six digits: ".5" -> 500000.
        microsecond = int(fraction[1:].ljust(6, "0"))

    try:
        parsed = datetime.datetime(
            int(m.group("year")),
            int(m.group("month")),
            int(m.group("day")),
            int(m.group("hour")),
            int(m.group("minute")),
            int(m.group("second")),
            microsecond,
        )
    except ValueError:
        # Well-formed text but an out-of-range field.
        return None, None
    return parsed, tz
This does not apply the timezone offset to the parsed datetime, and negative years are a problem with datetime. Both of these issues will be fixed with another type of timestamp that handles the entire range required by xsd:dateTime.
# Smoke test for parse_timestamp: every "valid" string must parse and
# round-trip its date/time fields; every "invalid" string must be rejected.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
valid = [
    "2001-10-26T21:32:52",
    "2001-10-26T21:32:52+02:00",
    "2001-10-26T19:32:52Z",
    "2001-10-26T19:32:52+00:00",
    # "-2001-10-26T21:32:52" is deliberately left out: datetime cannot
    # represent negative years.
    "2001-10-26T21:32:52.12679",
]
for v in valid:
    print("")
    print(v)
    r = parse_timestamp(v)
    assert all(x is not None for x in r), v
    parsed, offset = r
    # Quick-and-dirty re-serialisation: it cannot tell "+00:00" from "Z"
    # (both parse to offset 0), but it is enough for the cases above.
    if offset:
        # Format the magnitude and add the sign separately; divmod() on a
        # negative offset would floor the hours and give e.g. "-6:30" for
        # -330 minutes.
        sign = "+" if offset > 0 else "-"
        tz = "%s%02d:%02d" % ((sign,) + divmod(abs(offset), 60))
    else:
        tz = "Z"
    rendered = parsed.isoformat() + tz
    print(rendered)
    # Only the date/time prefix is compared; fractional seconds and the
    # offset may legitimately be rendered differently from the input.
    assert rendered.startswith(v[:len("CCYY-MM-DDThh:mm:ss")]), v
print("---")
invalid = [
    "2001-10-26",
    "2001-10-26T21:32",
    "2001-10-26T25:32:52+02:00",
    "01-10-26T21:32",
]
for v in invalid:
    print(v)
    r = parse_timestamp(v)
    assert all(x is None for x in r), v
Roger Pate
source share