diff options
Diffstat (limited to 'dateutil/parser/_parser.py')
-rw-r--r-- | dateutil/parser/_parser.py | 88 |
1 files changed, 67 insertions, 21 deletions
diff --git a/dateutil/parser/_parser.py b/dateutil/parser/_parser.py index 0eac592..0da0f3e 100644 --- a/dateutil/parser/_parser.py +++ b/dateutil/parser/_parser.py @@ -40,7 +40,7 @@ from calendar import monthrange from io import StringIO import six -from six import binary_type, integer_types, text_type +from six import integer_types, text_type from decimal import Decimal @@ -63,7 +63,7 @@ class _timelex(object): if six.PY2: # In Python 2, we can't duck type properly because unicode has # a 'decode' function, and we'd be double-decoding - if isinstance(instream, (binary_type, bytearray)): + if isinstance(instream, (bytes, bytearray)): instream = instream.decode() else: if getattr(instream, 'decode', None) is not None: @@ -291,7 +291,7 @@ class parserinfo(object): ("s", "second", "seconds")] AMPM = [("am", "a"), ("pm", "p")] - UTCZONE = ["UTC", "GMT", "Z"] + UTCZONE = ["UTC", "GMT", "Z", "z"] PERTAIN = ["of"] TZOFFSET = {} # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate", @@ -364,13 +364,23 @@ class parserinfo(object): return self.TZOFFSET.get(name) def convertyear(self, year, century_specified=False): + """ + Converts two-digit years to year within [-50, 49] + range of self._year (current local time) + """ + + # Function contract is that the year is always positive + assert year >= 0 + if year < 100 and not century_specified: + # assume current century to start year += self._century - if abs(year - self._year) >= 50: - if year < self._year: - year += 100 - else: - year -= 100 + + if year >= self._year + 50: # if too far in future + year -= 100 + elif year < self._year - 50: # if too far in past + year += 100 + return year def validate(self, res): @@ -378,7 +388,8 @@ class parserinfo(object): if res.year is not None: res.year = self.convertyear(res.year, res.century_specified) - if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z': + if ((res.tzoffset == 0 and not res.tzname) or + (res.tzname == 'Z' or res.tzname == 'z')): res.tzname = "UTC" res.tzoffset = 0 elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname): @@ -448,10 +459,37 @@ class _ymd(list): raise ValueError('Year is already set') self.ystridx = len(self) - 1 + def _resolve_from_stridxs(self, strids): + """ + Try to resolve the identities of year/month/day elements using + ystridx, mstridx, and dstridx, if enough of these are specified. + """ + if len(self) == 3 and len(strids) == 2: + # we can back out the remaining stridx value + missing = [x for x in range(3) if x not in strids.values()] + key = [x for x in ['y', 'm', 'd'] if x not in strids] + assert len(missing) == len(key) == 1 + key = key[0] + val = missing[0] + strids[key] = val + + assert len(self) == len(strids) # otherwise this should not be called + out = {key: self[strids[key]] for key in strids} + return (out.get('y'), out.get('m'), out.get('d')) + def resolve_ymd(self, yearfirst, dayfirst): len_ymd = len(self) year, month, day = (None, None, None) + strids = (('y', self.ystridx), + ('m', self.mstridx), + ('d', self.dstridx)) + + strids = {key: val for key, val in strids if val is not None} + if (len(self) == len(strids) > 0 or + (len(self) == 3 and len(strids) == 2)): + return self._resolve_from_stridxs(strids) + mstridx = self.mstridx if len_ymd > 3: @@ -460,13 +498,17 @@ class _ymd(list): # One member, or two members with a month string if mstridx is not None: month = self[mstridx] - del self[mstridx] + # since mstridx is 0 or 1, self[mstridx-1] always + # looks up the other element + other = self[mstridx - 1] + else: + other = self[0] if len_ymd > 1 or mstridx is None: - if self[0] > 31: - year = self[0] + if other > 31: + year = other else: - day = self[0] + day = other elif len_ymd == 2: # Two members with numbers @@ -1019,7 +1061,8 @@ class parser(object): tzname is None and tzoffset is None and len(token) <= 5 and - all(x in string.ascii_uppercase for x in token)) + (all(x in string.ascii_uppercase for x in token) + or token in self.info.UTCZONE)) def _ampm_valid(self, hour, ampm, fuzzy): """ @@ -1115,16 +1158,14 @@ class parser(object): tzdata = tzinfos(tzname, tzoffset) else: tzdata = tzinfos.get(tzname) - - if isinstance(tzdata, datetime.tzinfo): + # handle case where tzinfo is paased an options that returns None + # eg tzinfos = {'BRST' : None} + if isinstance(tzdata, datetime.tzinfo) or tzdata is None: tzinfo = tzdata elif isinstance(tzdata, text_type): tzinfo = tz.tzstr(tzdata) elif isinstance(tzdata, integer_types): tzinfo = tz.tzoffset(tzname, tzdata) - else: - raise ValueError("Offset must be tzinfo subclass, " - "tz string, or int offset.") return tzinfo def _build_tzaware(self, naive, res, tzinfos): @@ -1160,7 +1201,7 @@ class parser(object): warnings.warn("tzname {tzname} identified but not understood. " "Pass `tzinfos` argument in order to correctly " "return a timezone-aware datetime. In a future " - "version, this raise an " + "version, this will raise an " "exception.".format(tzname=res.tzname), category=UnknownTimezoneWarning) aware = naive @@ -1202,10 +1243,15 @@ class parser(object): def _to_decimal(self, val): try: - return Decimal(val) + decimal_value = Decimal(val) + # See GH 662, edge case, infinite value should not be converted via `_to_decimal` + if not decimal_value.is_finite(): + raise ValueError("Converted decimal value is infinite or NaN") except Exception as e: msg = "Could not convert %s to decimal" % val six.raise_from(ValueError(msg), e) + else: + return decimal_value DEFAULTPARSER = parser() |