aboutsummaryrefslogtreecommitdiff
path: root/dateutil/parser/_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'dateutil/parser/_parser.py')
-rw-r--r--dateutil/parser/_parser.py88
1 files changed, 67 insertions, 21 deletions
diff --git a/dateutil/parser/_parser.py b/dateutil/parser/_parser.py
index 0eac592..0da0f3e 100644
--- a/dateutil/parser/_parser.py
+++ b/dateutil/parser/_parser.py
@@ -40,7 +40,7 @@ from calendar import monthrange
from io import StringIO
import six
-from six import binary_type, integer_types, text_type
+from six import integer_types, text_type
from decimal import Decimal
@@ -63,7 +63,7 @@ class _timelex(object):
if six.PY2:
# In Python 2, we can't duck type properly because unicode has
# a 'decode' function, and we'd be double-decoding
- if isinstance(instream, (binary_type, bytearray)):
+ if isinstance(instream, (bytes, bytearray)):
instream = instream.decode()
else:
if getattr(instream, 'decode', None) is not None:
@@ -291,7 +291,7 @@ class parserinfo(object):
("s", "second", "seconds")]
AMPM = [("am", "a"),
("pm", "p")]
- UTCZONE = ["UTC", "GMT", "Z"]
+ UTCZONE = ["UTC", "GMT", "Z", "z"]
PERTAIN = ["of"]
TZOFFSET = {}
# TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
@@ -364,13 +364,23 @@ class parserinfo(object):
return self.TZOFFSET.get(name)
def convertyear(self, year, century_specified=False):
+ """
+ Converts two-digit years to the matching year within the range
+ [self._year - 50, self._year + 49] (self._year: current local time)
+ """
+
+ # Function contract is that the year is always non-negative
+ assert year >= 0
+
if year < 100 and not century_specified:
+ # assume current century to start
year += self._century
- if abs(year - self._year) >= 50:
- if year < self._year:
- year += 100
- else:
- year -= 100
+
+ if year >= self._year + 50: # if too far in future
+ year -= 100
+ elif year < self._year - 50: # if too far in past
+ year += 100
+
return year
def validate(self, res):
@@ -378,7 +388,8 @@ class parserinfo(object):
if res.year is not None:
res.year = self.convertyear(res.year, res.century_specified)
- if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
+ if ((res.tzoffset == 0 and not res.tzname) or
+ (res.tzname == 'Z' or res.tzname == 'z')):
res.tzname = "UTC"
res.tzoffset = 0
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
@@ -448,10 +459,37 @@ class _ymd(list):
raise ValueError('Year is already set')
self.ystridx = len(self) - 1
+ def _resolve_from_stridxs(self, strids):
+ """
+ Try to resolve the identities of year/month/day elements using
+ ystridx, mstridx, and dstridx, if enough of these are specified.
+ """
+ if len(self) == 3 and len(strids) == 2:
+ # we can back out the remaining stridx value
+ missing = [x for x in range(3) if x not in strids.values()]
+ key = [x for x in ['y', 'm', 'd'] if x not in strids]
+ assert len(missing) == len(key) == 1
+ key = key[0]
+ val = missing[0]
+ strids[key] = val
+
+ assert len(self) == len(strids) # otherwise this should not be called
+ out = {key: self[strids[key]] for key in strids}
+ return (out.get('y'), out.get('m'), out.get('d'))
+
def resolve_ymd(self, yearfirst, dayfirst):
len_ymd = len(self)
year, month, day = (None, None, None)
+ strids = (('y', self.ystridx),
+ ('m', self.mstridx),
+ ('d', self.dstridx))
+
+ strids = {key: val for key, val in strids if val is not None}
+ if (len(self) == len(strids) > 0 or
+ (len(self) == 3 and len(strids) == 2)):
+ return self._resolve_from_stridxs(strids)
+
mstridx = self.mstridx
if len_ymd > 3:
@@ -460,13 +498,17 @@ class _ymd(list):
# One member, or two members with a month string
if mstridx is not None:
month = self[mstridx]
- del self[mstridx]
+ # since mstridx is 0 or 1, self[mstridx-1] always
+ # looks up the other element
+ other = self[mstridx - 1]
+ else:
+ other = self[0]
if len_ymd > 1 or mstridx is None:
- if self[0] > 31:
- year = self[0]
+ if other > 31:
+ year = other
else:
- day = self[0]
+ day = other
elif len_ymd == 2:
# Two members with numbers
@@ -1019,7 +1061,8 @@ class parser(object):
tzname is None and
tzoffset is None and
len(token) <= 5 and
- all(x in string.ascii_uppercase for x in token))
+ (all(x in string.ascii_uppercase for x in token)
+ or token in self.info.UTCZONE))
def _ampm_valid(self, hour, ampm, fuzzy):
"""
@@ -1115,16 +1158,14 @@ class parser(object):
tzdata = tzinfos(tzname, tzoffset)
else:
tzdata = tzinfos.get(tzname)
-
- if isinstance(tzdata, datetime.tzinfo):
+ # handle the case where tzinfos is passed an option that returns None,
+ # e.g. tzinfos = {'BRST' : None}
+ if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
tzinfo = tzdata
elif isinstance(tzdata, text_type):
tzinfo = tz.tzstr(tzdata)
elif isinstance(tzdata, integer_types):
tzinfo = tz.tzoffset(tzname, tzdata)
- else:
- raise ValueError("Offset must be tzinfo subclass, "
- "tz string, or int offset.")
return tzinfo
def _build_tzaware(self, naive, res, tzinfos):
@@ -1160,7 +1201,7 @@ class parser(object):
warnings.warn("tzname {tzname} identified but not understood. "
"Pass `tzinfos` argument in order to correctly "
"return a timezone-aware datetime. In a future "
- "version, this raise an "
+ "version, this will raise an "
"exception.".format(tzname=res.tzname),
category=UnknownTimezoneWarning)
aware = naive
@@ -1202,10 +1243,15 @@ class parser(object):
def _to_decimal(self, val):
try:
- return Decimal(val)
+ decimal_value = Decimal(val)
+ # See GH 662, edge case, infinite value should not be converted via `_to_decimal`
+ if not decimal_value.is_finite():
+ raise ValueError("Converted decimal value is infinite or NaN")
except Exception as e:
msg = "Could not convert %s to decimal" % val
six.raise_from(ValueError(msg), e)
+ else:
+ return decimal_value
DEFAULTPARSER = parser()