Merge 3.2: Issue #13703 plus some related test suite fixes.

author: Georg Brandl <georg@python.org> 2012-02-21 00:33:36 +0100
committer: Georg Brandl <georg@python.org> 2012-02-21 00:33:36 +0100
commit: 2fb477c0f0284439d40cb3f46eea45ef42446e53 (patch)
tree: c8df3747d511256d56ca4af046db7915b5c06096
parent: b5c793a0b349cb02003433c30a410595b224079f (diff)
parent: 9edceb3e591063f382ae82e14313813ffc1af0bf (diff)
download: cpython3-2fb477c0f0284439d40cb3f46eea45ef42446e53.tar.gz
38 files changed, 706 insertions, 174 deletions
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index c2b7423e0c..3af43543c4 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -252,11 +252,15 @@ always available.
    :const:`verbose`              :option:`-v`
    :const:`bytes_warning`        :option:`-b`
    :const:`quiet`                :option:`-q`
+   :const:`hash_randomization`   :option:`-R`
    ============================= =============================
 
    .. versionchanged:: 3.2
       Added ``quiet`` attribute for the new :option:`-q` flag.
 
+   .. versionadded:: 3.2.3
+      The ``hash_randomization`` attribute.
+
    .. versionchanged:: 3.3
       Removed obsolete ``division_warning`` attribute.
 
diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst
index 55fd76bd79..85953ad1e2 100644
--- a/Doc/reference/datamodel.rst
+++ b/Doc/reference/datamodel.rst
@@ -1277,6 +1277,8 @@ Basic customization
    inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__`
    had been explicitly set to :const:`None`.
 
+   See also the :option:`-R` command-line option.
+
 
 .. method:: object.__bool__(self)
 
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index 8a02b764ea..b97dbcdf32 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -24,7 +24,7 @@ Command line
 
 When invoking Python, you may specify any of these options::
 
-    python [-bBdEhiOsSuvVWx?] [-c command | -m module-name | script | - ] [args]
+    python [-bBdEhiORqsSuvVWx?] [-c command | -m module-name | script | - ] [args]
 
 The most common use case is, of course, a simple invocation of a script::
 
@@ -227,6 +227,29 @@ Miscellaneous options
    .. versionadded:: 3.2
 
 
+.. cmdoption:: -R
+
+   Turn on hash randomization, so that the :meth:`__hash__` values of str, bytes
+   and datetime objects are "salted" with an unpredictable random value.
+   Although they remain constant within an individual Python process, they are
+   not predictable between repeated invocations of Python.
+
+   This is intended to provide protection against a denial-of-service attack
+   caused by carefully chosen inputs that exploit the worst-case performance of
+   building a dict, which is O(n^2) in the number of colliding keys.  See
+   http://www.ocert.org/advisories/ocert-2011-003.html for details.
+
+   Changing hash values affects the order in which keys are retrieved from a
+   dict.  Although Python has never made guarantees about this ordering (and it
+   typically varies between 32-bit and 64-bit builds), enough real-world code
+   implicitly relies on this non-guaranteed behavior that the randomization is
+   disabled by default.
+
+   See also :envvar:`PYTHONHASHSEED`.
+
+   .. versionadded:: 3.2.3
+
+
 .. cmdoption:: -s
 
    Don't add the :data:`user site-packages directory <site.USER_SITE>` to
@@ -352,6 +375,7 @@ Options you shouldn't use
 
 .. _Jython: http://jython.org
 
+
 .. _using-on-envvars:
 
 Environment variables
@@ -460,6 +484,27 @@ These environment variables influence Python's behavior.
    option.
 
 
+.. envvar:: PYTHONHASHSEED
+
+   If this variable is set to ``random``, the effect is the same as specifying
+   the :option:`-R` option: a random value is used to seed the hashes of str,
+   bytes and datetime objects.
+
+   If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed
+   seed for generating the hash() of the types covered by the hash
+   randomization.
+
+   Its purpose is to allow repeatable hashing, such as for selftests of the
+   interpreter itself, or to allow a cluster of Python processes to share hash
+   values.
+
+   The integer must be a decimal number in the range [0,4294967295].  Specifying
+   the value 0 will lead to the same hash values as when hash randomization is
+   disabled.
+
+   .. versionadded:: 3.2.3
+
+
 .. envvar:: PYTHONIOENCODING
 
    If this is set before running the interpreter, it overrides the encoding used
diff --git a/Include/object.h b/Include/object.h
index 844ff9f14a..c69becec51 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -554,6 +554,12 @@ PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
 PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
 #endif
 
+typedef struct {
+    Py_hash_t prefix;
+    Py_hash_t suffix;
+} _Py_HashSecret_t;
+PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
+
 /* Helper for passing objects to printf and the like */
 #define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))
 
diff --git a/Include/pydebug.h b/Include/pydebug.h
index 7173fe3339..97c2f8c425 100644
--- a/Include/pydebug.h
+++ b/Include/pydebug.h
@@ -19,6 +19,7 @@ PyAPI_DATA(int) Py_IgnoreEnvironmentFlag;
 PyAPI_DATA(int) Py_DontWriteBytecodeFlag;
 PyAPI_DATA(int) Py_NoUserSiteDirectory;
 PyAPI_DATA(int) Py_UnbufferedStdioFlag;
+PyAPI_DATA(int) Py_HashRandomizationFlag;
 
 /* this is a wrapper around getenv() that pays attention to
    Py_IgnoreEnvironmentFlag.  It should be used for getting variables like
diff --git a/Include/pythonrun.h b/Include/pythonrun.h
index fc6c85434a..eeba1b7af1 100644
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -246,6 +246,8 @@ typedef void (*PyOS_sighandler_t)(int);
 PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
 PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
 
+/* Random */
+PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
 
 #ifdef __cplusplus
 }
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 6d88931dcb..ba2bc1d342 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -31,7 +31,9 @@ Encoding basic Python object hierarchies::
 Compact encoding::
 
     >>> import json
-    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':'))
+    >>> from collections import OrderedDict
+    >>> mydict = OrderedDict([('4', 5), ('6', 7)])
+    >>> json.dumps([1,2,3,mydict], separators=(',', ':'))
     '[1,2,3,{"4":5,"6":7}]'
 
 Pretty printing::
diff --git a/Lib/os.py b/Lib/os.py
index 301870cb48..fe6cb11b9c 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -852,23 +852,6 @@ try:
 except NameError: # statvfs_result may not exist
     pass
 
-if not _exists("urandom"):
-    def urandom(n):
-        """urandom(n) -> str
-
-        Return a string of n random bytes suitable for cryptographic use.
-
-        """
-        try:
-            _urandomfd = open("/dev/urandom", O_RDONLY)
-        except (OSError, IOError):
-            raise NotImplementedError("/dev/urandom (or equivalent) not found")
-        bs = b""
-        while len(bs) < n:
-            bs += read(_urandomfd, n - len(bs))
-        close(_urandomfd)
-        return bs
-
 # Supply os.popen()
 def popen(cmd, mode="r", buffering=-1):
     if not isinstance(cmd, str):
diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index 85036aa7d8..5c02b3a735 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -1786,8 +1786,6 @@ class TestDateTime(TestDate):
         self.assertTrue(abs(from_timestamp - from_now) <= tolerance)
 
     def test_strptime(self):
-        import _strptime
-
         string = '2004-12-01 13:02:47.197'
         format = '%Y-%m-%d %H:%M:%S.%f'
         expected = _strptime._strptime_datetime(self.theclass, string, format)
diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py
index d2b7a59a30..bc12c7756f 100644
--- a/Lib/test/mapping_tests.py
+++ b/Lib/test/mapping_tests.py
@@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase):
     def _reference(self):
         """Return a dictionary of values which are invariant by storage
         in the object under test."""
-        return {1:2, "key1":"value1", "key2":(1,2,3)}
+        return {"1": "2", "key1":"value1", "key2":(1,2,3)}
     def _empty_mapping(self):
         """Return an empty mapping object"""
         return self.type2test()
diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py
index e556eca52a..10ada6d0d1 100644
--- a/Lib/test/script_helper.py
+++ b/Lib/test/script_helper.py
@@ -3,7 +3,6 @@
 
 import sys
 import os
-import re
 import os.path
 import tempfile
 import subprocess
@@ -20,11 +19,15 @@ def _assert_python(expected_success, *args, **env_vars):
     cmd_line = [sys.executable]
     if not env_vars:
         cmd_line.append('-E')
-    cmd_line.extend(args)
     # Need to preserve the original environment, for in-place testing of
     # shared library builds.
     env = os.environ.copy()
+    # However, the special '__cleanenv' flag can be set to override this -- in
+    # that case, the caller is responsible for passing the full environment.
+    if env_vars.pop('__cleanenv', None):
+        env = {}
     env.update(env_vars)
+    cmd_line.extend(args)
     p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          env=env)
diff --git a/Lib/test/support.py b/Lib/test/support.py
index 2c56e12ddf..47b94ca26f 100644
--- a/Lib/test/support.py
+++ b/Lib/test/support.py
@@ -1588,6 +1588,7 @@ def args_from_interpreter_flags():
     flag_opt_map = {
         'bytes_warning': 'b',
         'dont_write_bytecode': 'B',
+        'hash_randomization': 'R',
         'ignore_environment': 'E',
         'no_user_site': 's',
         'no_site': 'S',
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 70dfb173b7..01af9b9352 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -324,6 +324,22 @@ class CmdLineTest(unittest.TestCase):
     def test_no_std_streams(self):
         self._test_no_stdio(['stdin', 'stdout', 'stderr'])
 
+    def test_hash_randomization(self):
+        # Verify that -R enables hash randomization:
+        self.verify_valid_flag('-R')
+        hashes = []
+        for i in range(2):
+            code = 'print(hash("spam"))'
+            rc, out, err = assert_python_ok('-R', '-c', code)
+            self.assertEqual(rc, 0)
+            hashes.append(out)
+        self.assertNotEqual(hashes[0], hashes[1])
+
+        # Verify that sys.flags contains hash_randomization
+        code = 'import sys; print("random is", sys.flags.hash_randomization)'
+        rc, out, err = assert_python_ok('-R', '-c', code)
+        self.assertEqual(rc, 0)
+        self.assertIn(b'random is 1', out)
 
 def test_main():
     test.support.run_unittest(CmdLineTest)
diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py
index ded2aa93ab..d9ddb32363 100644
--- a/Lib/test/test_datetime.py
+++ b/Lib/test/test_datetime.py
@@ -1,7 +1,9 @@
 import unittest
 import sys
 from test.support import import_fresh_module, run_unittest
+
 TESTS = 'test.datetimetester'
+
 # XXX: import_fresh_module() is supposed to leave sys.module cache untouched,
 # XXX: but it does not, so we have to save and restore it ourselves.
 save_sys_modules = sys.modules.copy()
@@ -15,28 +17,32 @@ finally:
     sys.modules.update(save_sys_modules)
 test_modules = [pure_tests, fast_tests]
 test_suffixes = ["_Pure", "_Fast"]
+# XXX(gb) First run all the _Pure tests, then all the _Fast tests.  You might
+# not believe this, but in spite of all the sys.modules trickery running a _Pure
+# test last will leave a mix of pure and native datetime stuff lying around.
+test_classes = []
 
 for module, suffix in zip(test_modules, test_suffixes):
     for name, cls in module.__dict__.items():
-        if isinstance(cls, type) and issubclass(cls, unittest.TestCase):
-            name += suffix
-            cls.__name__ = name
-            globals()[name] = cls
-            def setUp(self, module=module, setup=cls.setUp):
-                self._save_sys_modules = sys.modules.copy()
-                sys.modules[TESTS] = module
-                sys.modules['datetime'] = module.datetime_module
-                sys.modules['_strptime'] = module._strptime
-                setup(self)
-            def tearDown(self, teardown=cls.tearDown):
-                teardown(self)
-                sys.modules.clear()
-                sys.modules.update(self._save_sys_modules)
-            cls.setUp = setUp
-            cls.tearDown = tearDown
+        if not (isinstance(cls, type) and issubclass(cls, unittest.TestCase)):
+            continue
+        cls.__name__ = name + suffix
+        @classmethod
+        def setUpClass(cls_, module=module):
+            cls_._save_sys_modules = sys.modules.copy()
+            sys.modules[TESTS] = module
+            sys.modules['datetime'] = module.datetime_module
+            sys.modules['_strptime'] = module._strptime
+        @classmethod
+        def tearDownClass(cls_):
+            sys.modules.clear()
+            sys.modules.update(cls_._save_sys_modules)
+        cls.setUpClass = setUpClass
+        cls.tearDownClass = tearDownClass
+        test_classes.append(cls)
 
 def test_main():
-    run_unittest(__name__)
+    run_unittest(*test_classes)
 
 if __name__ == "__main__":
     test_main()
diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py
index ce96ce465e..30a39f7b89 100755
--- a/Lib/test/test_dbm_gnu.py
+++ b/Lib/test/test_dbm_gnu.py
@@ -53,7 +53,7 @@ class TestGdbm(unittest.TestCase):
         all = set(gdbm.open_flags)
         # Test standard flags (presumably "crwn").
         modes = all - set('fsu')
-        for mode in modes:
+        for mode in sorted(modes):  # put "c" mode first
             self.g = gdbm.open(filename, mode)
             self.g.close()
 
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
index be38007982..1506fe9f6e 100644
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -350,12 +350,13 @@ Variable names:
    6: args
    7: kwds
 Cell variables:
-   0: e
-   1: d
-   2: f
-   3: y
-   4: x
-   5: z"""
+   0: [edfxyz]
+   1: [edfxyz]
+   2: [edfxyz]
+   3: [edfxyz]
+   4: [edfxyz]
+   5: [edfxyz]"""
+# NOTE: the order of the cell variables above depends on dictionary order!
 
 co_tricky_nested_f = tricky.__func__.__code__.co_consts[1]
 
@@ -374,12 +375,12 @@ Names:
 Variable names:
    0: c
 Free variables:
-   0: e
-   1: d
-   2: f
-   3: y
-   4: x
-   5: z"""
+   0: [edfxyz]
+   1: [edfxyz]
+   2: [edfxyz]
+   3: [edfxyz]
+   4: [edfxyz]
+   5: [edfxyz]"""
 
 code_info_expr_str = """\
 Name:              <module>
diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py
index d507a129ce..c4c4803000 100644
--- a/Lib/test/test_gdb.py
+++ b/Lib/test/test_gdb.py
@@ -52,13 +52,18 @@ class DebuggerTests(unittest.TestCase):
 
     """Test that the debugger can debug Python."""
 
-    def run_gdb(self, *args):
+    def run_gdb(self, *args, **env_vars):
         """Runs gdb with the command line given by *args.
 
         Returns its stdout, stderr
         """
+        if env_vars:
+            env = os.environ.copy()
+            env.update(env_vars)
+        else:
+            env = None
         out, err = subprocess.Popen(
-            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env,
             ).communicate()
         return out.decode('utf-8', 'replace'), err.decode('utf-8', 'replace')
 
@@ -118,7 +123,7 @@ class DebuggerTests(unittest.TestCase):
         # print ' '.join(args)
 
         # Use "args" to invoke gdb, capturing stdout, stderr:
-        out, err = self.run_gdb(*args)
+        out, err = self.run_gdb(*args, PYTHONHASHSEED='0')
 
         # Ignore some noise on stderr due to the pending breakpoint:
         err = err.replace('Function "%s" not defined.\n' % breakpoint, '')
@@ -207,7 +212,8 @@ class PrettyPrintTests(DebuggerTests):
         'Verify the pretty-printing of dictionaries'
         self.assertGdbRepr({})
         self.assertGdbRepr({'foo': 'bar'})
-        self.assertGdbRepr({'foo': 'bar', 'douglas':42})
+        self.assertGdbRepr({'foo': 'bar', 'douglas': 42},
+                           "{'foo': 'bar', 'douglas': 42}")
 
     def test_lists(self):
         'Verify the pretty-printing of lists'
@@ -269,8 +275,8 @@ class PrettyPrintTests(DebuggerTests):
     def test_sets(self):
         'Verify the pretty-printing of sets'
         self.assertGdbRepr(set())
-        self.assertGdbRepr(set(['a', 'b']))
-        self.assertGdbRepr(set([4, 5, 6]))
+        self.assertGdbRepr(set(['a', 'b']), "{'a', 'b'}")
+        self.assertGdbRepr(set([4, 5, 6]), "{4, 5, 6}")
 
         # Ensure that we handle sets containing the "dummy" key value,
         # which happens on deletion:
@@ -282,8 +288,8 @@ id(s)''')
     def test_frozensets(self):
         'Verify the pretty-printing of frozensets'
         self.assertGdbRepr(frozenset())
-        self.assertGdbRepr(frozenset(['a', 'b']))
-        self.assertGdbRepr(frozenset([4, 5, 6]))
+        self.assertGdbRepr(frozenset(['a', 'b']), "frozenset({'a', 'b'})")
+        self.assertGdbRepr(frozenset([4, 5, 6]), "frozenset({4, 5, 6})")
 
     def test_exceptions(self):
         # Test a RuntimeError
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py
index 779e4850ec..385efed7b3 100644
--- a/Lib/test/test_hash.py
+++ b/Lib/test/test_hash.py
@@ -3,10 +3,16 @@
 #
 # Also test that hash implementations are inherited as expected
 
+import datetime
+import os
+import sys
 import unittest
 from test import support
+from test.script_helper import assert_python_ok
 from collections import Hashable
 
+IS_64BIT = sys.maxsize > 2**32
+
 
 class HashEqualityTestCase(unittest.TestCase):
 
@@ -117,10 +123,92 @@ class HashBuiltinsTestCase(unittest.TestCase):
         for obj in self.hashes_to_check:
             self.assertEqual(hash(obj), _default_hash(obj))
 
+class HashRandomizationTests(unittest.TestCase):
+
+    # Each subclass should define a field "repr_", containing the repr() of
+    # an object to be tested
+
+    def get_hash_command(self, repr_):
+        return 'print(hash(%s))' % repr_
+
+    def get_hash(self, repr_, seed=None):
+        env = os.environ.copy()
+        env['__cleanenv'] = True  # signal to assert_python not to do a copy
+                                  # of os.environ on its own
+        if seed is not None:
+            env['PYTHONHASHSEED'] = str(seed)
+        else:
+            env.pop('PYTHONHASHSEED', None)
+        out = assert_python_ok(
+            '-c', self.get_hash_command(repr_),
+            **env)
+        stdout = out[1].strip()
+        return int(stdout)
+
+    def test_randomized_hash(self):
+        # two runs should return different hashes
+        run1 = self.get_hash(self.repr_, seed='random')
+        run2 = self.get_hash(self.repr_, seed='random')
+        self.assertNotEqual(run1, run2)
+
+class StringlikeHashRandomizationTests(HashRandomizationTests):
+    def test_null_hash(self):
+        # PYTHONHASHSEED=0 disables the randomized hash
+        if IS_64BIT:
+            known_hash_of_obj = 1453079729188098211
+        else:
+            known_hash_of_obj = -1600925533
+
+        # Randomization is disabled by default:
+        self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
+
+        # It can also be disabled by setting the seed to 0:
+        self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
+
+    def test_fixed_hash(self):
+        # test a fixed seed for the randomized hash
+        # Note that all types share the same values:
+        if IS_64BIT:
+            h = -4410911502303878509
+        else:
+            h = -206076799
+        self.assertEqual(self.get_hash(self.repr_, seed=42), h)
+
+class StrHashRandomizationTests(StringlikeHashRandomizationTests):
+    repr_ = repr('abc')
+
+    def test_empty_string(self):
+        self.assertEqual(hash(""), 0)
+
+class BytesHashRandomizationTests(StringlikeHashRandomizationTests):
+    repr_ = repr(b'abc')
+
+    def test_empty_string(self):
+        self.assertEqual(hash(b""), 0)
+
+class DatetimeTests(HashRandomizationTests):
+    def get_hash_command(self, repr_):
+        return 'import datetime; print(hash(%s))' % repr_
+
+class DatetimeDateTests(DatetimeTests):
+    repr_ = repr(datetime.date(1066, 10, 14))
+
+class DatetimeDatetimeTests(DatetimeTests):
+    repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
+
+class DatetimeTimeTests(DatetimeTests):
+    repr_ = repr(datetime.time(0))
+
+
 def test_main():
     support.run_unittest(HashEqualityTestCase,
-                              HashInheritanceTestCase,
-                              HashBuiltinsTestCase)
+                         HashInheritanceTestCase,
+                         HashBuiltinsTestCase,
+                         StrHashRandomizationTests,
+                         BytesHashRandomizationTests,
+                         DatetimeDateTests,
+                         DatetimeDatetimeTests,
+                         DatetimeTimeTests)
 
 
 if __name__ == "__main__":
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index c5dbb95629..a0f13fd790 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -26,6 +26,7 @@ try:
     import threading
 except ImportError:
     threading = None
+from test.script_helper import assert_python_ok
 
 os.stat_float_times(True)
 st = os.stat(__file__)
@@ -794,14 +795,33 @@ class DevNullTests(unittest.TestCase):
             self.assertEqual(f.read(), b'')
 
 class URandomTests(unittest.TestCase):
-    def test_urandom(self):
-        try:
-            self.assertEqual(len(os.urandom(1)), 1)
-            self.assertEqual(len(os.urandom(10)), 10)
-            self.assertEqual(len(os.urandom(100)), 100)
-            self.assertEqual(len(os.urandom(1000)), 1000)
-        except NotImplementedError:
-            pass
+    def test_urandom_length(self):
+        self.assertEqual(len(os.urandom(0)), 0)
+        self.assertEqual(len(os.urandom(1)), 1)
+        self.assertEqual(len(os.urandom(10)), 10)
+        self.assertEqual(len(os.urandom(100)), 100)
+        self.assertEqual(len(os.urandom(1000)), 1000)
+
+    def test_urandom_value(self):
+        data1 = os.urandom(16)
+        data2 = os.urandom(16)
+        self.assertNotEqual(data1, data2)
+
+    def get_urandom_subprocess(self, count):  # os.urandom(count) from a child
+        code = '\n'.join((
+            'import os, sys',
+            'data = os.urandom(%s)' % count,
+            'sys.stdout.buffer.write(data)',
+            'sys.stdout.buffer.flush()'))
+        out = assert_python_ok('-c', code)  # runs in a child interpreter
+        stdout = out[1]  # out is (rc, stdout, stderr)
+        self.assertEqual(len(stdout), count)  # exactly the requested size
+        return stdout
+
+    def test_urandom_subprocess(self):
+        data1 = self.get_urandom_subprocess(16)
+        data2 = self.get_urandom_subprocess(16)
+        self.assertNotEqual(data1, data2)
 
 @contextlib.contextmanager
 def _execvpe_mockup(defpath=None):
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
index 07bfe0657e..6642440dea 100644
--- a/Lib/test/test_set.py
+++ b/Lib/test/test_set.py
@@ -733,6 +733,17 @@ class TestBasicOps(unittest.TestCase):
         if self.repr is not None:
             self.assertEqual(repr(self.set), self.repr)
 
+    def check_repr_against_values(self):
+        text = repr(self.set)
+        self.assertTrue(text.startswith('{'))
+        self.assertTrue(text.endswith('}'))
+
+        result = text[1:-1].split(', ')
+        result.sort()
+        sorted_repr_values = [repr(value) for value in self.values]
+        sorted_repr_values.sort()
+        self.assertEqual(result, sorted_repr_values)
+
     def test_print(self):
         try:
             fo = open(support.TESTFN, "w")
@@ -891,7 +902,9 @@ class TestBasicOpsString(TestBasicOps):
         self.set    = set(self.values)
         self.dup    = set(self.values)
         self.length = 3
-        self.repr   = "{'a', 'c', 'b'}"
+
+    def test_repr(self):
+        self.check_repr_against_values()
 
 #------------------------------------------------------------------------------
 
@@ -902,7 +915,9 @@ class TestBasicOpsBytes(TestBasicOps):
         self.set    = set(self.values)
         self.dup    = set(self.values)
         self.length = 3
-        self.repr   = "{b'a', b'c', b'b'}"
+
+    def test_repr(self):
+        self.check_repr_against_values()
 
 #------------------------------------------------------------------------------
 
@@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps):
         self.set    = set(self.values)
         self.dup    = set(self.values)
         self.length = 4
-        self.repr   = "{'a', b'a', 'b', b'b'}"
 
     def tearDown(self):
         self._warning_filters.__exit__(None, None, None)
 
+    def test_repr(self):
+        self.check_repr_against_values()
+
 #==============================================================================
 
 def baditer():
diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py
index a6033a4b57..1f041c80ab 100644
--- a/Lib/test/test_strlit.py
+++ b/Lib/test/test_strlit.py
@@ -65,7 +65,7 @@ class TestLiterals(unittest.TestCase):
         sys.path.insert(0, self.tmpdir)
 
     def tearDown(self):
-        sys.path = self.save_path
+        sys.path[:] = self.save_path
         shutil.rmtree(self.tmpdir, ignore_errors=True)
 
     def test_template(self):
diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py
index 685857d014..5deec426db 100644
--- a/Lib/test/test_subprocess.py
+++ b/Lib/test/test_subprocess.py
@@ -1774,7 +1774,7 @@ class CommandsWithSpaces (BaseTestCase):
         self.with_spaces([sys.executable, self.fname, "ab cd"])
 
 
-class ContextManagerTests(ProcessTestCase):
+class ContextManagerTests(BaseTestCase):
 
     def test_pipe(self):
         with subprocess.Popen([sys.executable, "-c",
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index f065bf72e6..bf22df2269 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -513,7 +513,7 @@ class SysModuleTest(unittest.TestCase):
         attrs = ("debug",
                  "inspect", "interactive", "optimize", "dont_write_bytecode",
                  "no_user_site", "no_site", "ignore_environment", "verbose",
-                 "bytes_warning", "quiet")
+                 "bytes_warning", "quiet", "hash_randomization")
         for attr in attrs:
             self.assertTrue(hasattr(sys.flags, attr), attr)
             self.assertEqual(type(getattr(sys.flags, attr)), int, attr)
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index e403bbf167..b2680edd42 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -13,6 +13,7 @@ import sys
 import tempfile
 
 from base64 import b64encode
+import collections
 
 def hexescape(char):
     """Escape char as RFC 2396 specifies"""
@@ -953,8 +954,9 @@ class urlencode_Tests(unittest.TestCase):
         self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
         self.assertEqual("a=None&a=a",
                          urllib.parse.urlencode({"a": [None, "a"]}, True))
+        data = collections.OrderedDict([("a", 1), ("b", 1)])
         self.assertEqual("a=a&a=b",
-                         urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
+                         urllib.parse.urlencode({"a": data}, True))
 
     def test_urlencode_encoding(self):
         # ASCII encoding. Expect %3F with errors="replace'
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index a6e7ee8e1c..ada0ca8788 100644..100755
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -769,7 +769,8 @@ class UrlParseTestCase(unittest.TestCase):
         # Other tests incidentally urlencode things; test non-covered cases:
         # Sequence and object values.
         result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
-        self.assertEqual(result, 'a=1&a=2&b=3&b=4&b=5')
+        self.assertEqual(set(result.split('&')),  # we cannot rely on ordering
+                         {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'})
 
         class Trivial:
             def __str__(self):
diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py
index df593cd710..2303e4cd46 100644
--- a/Lib/tkinter/test/test_ttk/test_functions.py
+++ b/Lib/tkinter/test/test_ttk/test_functions.py
@@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.TestCase):
             ('a', 'b', 'c')), ("test {a b} c", ()))
         # state spec and options
         self.assertEqual(ttk._format_elemcreate('image', False, 'test',
-            ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y")))
+            ('a', 'b'), a='x'), ("test a b", ("-a", "x")))
         # format returned values as a tcl script
         # state spec with multiple states and an option with a multivalue
         self.assertEqual(ttk._format_elemcreate('image', True, 'test',
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 46f461ad2b..a13d1c3649 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -322,6 +322,7 @@ PYTHON_OBJS=	\
 		Python/pystate.o \
 		Python/pythonrun.o \
 		Python/pytime.o \
+		Python/random.o \
 		Python/structmember.o \
 		Python/symtable.o \
 		Python/sysmodule.o \
diff --git a/Misc/NEWS b/Misc/NEWS
index 6e010b7a2f..1a6ce289fd 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -16,6 +16,11 @@ Core and Builtins
 - Issue #14051: Allow arbitrary attributes to be set of classmethod and
   staticmethod.
 
+- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED
+  environment variable, to provide an opt-in way to protect against denial of
+  service attacks due to hash collisions within the dict and set types.  Patch
+  by David Malcolm, based on work by Victor Stinner.
+
 - Issue #13020: Fix a reference leak when allocating a structsequence object
   fails.  Patch by Suman Saha.
 
diff --git a/Misc/python.man b/Misc/python.man
index eaa3ec7646..ef42c4ef94 100644
--- a/Misc/python.man
+++ b/Misc/python.man
@@ -37,6 +37,9 @@ python \- an interpreted, interactive, object-oriented programming language
 .B \-OO
 ]
 [
+.B \-R
+]
+[
 .B \-s
 ]
 [
@@ -148,6 +151,18 @@ Discard docstrings in addition to the \fB-O\fP optimizations.
 Do not print the version and copyright messages. These messages are 
 also suppressed in non-interactive mode.
 .TP
+.B \-R
+Turn on "hash randomization", so that the hash() values of str, bytes and
+datetime objects are "salted" with an unpredictable pseudo-random value.
+Although they remain constant within an individual Python process, they are
+not predictable between repeated invocations of Python.
+.IP
+This is intended to provide protection against a denial-of-service attack
+caused by carefully chosen inputs that exploit the worst-case performance
+of building a dict, which is O(n^2) in the number of colliding keys.  See
+http://www.ocert.org/advisories/ocert-2011-003.html
+for details.
+.TP
 .B \-s
 Don't add user site directory to sys.path.
 .TP
@@ -402,6 +417,20 @@ specifying \fB\-v\fP multiple times.
 .IP PYTHONWARNINGS
 If this is set to a comma-separated string it is equivalent to
 specifying the \fB\-W\fP option for each separate value.
+.IP PYTHONHASHSEED
+If this variable is set to "random", the effect is the same as specifying
+the \fB-R\fP option: a random value is used to seed the hashes of str,
+bytes and datetime objects.
+
+If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for
+generating the hash() of the types covered by the hash randomization.  Its
+purpose is to allow repeatable hashing, such as for selftests for the
+interpreter itself, or to allow a cluster of python processes to share hash
+values.
+
+The integer must be a decimal number in the range [0,4294967295].  Specifying
+the value 0 will lead to the same hash values as when hash randomization is
+disabled.
 .SH AUTHOR
 The Python Software Foundation: http://www.python.org/psf
 .SH INTERNET RESOURCES
diff --git a/Modules/main.c b/Modules/main.c
index d8c5172108..a820a9eb44 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -47,7 +47,7 @@ static wchar_t **orig_argv;
 static int  orig_argc;
 
 /* command line options */
-#define BASE_OPTS L"bBc:dEhiJm:OqsStuvVW:xX:?"
+#define BASE_OPTS L"bBc:dEhiJm:OqRsStuvVW:xX:?"
 
 #define PROGRAM_OPTS BASE_OPTS
 
@@ -73,6 +73,9 @@ static char *usage_2 = "\
 -O     : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\
 -OO    : remove doc-strings in addition to the -O optimizations\n\
 -q     : don't print version and copyright messages on interactive startup\n\
+-R     : use a pseudo-random salt to make hash() values of various types be\n\
+         unpredictable between separate invocations of the interpreter, as\n\
+         a defence against denial-of-service attacks\n\
 -s     : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\
 -S     : don't imply 'import site' on initialization\n\
 ";
@@ -101,8 +104,14 @@ static char *usage_5 =
 "               The default module search path uses %s.\n"
 "PYTHONCASEOK : ignore case in 'import' statements (Windows).\n"
 "PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n"
-"PYTHONFAULTHANDLER: dump the Python traceback on fatal errors.\n"
-;
+"PYTHONFAULTHANDLER: dump the Python traceback on fatal errors.\n\
+";
+static char *usage_6 = "\
+PYTHONHASHSEED: if this variable is set to 'random', the effect is the same\n\
+   as specifying the -R option: a random value is used to seed the hashes of\n\
+   str, bytes and datetime objects.  It can also be set to an integer\n\
+   in the range [0,4294967295] to get hash values with a predictable seed.\n\
+";
 
 static int
 usage(int exitcode, wchar_t* program)
@@ -118,6 +127,7 @@ usage(int exitcode, wchar_t* program)
         fputs(usage_3, f);
         fprintf(f, usage_4, DELIM);
         fprintf(f, usage_5, DELIM, PYTHONHOMEHELP);
+        fputs(usage_6, f);
     }
 #if defined(__VMS)
     if (exitcode == 0) {
@@ -431,6 +441,10 @@ Py_Main(int argc, wchar_t **argv)
             Py_QuietFlag++;
             break;
 
+        case 'R':
+            Py_HashRandomizationFlag++;
+            break;
+
         /* This space reserved for other options */
 
         default:
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 0553c761bd..dbace1a891 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -9317,82 +9317,6 @@ posix_getloadavg(PyObject *self, PyObject *noargs)
 }
 #endif
 
-#ifdef MS_WINDOWS
-
-PyDoc_STRVAR(win32_urandom__doc__,
-"urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
-              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
-              DWORD dwFlags );
-typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
-              BYTE *pbBuffer );
-
-static CRYPTGENRANDOM pCryptGenRandom = NULL;
-/* This handle is never explicitly released. Instead, the operating
-   system will release it when the process terminates. */
-static HCRYPTPROV hCryptProv = 0;
-
-static PyObject*
-win32_urandom(PyObject *self, PyObject *args)
-{
-    int howMany;
-    PyObject* result;
-
-    /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
-        return NULL;
-    if (howMany < 0)
-        return PyErr_Format(PyExc_ValueError,
-                            "negative argument not allowed");
-
-    if (hCryptProv == 0) {
-        HINSTANCE hAdvAPI32 = NULL;
-        CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
-
-        /* Obtain handle to the DLL containing CryptoAPI
-           This should not fail         */
-        hAdvAPI32 = GetModuleHandleW(L"advapi32.dll");
-        if(hAdvAPI32 == NULL)
-            return win32_error("GetModuleHandle", NULL);
-
-        /* Obtain pointers to the CryptoAPI functions
-           This will fail on some early versions of Win95 */
-        pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
-                                        hAdvAPI32,
-                                        "CryptAcquireContextA");
-        if (pCryptAcquireContext == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptAcquireContextA not found");
-
-        pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(
-                                        hAdvAPI32, "CryptGenRandom");
-        if (pCryptGenRandom == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptGenRandom not found");
-
-        /* Acquire context */
-        if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
-                                   PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
-            return win32_error("CryptAcquireContext", NULL);
-    }
-
-    /* Allocate bytes */
-    result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */
-        if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*)
-                              PyBytes_AS_STRING(result))) {
-            Py_DECREF(result);
-            return win32_error("CryptGenRandom", NULL);
-        }
-    }
-    return result;
-}
-#endif
-
 PyDoc_STRVAR(device_encoding__doc__,
 "device_encoding(fd) -> str\n\n\
 Return a string describing the encoding of the device\n\
@@ -10490,6 +10414,36 @@ posix_flistxattr(PyObject *self, PyObject *args)
 #endif /* USE_XATTRS */
 
 
+PyDoc_STRVAR(posix_urandom__doc__,
+"urandom(n) -> bytes\n\n\
+Return n random bytes suitable for cryptographic use.");
+
+static PyObject *
+posix_urandom(PyObject *self, PyObject *args)
+{
+    Py_ssize_t size;
+    PyObject *result;
+    int ret;
+
+    /* Read the single argument n (parsed as a Py_ssize_t) */
+    if (!PyArg_ParseTuple(args, "n:urandom", &size))
+        return NULL;
+    if (size < 0)
+        return PyErr_Format(PyExc_ValueError,
+                            "negative argument not allowed");
+    result = PyBytes_FromStringAndSize(NULL, size);  /* filled in below */
+    if (result == NULL)
+        return NULL;
+
+    ret = _PyOS_URandom(PyBytes_AS_STRING(result),
+                        PyBytes_GET_SIZE(result));
+    if (ret == -1) {  /* _PyOS_URandom() raised; drop the partial buffer */
+        Py_DECREF(result);
+        return NULL;
+    }
+    return result;
+}
+
 /* Terminal size querying */
 
 static PyTypeObject TerminalSizeType;
@@ -10984,12 +10938,7 @@ static PyMethodDef posix_methods[] = {
 #ifdef HAVE_GETLOADAVG
     {"getloadavg",      posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__},
 #endif
- #ifdef MS_WINDOWS
-    {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__},
- #endif
- #ifdef __VMS
-    {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__},
- #endif
+    {"urandom",         posix_urandom,   METH_VARARGS, posix_urandom__doc__},
 #ifdef HAVE_SETRESUID
     {"setresuid",       posix_setresuid, METH_VARARGS, posix_setresuid__doc__},
 #endif
diff --git a/Objects/object.c b/Objects/object.c
index 81348258a4..bb18d47191 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -759,10 +759,19 @@ _Py_HashBytes(unsigned char *p, Py_ssize_t len)
     Py_uhash_t x;
     Py_ssize_t i;
 
-    x = (Py_uhash_t) *p << 7;
+    /*
+      We make the hash of the empty string be 0, rather than using
+      (prefix ^ suffix), since this slightly obfuscates the hash secret
+    */
+    if (len == 0) {
+        return 0;
+    }
+    x = (Py_uhash_t) _Py_HashSecret.prefix;
+    x ^= (Py_uhash_t) *p << 7;
     for (i = 0; i < len; i++)
         x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
     x ^= (Py_uhash_t) len;
+    x ^= (Py_uhash_t) _Py_HashSecret.suffix;
     if (x == -1)
         x = -2;
     return x;
@@ -776,6 +785,8 @@ PyObject_HashNotImplemented(PyObject *v)
     return -1;
 }
 
+_Py_HashSecret_t _Py_HashSecret;
+
 Py_hash_t
 PyObject_Hash(PyObject *v)
 {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 07d3eb8569..716ca3f26a 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -11221,11 +11221,12 @@ unicode_hash(PyObject *self)
     len = PyUnicode_GET_LENGTH(self);
 
     /* The hash function as a macro, gets expanded three times below. */
-#define HASH(P) \
-    x = (Py_uhash_t)*P << 7; \
-    while (--len >= 0) \
-        x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++;
+#define HASH(P)                                            \
+    x ^= (Py_uhash_t) *P << 7;                             \
+    while (--len >= 0)                                     \
+        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++;  \
 
+    x = (Py_uhash_t) _Py_HashSecret.prefix;
     switch (PyUnicode_KIND(self)) {
     case PyUnicode_1BYTE_KIND: {
         const unsigned char *c = PyUnicode_1BYTE_DATA(self);
@@ -11246,7 +11247,8 @@ unicode_hash(PyObject *self)
         break;
     }
     }
-    x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self);
+    x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self);
+    x ^= (Py_uhash_t) _Py_HashSecret.suffix;
 
     if (x == -1)
         x = -2;
diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj
index 69a8ca8c7c..8b908aa7ab 100644
--- a/PCbuild/pythoncore.vcproj
+++ b/PCbuild/pythoncore.vcproj
@@ -1890,6 +1890,10 @@
 				RelativePath="..\Python\pythonrun.c"
 				>
 			</File>
+ 			<File
+				RelativePath="..\Python\random.c"
+				>
+			</File>
 			<File
 				RelativePath="..\Python\structmember.c"
 				>
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 44b817f546..54d39a5a91 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -73,6 +73,7 @@ extern int _PyLong_Init(void);
 extern void PyLong_Fini(void);
 extern int _PyFaulthandler_Init(void);
 extern void _PyFaulthandler_Fini(void);
+extern void _PyRandom_Init(void);
 
 #ifdef WITH_THREAD
 extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
@@ -92,6 +93,7 @@ int Py_FrozenFlag; /* Needed by getpath.c */
 int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */
 int Py_NoUserSiteDirectory = 0; /* for -s and site.py */
 int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */
+int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */
 
 PyThreadState *_Py_Finalizing = NULL;
 
@@ -218,6 +220,12 @@ Py_InitializeEx(int install_sigs)
         Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p);
     if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0')
         Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p);
+    /* The variable is only tested for existence here; _PyRandom_Init will
+       check its value further. */
+    if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
+        Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p);
+
+    _PyRandom_Init();
 
     interp = PyInterpreterState_New();
     if (interp == NULL)
diff --git a/Python/random.c b/Python/random.c
new file mode 100644
index 0000000000..01cd83aa56
--- /dev/null
+++ b/Python/random.c
@@ -0,0 +1,302 @@
+#include "Python.h"
+#ifdef MS_WINDOWS
+#include <windows.h>
+#else
+#include <fcntl.h>
+#endif
+
+static int random_initialized = 0;
+
+#ifdef MS_WINDOWS
+typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
+              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
+              DWORD dwFlags );
+typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
+              BYTE *pbBuffer );
+
+static CRYPTGENRANDOM pCryptGenRandom = NULL;
+/* This handle is never explicitly released. Instead, the operating
+   system will release it when the process terminates. */
+static HCRYPTPROV hCryptProv = 0;
+
+static int
+win32_urandom_init(int raise)
+{
+    HINSTANCE hAdvAPI32 = NULL;
+    CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
+
+    /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
+    hAdvAPI32 = GetModuleHandle("advapi32.dll");
+    if(hAdvAPI32 == NULL)
+        goto error;
+
+    /* Obtain pointers to the CryptoAPI functions. This will fail on some early
+       versions of Win95. */
+    pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
+                               hAdvAPI32, "CryptAcquireContextA");
+    if (pCryptAcquireContext == NULL)
+        goto error;
+
+    pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32,
+                                                     "CryptGenRandom");
+    if (pCryptGenRandom == NULL)
+        goto error;
+
+    /* Acquire context */
+    if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
+                               PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
+        goto error;
+
+    return 0;
+
+error:
+    if (raise)
+        PyErr_SetFromWindowsErr(0);
+    else
+        Py_FatalError("Failed to initialize Windows random API (CryptoGen)");
+    return -1;
+}
+
+/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
+   API. Return 0 on success, or -1 on error (exception set only if raise). */
+static int
+win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+    Py_ssize_t chunk;
+
+    if (hCryptProv == 0)  /* crypto context not acquired yet */
+    {
+        if (win32_urandom_init(raise) == -1)
+            return -1;
+    }
+
+    while (size > 0)
+    {
+        chunk = size > INT_MAX ? INT_MAX : size;  /* dwLen is a DWORD */
+        if (!pCryptGenRandom(hCryptProv, chunk, buffer))
+        {
+            /* CryptGenRandom() failed */
+            if (raise)
+                PyErr_SetFromWindowsErr(0);
+            else
+                Py_FatalError("Failed to initialize the randomized hash "
+                        "secret using CryptoGen");
+            return -1;
+        }
+        buffer += chunk;
+        size -= chunk;
+    }
+    return 0;
+}
+#endif /* MS_WINDOWS */
+
+
+#ifdef __VMS
+/* Use openssl random routine */
+#include <openssl/rand.h>
+static int
+vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+    if (RAND_pseudo_bytes(buffer, size) < 0) {
+        if (raise) {
+            PyErr_Format(PyExc_ValueError,
+                         "RAND_pseudo_bytes");
+        } else {
+            Py_FatalError("Failed to initialize the randomized hash "
+                          "secret using RAND_pseudo_bytes");
+        }
+        return -1;
+    }
+    return 0;
+}
+#endif /* __VMS */
+
+
+#if !defined(MS_WINDOWS) && !defined(__VMS)
+
+/* Read size bytes from /dev/urandom into buffer.
+   Call Py_FatalError() on error. */
+static void
+dev_urandom_noraise(char *buffer, Py_ssize_t size)
+{
+    int fd;
+    Py_ssize_t n;
+
+    assert (0 < size);
+
+    fd = open("/dev/urandom", O_RDONLY);
+    if (fd < 0)
+        Py_FatalError("Failed to open /dev/urandom");
+
+    while (0 < size)
+    {
+        do {
+            n = read(fd, buffer, (size_t)size);
+        } while (n < 0 && errno == EINTR);
+        if (n <= 0)
+        {
+            /* stop on error or if read(size) returned 0 */
+            Py_FatalError("Failed to read bytes from /dev/urandom");
+            break;
+        }
+        buffer += n;
+        size -= (Py_ssize_t)n;
+    }
+    close(fd);
+}
+
+/* Read size bytes from /dev/urandom into buffer.
+   Return 0 on success, raise an exception and return -1 on error. */
+static int
+dev_urandom_python(char *buffer, Py_ssize_t size)
+{
+    int fd;
+    Py_ssize_t n;
+
+    if (size <= 0)
+        return 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    fd = open("/dev/urandom", O_RDONLY);
+    Py_END_ALLOW_THREADS
+    if (fd < 0)
+    {
+        PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom");
+        return -1;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    do {
+        do {
+            n = read(fd, buffer, (size_t)size);
+        } while (n < 0 && errno == EINTR);
+        if (n <= 0)
+            break;
+        buffer += n;
+        size -= (Py_ssize_t)n;
+    } while (0 < size);
+    Py_END_ALLOW_THREADS
+
+    if (n <= 0)
+    {
+        /* stop on error or if read(size) returned 0 */
+        if (n < 0)
+            PyErr_SetFromErrno(PyExc_OSError);
+        else
+            PyErr_Format(PyExc_RuntimeError,
+                         "Failed to read %zi bytes from /dev/urandom",
+                         size);
+        close(fd);
+        return -1;
+    }
+    close(fd);
+    return 0;
+}
+#endif /* !defined(MS_WINDOWS) && !defined(__VMS) */
+
+/* Fill buffer with pseudo-random bytes generated by a linear congruential
+   generator (LCG):
+
+       x(n+1) = (x(n) * 214013 + 2531011) % 2^32
+
+   Use bits 23..16 of x(n) to generate a byte. */
+static void
+lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
+{
+    size_t index;
+    unsigned int x;
+
+    x = x0;
+    for (index=0; index < size; index++) {
+        x *= 214013;
+        x += 2531011;
+        /* modulo 2 ^ (8 * sizeof(int)) is implicit: unsigned int wraps */
+        buffer[index] = (x >> 16) & 0xff;
+    }
+}
+
+/* Fill buffer with size random bytes from the OS RNG (CryptGenRandom on
+   Windows, RAND_pseudo_bytes on VMS, else /dev/urandom).  NOTE(review): was
+   "not suitable for cryptographic use", but urandom()'s docstring says
+   suitable -- confirm.  Return 0 on success, raise and return -1 on error. */
+int
+_PyOS_URandom(void *buffer, Py_ssize_t size)
+{
+    if (size < 0) {
+        PyErr_Format(PyExc_ValueError,
+                     "negative argument not allowed");
+        return -1;
+    }
+    if (size == 0)
+        return 0;
+
+#ifdef MS_WINDOWS
+    return win32_urandom((unsigned char *)buffer, size, 1);
+#else
+# ifdef __VMS
+    return vms_urandom((unsigned char *)buffer, size, 1);
+# else
+    return dev_urandom_python((char*)buffer, size);
+# endif
+#endif
+}
+
+/* Initialize _Py_HashSecret on the first call only: all zeros when
+   randomization is off or PYTHONHASHSEED is 0, LCG output for any other
+   integer seed, OS random bytes otherwise.  A bad seed is a fatal error. */
+void
+_PyRandom_Init(void)
+{
+    char *env;
+    void *secret = &_Py_HashSecret;
+    Py_ssize_t secret_size = sizeof(_Py_HashSecret);
+
+    if (random_initialized)
+        return;
+    random_initialized = 1;
+
+    /* By default, hash randomization is disabled, and only enabled if
+       PYTHONHASHSEED is set to non-empty or if "-R" is provided at the
+       command line: */
+    if (!Py_HashRandomizationFlag) {
+        /* Disable the randomized hash: */
+        memset(secret, 0, secret_size);
+        return;
+    }
+
+    /* Hash randomization is enabled.  Generate a per-process secret,
+       using PYTHONHASHSEED if provided. */
+    env = Py_GETENV("PYTHONHASHSEED");
+    if (env && *env != '\0' && strcmp(env, "random") != 0) {
+        char *endptr = env;
+        unsigned long seed;
+        /* strtoul() never clears errno, so reset it before testing ERANGE */
+        errno = 0;
+        seed = strtoul(env, &endptr, 10);
+        if (*endptr != '\0'
+            || seed > 4294967295UL
+            || (errno == ERANGE && seed == ULONG_MAX))
+        {
+            Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer "
+                          "in range [0; 4294967295]");
+        }
+        if (seed == 0) {
+            /* disable the randomized hash */
+            memset(secret, 0, secret_size);
+        }
+        else {
+            lcg_urandom(seed, (unsigned char*)secret, secret_size);
+        }
+    }
+    else {
+#ifdef MS_WINDOWS
+        (void)win32_urandom((unsigned char *)secret, secret_size, 0);
+#else /* #ifdef MS_WINDOWS */
+# ifdef __VMS
+        vms_urandom((unsigned char *)secret, secret_size, 0);
+# else
+        dev_urandom_noraise((char*)secret, secret_size);
+# endif
+#endif
+    }
+}
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 955219f228..c434b5a81e 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1332,6 +1332,7 @@ static PyStructSequence_Field flags_fields[] = {
     /* {"skip_first",                   "-x"}, */
     {"bytes_warning",           "-b"},
     {"quiet",                   "-q"},
+    {"hash_randomization",      "-R"},
     {0}
 };
 
@@ -1340,9 +1341,9 @@ static PyStructSequence_Desc flags_desc = {
     flags__doc__,       /* doc */
     flags_fields,       /* fields */
 #ifdef RISCOS
-    12
+    13
 #else
-    11
+    12
 #endif
 };
 
@@ -1375,6 +1376,7 @@ make_flags(void)
     /* SetFlag(skipfirstline); */
     SetFlag(Py_BytesWarningFlag);
     SetFlag(Py_QuietFlag);
+    SetFlag(Py_HashRandomizationFlag);
 #undef SetFlag
 
     if (PyErr_Occurred()) {
diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py
index f750e192a0..fb7ce5c07c 100755
--- a/Tools/scripts/run_tests.py
+++ b/Tools/scripts/run_tests.py
@@ -25,6 +25,7 @@ def main(regrtest_args):
             '-W', 'default',      # Warnings set to 'default'
             '-bb',                # Warnings about bytes/bytearray
             '-E',                 # Ignore environment variables
+            '-R',                 # Randomize hashing
             ]
     # Allow user-specified interpreter options to override our defaults.
     args.extend(test.support.args_from_interpreter_flags())
author	Georg Brandl <georg@python.org>	2012-02-21 00:33:36 +0100
committer	Georg Brandl <georg@python.org>	2012-02-21 00:33:36 +0100
commit	2fb477c0f0284439d40cb3f46eea45ef42446e53 (patch)
tree	c8df3747d511256d56ca4af046db7915b5c06096
parent	b5c793a0b349cb02003433c30a410595b224079f (diff)
parent	9edceb3e591063f382ae82e14313813ffc1af0bf (diff)
download	cpython3-2fb477c0f0284439d40cb3f46eea45ef42446e53.tar.gz