diff options
Diffstat (limited to 'lib/python2.7/pickle.py')
-rw-r--r-- | lib/python2.7/pickle.py | 1391 |
1 files changed, 0 insertions, 1391 deletions
diff --git a/lib/python2.7/pickle.py b/lib/python2.7/pickle.py deleted file mode 100644 index 508e858..0000000 --- a/lib/python2.7/pickle.py +++ /dev/null @@ -1,1391 +0,0 @@ -"""Create portable serialized representations of Python objects. - -See module cPickle for a (much) faster implementation. -See module copy_reg for a mechanism for registering custom picklers. -See module pickletools source for extensive comments. - -Classes: - - Pickler - Unpickler - -Functions: - - dump(object, file) - dumps(object) -> string - load(file) -> object - loads(string) -> object - -Misc variables: - - __version__ - format_version - compatible_formats - -""" - -__version__ = "$Revision: 72223 $" # Code version - -from types import * -from copy_reg import dispatch_table -from copy_reg import _extension_registry, _inverted_registry, _extension_cache -import marshal -import sys -import struct -import re - -__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", - "Unpickler", "dump", "dumps", "load", "loads"] - -# These are purely informational; no code uses these. -format_version = "2.0" # File format version we write -compatible_formats = ["1.0", # Original protocol 0 - "1.1", # Protocol 0 with INST added - "1.2", # Original protocol 1 - "1.3", # Protocol 1 with BINFLOAT added - "2.0", # Protocol 2 - ] # Old format versions we can read - -# Keep in synch with cPickle. This is the highest protocol number we -# know how to read. -HIGHEST_PROTOCOL = 2 - -# Why use struct.pack() for pickling but marshal.loads() for -# unpickling? struct.pack() is 40% faster than marshal.dumps(), but -# marshal.loads() is twice as fast as struct.unpack()! -mloads = marshal.loads - -class PickleError(Exception): - """A common base class for the other pickling exceptions.""" - pass - -class PicklingError(PickleError): - """This exception is raised when an unpicklable object is passed to the - dump() method. - - """ - pass - -class UnpicklingError(PickleError): - """This exception is raised when there is a problem unpickling an object, - such as a security violation. - - Note that other exceptions may also be raised during unpickling, including - (but not necessarily limited to) AttributeError, EOFError, ImportError, - and IndexError. - - """ - pass - -# An instance of _Stop is raised by Unpickler.load_stop() in response to -# the STOP opcode, passing the object that is the result of unpickling. -class _Stop(Exception): - def __init__(self, value): - self.value = value - -# Jython has PyStringMap; it's a dict subclass with string keys -try: - from org.python.core import PyStringMap -except ImportError: - PyStringMap = None - -# UnicodeType may or may not be exported (normally imported from types) -try: - UnicodeType -except NameError: - UnicodeType = None - -# Pickle opcodes. See pickletools.py for extensive docs. The listing -# here is in kind-of alphabetical order of 1-character pickle code. -# pickletools groups them by purpose. - -MARK = '(' # push special markobject on stack -STOP = '.' # every pickle ends with STOP -POP = '0' # discard topmost stack item -POP_MARK = '1' # discard stack top through topmost markobject -DUP = '2' # duplicate top stack item -FLOAT = 'F' # push float object; decimal string argument -INT = 'I' # push integer or bool; decimal string argument -BININT = 'J' # push four-byte signed int -BININT1 = 'K' # push 1-byte unsigned int -LONG = 'L' # push long; decimal string argument -BININT2 = 'M' # push 2-byte unsigned int -NONE = 'N' # push None -PERSID = 'P' # push persistent object; id is taken from string arg -BINPERSID = 'Q' # " " " ; " " " " stack -REDUCE = 'R' # apply callable to argtuple, both on stack -STRING = 'S' # push string; NL-terminated string argument -BINSTRING = 'T' # push string; counted binary string argument -SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes -UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument -BINUNICODE = 'X' # " " " ; counted UTF-8 string argument -APPEND = 'a' # append stack top to list below it -BUILD = 'b' # call __setstate__ or __dict__.update() -GLOBAL = 'c' # push self.find_class(modname, name); 2 string args -DICT = 'd' # build a dict from stack items -EMPTY_DICT = '}' # push empty dict -APPENDS = 'e' # extend list on stack by topmost stack slice -GET = 'g' # push item from memo on stack; index is string arg -BINGET = 'h' # " " " " " " ; " " 1-byte arg -INST = 'i' # build & push class instance -LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg -LIST = 'l' # build list from topmost stack items -EMPTY_LIST = ']' # push empty list -OBJ = 'o' # build & push class instance -PUT = 'p' # store stack top in memo; index is string arg -BINPUT = 'q' # " " " " " ; " " 1-byte arg -LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg -SETITEM = 's' # add key+value pair to dict -TUPLE = 't' # build tuple from topmost stack items -EMPTY_TUPLE = ')' # push empty tuple -SETITEMS = 'u' # modify dict by adding topmost key+value pairs -BINFLOAT = 'G' # push float; arg is 8-byte float encoding - -TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py -FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py - -# Protocol 2 - -PROTO = '\x80' # identify pickle protocol -NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple -EXT1 = '\x82' # push object from extension registry; 1-byte index -EXT2 = '\x83' # ditto, but 2-byte index -EXT4 = '\x84' # ditto, but 4-byte index -TUPLE1 = '\x85' # build 1-tuple from stack top -TUPLE2 = '\x86' # build 2-tuple from two topmost stack items -TUPLE3 = '\x87' # build 3-tuple from three topmost stack items -NEWTRUE = '\x88' # push True -NEWFALSE = '\x89' # push False -LONG1 = '\x8a' # push long from < 256 bytes -LONG4 = '\x8b' # push really big long - -_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3] - - -__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) -del x - - -# Pickling machinery - -class Pickler: - - def __init__(self, file, protocol=None): - """This takes a file-like object for writing a pickle data stream. - - The optional protocol argument tells the pickler to use the - given protocol; supported protocols are 0, 1, 2. The default - protocol is 0, to be backwards compatible. (Protocol 0 is the - only protocol that can be written to a file opened in text - mode and read back successfully. When using a protocol higher - than 0, make sure the file is opened in binary mode, both when - pickling and unpickling.) - - Protocol 1 is more efficient than protocol 0; protocol 2 is - more efficient than protocol 1. - - Specifying a negative protocol version selects the highest - protocol version supported. The higher the protocol used, the - more recent the version of Python needed to read the pickle - produced. - - The file parameter must have a write() method that accepts a single - string argument. It can thus be an open file object, a StringIO - object, or any other custom object that meets this interface. - - """ - if protocol is None: - protocol = 0 - if protocol < 0: - protocol = HIGHEST_PROTOCOL - elif not 0 <= protocol <= HIGHEST_PROTOCOL: - raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL) - self.write = file.write - self.memo = {} - self.proto = int(protocol) - self.bin = protocol >= 1 - self.fast = 0 - - def clear_memo(self): - """Clears the pickler's "memo". - - The memo is the data structure that remembers which objects the - pickler has already seen, so that shared or recursive objects are - pickled by reference and not by value. This method is useful when - re-using picklers. - - """ - self.memo.clear() - - def dump(self, obj): - """Write a pickled representation of obj to the open file.""" - if self.proto >= 2: - self.write(PROTO + chr(self.proto)) - self.save(obj) - self.write(STOP) - - def memoize(self, obj): - """Store an object in the memo.""" - - # The Pickler memo is a dictionary mapping object ids to 2-tuples - # that contain the Unpickler memo key and the object being memoized. - # The memo key is written to the pickle and will become - # the key in the Unpickler's memo. The object is stored in the - # Pickler memo so that transient objects are kept alive during - # pickling. - - # The use of the Unpickler memo length as the memo key is just a - # convention. The only requirement is that the memo values be unique. - # But there appears no advantage to any other scheme, and this - # scheme allows the Unpickler memo to be implemented as a plain (but - # growable) array, indexed by memo key. - if self.fast: - return - assert id(obj) not in self.memo - memo_len = len(self.memo) - self.write(self.put(memo_len)) - self.memo[id(obj)] = memo_len, obj - - # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i. - def put(self, i, pack=struct.pack): - if self.bin: - if i < 256: - return BINPUT + chr(i) - else: - return LONG_BINPUT + pack("<i", i) - - return PUT + repr(i) + '\n' - - # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i. - def get(self, i, pack=struct.pack): - if self.bin: - if i < 256: - return BINGET + chr(i) - else: - return LONG_BINGET + pack("<i", i) - - return GET + repr(i) + '\n' - - def save(self, obj): - # Check for persistent id (defined by a subclass) - pid = self.persistent_id(obj) - if pid: - self.save_pers(pid) - return - - # Check the memo - x = self.memo.get(id(obj)) - if x: - self.write(self.get(x[0])) - return - - # Check the type dispatch table - t = type(obj) - f = self.dispatch.get(t) - if f: - f(self, obj) # Call unbound method with explicit self - return - - # Check copy_reg.dispatch_table - reduce = dispatch_table.get(t) - if reduce: - rv = reduce(obj) - else: - # Check for a class with a custom metaclass; treat as regular class - try: - issc = issubclass(t, TypeType) - except TypeError: # t is not a class (old Boost; see SF #502085) - issc = 0 - if issc: - self.save_global(obj) - return - - # Check for a __reduce_ex__ method, fall back to __reduce__ - reduce = getattr(obj, "__reduce_ex__", None) - if reduce: - rv = reduce(self.proto) - else: - reduce = getattr(obj, "__reduce__", None) - if reduce: - rv = reduce() - else: - raise PicklingError("Can't pickle %r object: %r" % - (t.__name__, obj)) - - # Check for string returned by reduce(), meaning "save as global" - if type(rv) is StringType: - self.save_global(obj, rv) - return - - # Assert that reduce() returned a tuple - if type(rv) is not TupleType: - raise PicklingError("%s must return string or tuple" % reduce) - - # Assert that it returned an appropriately sized tuple - l = len(rv) - if not (2 <= l <= 5): - raise PicklingError("Tuple returned by %s must have " - "two to five elements" % reduce) - - # Save the reduce() output and finally memoize the object - self.save_reduce(obj=obj, *rv) - - def persistent_id(self, obj): - # This exists so a subclass can override it - return None - - def save_pers(self, pid): - # Save a persistent id reference - if self.bin: - self.save(pid) - self.write(BINPERSID) - else: - self.write(PERSID + str(pid) + '\n') - - def save_reduce(self, func, args, state=None, - listitems=None, dictitems=None, obj=None): - # This API is called by some subclasses - - # Assert that args is a tuple or None - if not isinstance(args, TupleType): - raise PicklingError("args from reduce() should be a tuple") - - # Assert that func is callable - if not hasattr(func, '__call__'): - raise PicklingError("func from reduce should be callable") - - save = self.save - write = self.write - - # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ - if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__": - # A __reduce__ implementation can direct protocol 2 to - # use the more efficient NEWOBJ opcode, while still - # allowing protocol 0 and 1 to work normally. For this to - # work, the function returned by __reduce__ should be - # called __newobj__, and its first argument should be a - # new-style class. The implementation for __newobj__ - # should be as follows, although pickle has no way to - # verify this: - # - # def __newobj__(cls, *args): - # return cls.__new__(cls, *args) - # - # Protocols 0 and 1 will pickle a reference to __newobj__, - # while protocol 2 (and above) will pickle a reference to - # cls, the remaining args tuple, and the NEWOBJ code, - # which calls cls.__new__(cls, *args) at unpickling time - # (see load_newobj below). If __reduce__ returns a - # three-tuple, the state from the third tuple item will be - # pickled regardless of the protocol, calling __setstate__ - # at unpickling time (see load_build below). - # - # Note that no standard __newobj__ implementation exists; - # you have to provide your own. This is to enforce - # compatibility with Python 2.2 (pickles written using - # protocol 0 or 1 in Python 2.3 should be unpicklable by - # Python 2.2). - cls = args[0] - if not hasattr(cls, "__new__"): - raise PicklingError( - "args[0] from __newobj__ args has no __new__") - if obj is not None and cls is not obj.__class__: - raise PicklingError( - "args[0] from __newobj__ args has the wrong class") - args = args[1:] - save(cls) - save(args) - write(NEWOBJ) - else: - save(func) - save(args) - write(REDUCE) - - if obj is not None: - self.memoize(obj) - - # More new special cases (that work with older protocols as - # well): when __reduce__ returns a tuple with 4 or 5 items, - # the 4th and 5th item should be iterators that provide list - # items and dict items (as (key, value) tuples), or None. - - if listitems is not None: - self._batch_appends(listitems) - - if dictitems is not None: - self._batch_setitems(dictitems) - - if state is not None: - save(state) - write(BUILD) - - # Methods below this point are dispatched through the dispatch table - - dispatch = {} - - def save_none(self, obj): - self.write(NONE) - dispatch[NoneType] = save_none - - def save_bool(self, obj): - if self.proto >= 2: - self.write(obj and NEWTRUE or NEWFALSE) - else: - self.write(obj and TRUE or FALSE) - dispatch[bool] = save_bool - - def save_int(self, obj, pack=struct.pack): - if self.bin: - # If the int is small enough to fit in a signed 4-byte 2's-comp - # format, we can store it more efficiently than the general - # case. - # First one- and two-byte unsigned ints: - if obj >= 0: - if obj <= 0xff: - self.write(BININT1 + chr(obj)) - return - if obj <= 0xffff: - self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8)) - return - # Next check for 4-byte signed ints: - high_bits = obj >> 31 # note that Python shift sign-extends - if high_bits == 0 or high_bits == -1: - # All high bits are copies of bit 2**31, so the value - # fits in a 4-byte signed int. - self.write(BININT + pack("<i", obj)) - return - # Text pickle, or int too big to fit in signed 4-byte format. - self.write(INT + repr(obj) + '\n') - dispatch[IntType] = save_int - - def save_long(self, obj, pack=struct.pack): - if self.proto >= 2: - bytes = encode_long(obj) - n = len(bytes) - if n < 256: - self.write(LONG1 + chr(n) + bytes) - else: - self.write(LONG4 + pack("<i", n) + bytes) - return - self.write(LONG + repr(obj) + '\n') - dispatch[LongType] = save_long - - def save_float(self, obj, pack=struct.pack): - if self.bin: - self.write(BINFLOAT + pack('>d', obj)) - else: - self.write(FLOAT + repr(obj) + '\n') - dispatch[FloatType] = save_float - - def save_string(self, obj, pack=struct.pack): - if self.bin: - n = len(obj) - if n < 256: - self.write(SHORT_BINSTRING + chr(n) + obj) - else: - self.write(BINSTRING + pack("<i", n) + obj) - else: - self.write(STRING + repr(obj) + '\n') - self.memoize(obj) - dispatch[StringType] = save_string - - def save_unicode(self, obj, pack=struct.pack): - if self.bin: - encoding = obj.encode('utf-8') - n = len(encoding) - self.write(BINUNICODE + pack("<i", n) + encoding) - else: - obj = obj.replace("\\", "\\u005c") - obj = obj.replace("\n", "\\u000a") - self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n') - self.memoize(obj) - dispatch[UnicodeType] = save_unicode - - if StringType is UnicodeType: - # This is true for Jython - def save_string(self, obj, pack=struct.pack): - unicode = obj.isunicode() - - if self.bin: - if unicode: - obj = obj.encode("utf-8") - l = len(obj) - if l < 256 and not unicode: - self.write(SHORT_BINSTRING + chr(l) + obj) - else: - s = pack("<i", l) - if unicode: - self.write(BINUNICODE + s + obj) - else: - self.write(BINSTRING + s + obj) - else: - if unicode: - obj = obj.replace("\\", "\\u005c") - obj = obj.replace("\n", "\\u000a") - obj = obj.encode('raw-unicode-escape') - self.write(UNICODE + obj + '\n') - else: - self.write(STRING + repr(obj) + '\n') - self.memoize(obj) - dispatch[StringType] = save_string - - def save_tuple(self, obj): - write = self.write - proto = self.proto - - n = len(obj) - if n == 0: - if proto: - write(EMPTY_TUPLE) - else: - write(MARK + TUPLE) - return - - save = self.save - memo = self.memo - if n <= 3 and proto >= 2: - for element in obj: - save(element) - # Subtle. Same as in the big comment below. - if id(obj) in memo: - get = self.get(memo[id(obj)][0]) - write(POP * n + get) - else: - write(_tuplesize2code[n]) - self.memoize(obj) - return - - # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple - # has more than 3 elements. - write(MARK) - for element in obj: - save(element) - - if id(obj) in memo: - # Subtle. d was not in memo when we entered save_tuple(), so - # the process of saving the tuple's elements must have saved - # the tuple itself: the tuple is recursive. The proper action - # now is to throw away everything we put on the stack, and - # simply GET the tuple (it's already constructed). This check - # could have been done in the "for element" loop instead, but - # recursive tuples are a rare thing. - get = self.get(memo[id(obj)][0]) - if proto: - write(POP_MARK + get) - else: # proto 0 -- POP_MARK not available - write(POP * (n+1) + get) - return - - # No recursion. - self.write(TUPLE) - self.memoize(obj) - - dispatch[TupleType] = save_tuple - - # save_empty_tuple() isn't used by anything in Python 2.3. However, I - # found a Pickler subclass in Zope3 that calls it, so it's not harmless - # to remove it. - def save_empty_tuple(self, obj): - self.write(EMPTY_TUPLE) - - def save_list(self, obj): - write = self.write - - if self.bin: - write(EMPTY_LIST) - else: # proto 0 -- can't use EMPTY_LIST - write(MARK + LIST) - - self.memoize(obj) - self._batch_appends(iter(obj)) - - dispatch[ListType] = save_list - - # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets - # out of synch, though. - _BATCHSIZE = 1000 - - def _batch_appends(self, items): - # Helper to batch up APPENDS sequences - save = self.save - write = self.write - - if not self.bin: - for x in items: - save(x) - write(APPEND) - return - - r = xrange(self._BATCHSIZE) - while items is not None: - tmp = [] - for i in r: - try: - x = items.next() - tmp.append(x) - except StopIteration: - items = None - break - n = len(tmp) - if n > 1: - write(MARK) - for x in tmp: - save(x) - write(APPENDS) - elif n: - save(tmp[0]) - write(APPEND) - # else tmp is empty, and we're done - - def save_dict(self, obj): - write = self.write - - if self.bin: - write(EMPTY_DICT) - else: # proto 0 -- can't use EMPTY_DICT - write(MARK + DICT) - - self.memoize(obj) - self._batch_setitems(obj.iteritems()) - - dispatch[DictionaryType] = save_dict - if not PyStringMap is None: - dispatch[PyStringMap] = save_dict - - def _batch_setitems(self, items): - # Helper to batch up SETITEMS sequences; proto >= 1 only - save = self.save - write = self.write - - if not self.bin: - for k, v in items: - save(k) - save(v) - write(SETITEM) - return - - r = xrange(self._BATCHSIZE) - while items is not None: - tmp = [] - for i in r: - try: - tmp.append(items.next()) - except StopIteration: - items = None - break - n = len(tmp) - if n > 1: - write(MARK) - for k, v in tmp: - save(k) - save(v) - write(SETITEMS) - elif n: - k, v = tmp[0] - save(k) - save(v) - write(SETITEM) - # else tmp is empty, and we're done - - def save_inst(self, obj): - cls = obj.__class__ - - memo = self.memo - write = self.write - save = self.save - - if hasattr(obj, '__getinitargs__'): - args = obj.__getinitargs__() - len(args) # XXX Assert it's a sequence - _keep_alive(args, memo) - else: - args = () - - write(MARK) - - if self.bin: - save(cls) - for arg in args: - save(arg) - write(OBJ) - else: - for arg in args: - save(arg) - write(INST + cls.__module__ + '\n' + cls.__name__ + '\n') - - self.memoize(obj) - - try: - getstate = obj.__getstate__ - except AttributeError: - stuff = obj.__dict__ - else: - stuff = getstate() - _keep_alive(stuff, memo) - save(stuff) - write(BUILD) - - dispatch[InstanceType] = save_inst - - def save_global(self, obj, name=None, pack=struct.pack): - write = self.write - memo = self.memo - - if name is None: - name = obj.__name__ - - module = getattr(obj, "__module__", None) - if module is None: - module = whichmodule(obj, name) - - try: - __import__(module) - mod = sys.modules[module] - klass = getattr(mod, name) - except (ImportError, KeyError, AttributeError): - raise PicklingError( - "Can't pickle %r: it's not found as %s.%s" % - (obj, module, name)) - else: - if klass is not obj: - raise PicklingError( - "Can't pickle %r: it's not the same object as %s.%s" % - (obj, module, name)) - - if self.proto >= 2: - code = _extension_registry.get((module, name)) - if code: - assert code > 0 - if code <= 0xff: - write(EXT1 + chr(code)) - elif code <= 0xffff: - write("%c%c%c" % (EXT2, code&0xff, code>>8)) - else: - write(EXT4 + pack("<i", code)) - return - - write(GLOBAL + module + '\n' + name + '\n') - self.memoize(obj) - - dispatch[ClassType] = save_global - dispatch[FunctionType] = save_global - dispatch[BuiltinFunctionType] = save_global - dispatch[TypeType] = save_global - -# Pickling helpers - -def _keep_alive(x, memo): - """Keeps a reference to the object x in the memo. - - Because we remember objects by their id, we have - to assure that possibly temporary objects are kept - alive by referencing them. - We store a reference at the id of the memo, which should - normally not be used unless someone tries to deepcopy - the memo itself... - """ - try: - memo[id(memo)].append(x) - except KeyError: - # aha, this is the first one :-) - memo[id(memo)]=[x] - - -# A cache for whichmodule(), mapping a function object to the name of -# the module in which the function was found. - -classmap = {} # called classmap for backwards compatibility - -def whichmodule(func, funcname): - """Figure out the module in which a function occurs. - - Search sys.modules for the module. - Cache in classmap. - Return a module name. - If the function cannot be found, return "__main__". - """ - # Python functions should always get an __module__ from their globals. - mod = getattr(func, "__module__", None) - if mod is not None: - return mod - if func in classmap: - return classmap[func] - - for name, module in sys.modules.items(): - if module is None: - continue # skip dummy package entries - if name != '__main__' and getattr(module, funcname, None) is func: - break - else: - name = '__main__' - classmap[func] = name - return name - - -# Unpickling machinery - -class Unpickler: - - def __init__(self, file): - """This takes a file-like object for reading a pickle data stream. - - The protocol version of the pickle is detected automatically, so no - proto argument is needed. - - The file-like object must have two methods, a read() method that - takes an integer argument, and a readline() method that requires no - arguments. Both methods should return a string. Thus file-like - object can be a file object opened for reading, a StringIO object, - or any other custom object that meets this interface. - """ - self.readline = file.readline - self.read = file.read - self.memo = {} - - def load(self): - """Read a pickled object representation from the open file. - - Return the reconstituted object hierarchy specified in the file. - """ - self.mark = object() # any new unique object - self.stack = [] - self.append = self.stack.append - read = self.read - dispatch = self.dispatch - try: - while 1: - key = read(1) - dispatch[key](self) - except _Stop, stopinst: - return stopinst.value - - # Return largest index k such that self.stack[k] is self.mark. - # If the stack doesn't contain a mark, eventually raises IndexError. - # This could be sped by maintaining another stack, of indices at which - # the mark appears. For that matter, the latter stack would suffice, - # and we wouldn't need to push mark objects on self.stack at all. - # Doing so is probably a good thing, though, since if the pickle is - # corrupt (or hostile) we may get a clue from finding self.mark embedded - # in unpickled objects. - def marker(self): - stack = self.stack - mark = self.mark - k = len(stack)-1 - while stack[k] is not mark: k = k-1 - return k - - dispatch = {} - - def load_eof(self): - raise EOFError - dispatch[''] = load_eof - - def load_proto(self): - proto = ord(self.read(1)) - if not 0 <= proto <= 2: - raise ValueError, "unsupported pickle protocol: %d" % proto - dispatch[PROTO] = load_proto - - def load_persid(self): - pid = self.readline()[:-1] - self.append(self.persistent_load(pid)) - dispatch[PERSID] = load_persid - - def load_binpersid(self): - pid = self.stack.pop() - self.append(self.persistent_load(pid)) - dispatch[BINPERSID] = load_binpersid - - def load_none(self): - self.append(None) - dispatch[NONE] = load_none - - def load_false(self): - self.append(False) - dispatch[NEWFALSE] = load_false - - def load_true(self): - self.append(True) - dispatch[NEWTRUE] = load_true - - def load_int(self): - data = self.readline() - if data == FALSE[1:]: - val = False - elif data == TRUE[1:]: - val = True - else: - try: - val = int(data) - except ValueError: - val = long(data) - self.append(val) - dispatch[INT] = load_int - - def load_binint(self): - self.append(mloads('i' + self.read(4))) - dispatch[BININT] = load_binint - - def load_binint1(self): - self.append(ord(self.read(1))) - dispatch[BININT1] = load_binint1 - - def load_binint2(self): - self.append(mloads('i' + self.read(2) + '\000\000')) - dispatch[BININT2] = load_binint2 - - def load_long(self): - self.append(long(self.readline()[:-1], 0)) - dispatch[LONG] = load_long - - def load_long1(self): - n = ord(self.read(1)) - bytes = self.read(n) - self.append(decode_long(bytes)) - dispatch[LONG1] = load_long1 - - def load_long4(self): - n = mloads('i' + self.read(4)) - bytes = self.read(n) - self.append(decode_long(bytes)) - dispatch[LONG4] = load_long4 - - def load_float(self): - self.append(float(self.readline()[:-1])) - dispatch[FLOAT] = load_float - - def load_binfloat(self, unpack=struct.unpack): - self.append(unpack('>d', self.read(8))[0]) - dispatch[BINFLOAT] = load_binfloat - - def load_string(self): - rep = self.readline()[:-1] - for q in "\"'": # double or single quote - if rep.startswith(q): - if len(rep) < 2 or not rep.endswith(q): - raise ValueError, "insecure string pickle" - rep = rep[len(q):-len(q)] - break - else: - raise ValueError, "insecure string pickle" - self.append(rep.decode("string-escape")) - dispatch[STRING] = load_string - - def load_binstring(self): - len = mloads('i' + self.read(4)) - self.append(self.read(len)) - dispatch[BINSTRING] = load_binstring - - def load_unicode(self): - self.append(unicode(self.readline()[:-1],'raw-unicode-escape')) - dispatch[UNICODE] = load_unicode - - def load_binunicode(self): - len = mloads('i' + self.read(4)) - self.append(unicode(self.read(len),'utf-8')) - dispatch[BINUNICODE] = load_binunicode - - def load_short_binstring(self): - len = ord(self.read(1)) - self.append(self.read(len)) - dispatch[SHORT_BINSTRING] = load_short_binstring - - def load_tuple(self): - k = self.marker() - self.stack[k:] = [tuple(self.stack[k+1:])] - dispatch[TUPLE] = load_tuple - - def load_empty_tuple(self): - self.stack.append(()) - dispatch[EMPTY_TUPLE] = load_empty_tuple - - def load_tuple1(self): - self.stack[-1] = (self.stack[-1],) - dispatch[TUPLE1] = load_tuple1 - - def load_tuple2(self): - self.stack[-2:] = [(self.stack[-2], self.stack[-1])] - dispatch[TUPLE2] = load_tuple2 - - def load_tuple3(self): - self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] - dispatch[TUPLE3] = load_tuple3 - - def load_empty_list(self): - self.stack.append([]) - dispatch[EMPTY_LIST] = load_empty_list - - def load_empty_dictionary(self): - self.stack.append({}) - dispatch[EMPTY_DICT] = load_empty_dictionary - - def load_list(self): - k = self.marker() - self.stack[k:] = [self.stack[k+1:]] - dispatch[LIST] = load_list - - def load_dict(self): - k = self.marker() - d = {} - items = self.stack[k+1:] - for i in range(0, len(items), 2): - key = items[i] - value = items[i+1] - d[key] = value - self.stack[k:] = [d] - dispatch[DICT] = load_dict - - # INST and OBJ differ only in how they get a class object. It's not - # only sensible to do the rest in a common routine, the two routines - # previously diverged and grew different bugs. - # klass is the class to instantiate, and k points to the topmost mark - # object, following which are the arguments for klass.__init__. - def _instantiate(self, klass, k): - args = tuple(self.stack[k+1:]) - del self.stack[k:] - instantiated = 0 - if (not args and - type(klass) is ClassType and - not hasattr(klass, "__getinitargs__")): - try: - value = _EmptyClass() - value.__class__ = klass - instantiated = 1 - except RuntimeError: - # In restricted execution, assignment to inst.__class__ is - # prohibited - pass - if not instantiated: - try: - value = klass(*args) - except TypeError, err: - raise TypeError, "in constructor for %s: %s" % ( - klass.__name__, str(err)), sys.exc_info()[2] - self.append(value) - - def load_inst(self): - module = self.readline()[:-1] - name = self.readline()[:-1] - klass = self.find_class(module, name) - self._instantiate(klass, self.marker()) - dispatch[INST] = load_inst - - def load_obj(self): - # Stack is ... markobject classobject arg1 arg2 ... - k = self.marker() - klass = self.stack.pop(k+1) - self._instantiate(klass, k) - dispatch[OBJ] = load_obj - - def load_newobj(self): - args = self.stack.pop() - cls = self.stack[-1] - obj = cls.__new__(cls, *args) - self.stack[-1] = obj - dispatch[NEWOBJ] = load_newobj - - def load_global(self): - module = self.readline()[:-1] - name = self.readline()[:-1] - klass = self.find_class(module, name) - self.append(klass) - dispatch[GLOBAL] = load_global - - def load_ext1(self): - code = ord(self.read(1)) - self.get_extension(code) - dispatch[EXT1] = load_ext1 - - def load_ext2(self): - code = mloads('i' + self.read(2) + '\000\000') - self.get_extension(code) - dispatch[EXT2] = load_ext2 - - def load_ext4(self): - code = mloads('i' + self.read(4)) - self.get_extension(code) - dispatch[EXT4] = load_ext4 - - def get_extension(self, code): - nil = [] - obj = _extension_cache.get(code, nil) - if obj is not nil: - self.append(obj) - return - key = _inverted_registry.get(code) - if not key: - raise ValueError("unregistered extension code %d" % code) - obj = self.find_class(*key) - _extension_cache[code] = obj - self.append(obj) - - def find_class(self, module, name): - # Subclasses may override this - __import__(module) - mod = sys.modules[module] - klass = getattr(mod, name) - return klass - - def load_reduce(self): - stack = self.stack - args = stack.pop() - func = stack[-1] - value = func(*args) - stack[-1] = value - dispatch[REDUCE] = load_reduce - - def load_pop(self): - del self.stack[-1] - dispatch[POP] = load_pop - - def load_pop_mark(self): - k = self.marker() - del self.stack[k:] - dispatch[POP_MARK] = load_pop_mark - - def load_dup(self): - self.append(self.stack[-1]) - dispatch[DUP] = load_dup - - def load_get(self): - self.append(self.memo[self.readline()[:-1]]) - dispatch[GET] = load_get - - def load_binget(self): - i = ord(self.read(1)) - self.append(self.memo[repr(i)]) - dispatch[BINGET] = load_binget - - def load_long_binget(self): - i = mloads('i' + self.read(4)) - self.append(self.memo[repr(i)]) - dispatch[LONG_BINGET] = load_long_binget - - def load_put(self): - self.memo[self.readline()[:-1]] = self.stack[-1] - dispatch[PUT] = load_put - - def load_binput(self): - i = ord(self.read(1)) - self.memo[repr(i)] = self.stack[-1] - dispatch[BINPUT] = load_binput - - def load_long_binput(self): - i = mloads('i' + self.read(4)) - self.memo[repr(i)] = self.stack[-1] - dispatch[LONG_BINPUT] = load_long_binput - - def load_append(self): - stack = self.stack - value = stack.pop() - list = stack[-1] - list.append(value) - dispatch[APPEND] = load_append - - def load_appends(self): - stack = self.stack - mark = self.marker() - list = stack[mark - 1] - list.extend(stack[mark + 1:]) - del stack[mark:] - dispatch[APPENDS] = load_appends - - def load_setitem(self): - stack = self.stack - value = stack.pop() - key = stack.pop() - dict = stack[-1] - dict[key] = value - dispatch[SETITEM] = load_setitem - - def load_setitems(self): - stack = self.stack - mark = self.marker() - dict = stack[mark - 1] - for i in range(mark + 1, len(stack), 2): - dict[stack[i]] = stack[i + 1] - - del stack[mark:] - dispatch[SETITEMS] = load_setitems - - def load_build(self): - stack = self.stack - state = stack.pop() - inst = stack[-1] - setstate = getattr(inst, "__setstate__", None) - if setstate: - setstate(state) - return - slotstate = None - if isinstance(state, tuple) and len(state) == 2: - state, slotstate = state - if state: - try: - d = inst.__dict__ - try: - for k, v in state.iteritems(): - d[intern(k)] = v - # keys in state don't have to be strings - # don't blow up, but don't go out of our way - except TypeError: - d.update(state) - - except RuntimeError: - # XXX In restricted execution, the instance's __dict__ - # is not accessible. Use the old way of unpickling - # the instance variables. This is a semantic - # difference when unpickling in restricted - # vs. unrestricted modes. - # Note, however, that cPickle has never tried to do the - # .update() business, and always uses - # PyObject_SetItem(inst.__dict__, key, value) in a - # loop over state.items(). - for k, v in state.items(): - setattr(inst, k, v) - if slotstate: - for k, v in slotstate.items(): - setattr(inst, k, v) - dispatch[BUILD] = load_build - - def load_mark(self): - self.append(self.mark) - dispatch[MARK] = load_mark - - def load_stop(self): - value = self.stack.pop() - raise _Stop(value) - dispatch[STOP] = load_stop - -# Helper class for load_inst/load_obj - -class _EmptyClass: - pass - -# Encode/decode longs in linear time. - -import binascii as _binascii - -def encode_long(x): - r"""Encode a long to a two's complement little-endian binary string. - Note that 0L is a special case, returning an empty string, to save a - byte in the LONG1 pickling context. - - >>> encode_long(0L) - '' - >>> encode_long(255L) - '\xff\x00' - >>> encode_long(32767L) - '\xff\x7f' - >>> encode_long(-256L) - '\x00\xff' - >>> encode_long(-32768L) - '\x00\x80' - >>> encode_long(-128L) - '\x80' - >>> encode_long(127L) - '\x7f' - >>> - """ - - if x == 0: - return '' - if x > 0: - ashex = hex(x) - assert ashex.startswith("0x") - njunkchars = 2 + ashex.endswith('L') - nibbles = len(ashex) - njunkchars - if nibbles & 1: - # need an even # of nibbles for unhexlify - ashex = "0x0" + ashex[2:] - elif int(ashex[2], 16) >= 8: - # "looks negative", so need a byte of sign bits - ashex = "0x00" + ashex[2:] - else: - # Build the 256's-complement: (1L << nbytes) + x. The trick is - # to find the number of bytes in linear time (although that should - # really be a constant-time task). - ashex = hex(-x) - assert ashex.startswith("0x") - njunkchars = 2 + ashex.endswith('L') - nibbles = len(ashex) - njunkchars - if nibbles & 1: - # Extend to a full byte. - nibbles += 1 - nbits = nibbles * 4 - x += 1L << nbits - assert x > 0 - ashex = hex(x) - njunkchars = 2 + ashex.endswith('L') - newnibbles = len(ashex) - njunkchars - if newnibbles < nibbles: - ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:] - if int(ashex[2], 16) < 8: - # "looks positive", so need a byte of sign bits - ashex = "0xff" + ashex[2:] - - if ashex.endswith('L'): - ashex = ashex[2:-1] - else: - ashex = ashex[2:] - assert len(ashex) & 1 == 0, (x, ashex) - binary = _binascii.unhexlify(ashex) - return binary[::-1] - -def decode_long(data): - r"""Decode a long from a two's complement little-endian binary string. - - >>> decode_long('') - 0L - >>> decode_long("\xff\x00") - 255L - >>> decode_long("\xff\x7f") - 32767L - >>> decode_long("\x00\xff") - -256L - >>> decode_long("\x00\x80") - -32768L - >>> decode_long("\x80") - -128L - >>> decode_long("\x7f") - 127L - """ - - nbytes = len(data) - if nbytes == 0: - return 0L - ashex = _binascii.hexlify(data[::-1]) - n = long(ashex, 16) # quadratic time before Python 2.3; linear now - if data[-1] >= '\x80': - n -= 1L << (nbytes * 8) - return n - -# Shorthands - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -def dump(obj, file, protocol=None): - Pickler(file, protocol).dump(obj) - -def dumps(obj, protocol=None): - file = StringIO() - Pickler(file, protocol).dump(obj) - return file.getvalue() - -def load(file): - return Unpickler(file).load() - -def loads(str): - file = StringIO(str) - return Unpickler(file).load() - -# Doctest - -def _test(): - import doctest - return doctest.testmod() - -if __name__ == "__main__": - _test() |