# Copyright (c) 2015-2020 by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """CPython magic- and version-independent Python object deserialization (unmarshal). This is needed when the bytecode extracted is from a different version than the currently-running Python. When the running interpreter and the read-in bytecode are the same, you can simply use Python's built-in marshal.loads() to produce a code object """ import sys from struct import unpack from xdis.magics import magic_int2float from xdis.codetype import to_portable from xdis.version_info import PYTHON3, PYTHON_VERSION, IS_PYPY # FIXME: When working from Python3 bytecode in Python2, we need # to distinguish types. internStrings = [] internObjects = [] if PYTHON3: def long(n): return n else: import unicodedata def compat_str(s): if PYTHON3: try: return s.decode("utf-8") except UnicodeDecodeError: # If not Unicode, return bytes # and it will get converted to str when needed return s return s.decode() else: return str(s) def compat_u2s(u): if PYTHON_VERSION < 3.0: # See also unaccent.py which can be found using google. I # found it and this code via # https://www.peterbe.com/plog/unicode-to-ascii where it is a # dead link. That can potentially do better job in converting accents. s = unicodedata.normalize("NFKD", u) try: return s.encode("ascii") except UnicodeEncodeError: return s else: return str(u) def load_code(fp, magic_int, code_objects={}): """ marshal.load() written in Python. When the Python bytecode magic loaded is the same magic for the running Python interpreter, we can simply use the Python-supplied marshal.load(). However we need to use this when versions are different since the internal code structures are different. Sigh. """ global internStrings, internObjects internStrings = [] internObjects = [] seek_pos = fp.tell() # Do a sanity check. Is this a code type? b = ord(fp.read(1)) save_ref = False if b & 0x80: save_ref = True internObjects.append(None) b = b & 0x7F c = chr(b) if c == "c" or (magic_int in (39170, 39171) and c == "C"): fp.seek(seek_pos) else: raise TypeError( "File %s doesn't smell like Python bytecode:\n" "expecting code indicator 'c'; got '%s'" % (fp.name, c) ) code = load_code_internal(fp, magic_int, code_objects=code_objects) if save_ref: internObjects[0] = code return code def load_code_type(fp, magic_int, bytes_for_s=False, code_objects={}): # FIXME: use tables to simplify this? # FIXME: Python 1.0 .. 1.3 isn't well known version = magic_int2float(magic_int) if version >= 2.3: co_argcount = unpack("= 1.3: co_argcount = unpack("= 3.8: co_posonlyargcount = 0 else: co_posonlyargcount = None if version >= 3.0: kwonlyargcount = unpack("= 2.3: co_nlocals = unpack("= 1.3: co_nlocals = unpack("= 2.3: co_stacksize = unpack("= 1.5: co_stacksize = unpack("= 2.3: co_flags = unpack("= 1.3: co_flags = unpack("= 3.0 and version >= 3.0 co_consts = load_code_internal(fp, magic_int, bytes_for_s=bytes_for_s, code_objects=code_objects) co_names = load_code_internal(fp, magic_int, code_objects=code_objects) if version >= 1.3: co_varnames = load_code_internal(fp, magic_int, code_objects=code_objects) else: co_varnames = [] if version >= 2.0: co_freevars = load_code_internal(fp, magic_int, code_objects=code_objects) co_cellvars = load_code_internal(fp, magic_int, code_objects=code_objects) else: co_freevars = tuple() co_cellvars = tuple() co_filename = load_code_internal(fp, magic_int, code_objects=code_objects) co_name = load_code_internal(fp, magic_int) if version >= 1.5: if version >= 2.3: co_firstlineno = unpack(" 0: ret += (load_code_internal(fp, magic_int, bytes_for_s=bytes_for_s, code_objects=code_objects),) tuplesize -= 1 pass return r_ref_insert(ret, i) UNMARSHAL_DISPATCH_TABLE[")"] = t_small_tuple def t_tuple(fp, save_ref, bytes_for_s=None, magic_int=None, code_objects=None): tuplesize = unpack(" 0: ret += (load_code_internal(fp, magic_int, bytes_for_s=bytes_for_s, code_objects=code_objects),) tuplesize -= 1 return ret UNMARSHAL_DISPATCH_TABLE["("] = t_tuple def t_list(fp, save_ref, bytes_for_s=None, magic_int=None, code_objects=None): # FIXME: check me n = unpack(" 0: ret += (load_code_internal(fp, magic_int, bytes_for_s=bytes_for_s, code_objects=code_objects),) n -= 1 return ret UNMARSHAL_DISPATCH_TABLE["["] = t_list def t_frozenset(fp, save_ref, bytes_for_s=None, magic_int=None, code_objects=None): setsize = unpack(" 0: ret += (load_code_internal(fp, magic_int, bytes_for_s=bytes_for_s, code_objects=code_objects),) setsize -= 1 return r_ref_insert(frozenset(ret), i) UNMARSHAL_DISPATCH_TABLE["<"] = t_frozenset def t_set(fp, save_ref, bytes_for_s=None, magic_int=None, code_objects=None): setsize = unpack(" 0: ret += (load_code_internal(fp, magic_int, bytes_for_s=bytes_for_s, code_objects=code_objects),) setsize -= 1 return r_ref_insert(set(ret), i) UNMARSHAL_DISPATCH_TABLE[">"] = t_set def t_int32(fp, save_ref, bytes_for_s=None, magic_int=None, code_objects=None): return r_ref(int(unpack("