summaryrefslogtreecommitdiff
path: root/lib/python2.7/mimetypes.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/mimetypes.py')
-rw-r--r--lib/python2.7/mimetypes.py594
1 files changed, 594 insertions, 0 deletions
diff --git a/lib/python2.7/mimetypes.py b/lib/python2.7/mimetypes.py
new file mode 100644
index 0000000..18ade73
--- /dev/null
+++ b/lib/python2.7/mimetypes.py
@@ -0,0 +1,594 @@
+"""Guess the MIME type of a file.
+
+This module defines two useful functions:
+
+guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.
+
+guess_extension(type, strict=1) -- guess the extension for a given MIME type.
+
+It also contains the following, for tuning the behavior:
+
+Data:
+
+knownfiles -- list of files to parse
+inited -- flag set when init() has been called
+suffix_map -- dictionary mapping suffixes to suffixes
+encodings_map -- dictionary mapping suffixes to encodings
+types_map -- dictionary mapping suffixes to types
+
+Functions:
+
+init([files]) -- parse a list of files, default knownfiles (on Windows, the
+ default values are taken from the registry)
+read_mime_types(file) -- parse one file, return a dictionary or None
+"""
+
+import os
+import sys
+import posixpath
+import urllib
+try:
+ import _winreg
+except ImportError:
+ _winreg = None
+
+__all__ = [
+ "guess_type","guess_extension","guess_all_extensions",
+ "add_type","read_mime_types","init"
+]
+
+knownfiles = [
+ "/etc/mime.types",
+ "/etc/httpd/mime.types", # Mac OS X
+ "/etc/httpd/conf/mime.types", # Apache
+ "/etc/apache/mime.types", # Apache 1
+ "/etc/apache2/mime.types", # Apache 2
+ "/usr/local/etc/httpd/conf/mime.types",
+ "/usr/local/lib/netscape/mime.types",
+ "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
+ "/usr/local/etc/mime.types", # Apache 1.3
+ ]
+
+inited = False
+_db = None
+
+
+class MimeTypes:
+ """MIME-types datastore.
+
+ This datastore can handle information from mime.types-style files
+ and supports basic determination of MIME type from a filename or
+ URL, and can guess a reasonable extension given a MIME type.
+ """
+
+ def __init__(self, filenames=(), strict=True):
+ if not inited:
+ init()
+ self.encodings_map = encodings_map.copy()
+ self.suffix_map = suffix_map.copy()
+ self.types_map = ({}, {}) # dict for (non-strict, strict)
+ self.types_map_inv = ({}, {})
+ for (ext, type) in types_map.items():
+ self.add_type(type, ext, True)
+ for (ext, type) in common_types.items():
+ self.add_type(type, ext, False)
+ for name in filenames:
+ self.read(name, strict)
+
+ def add_type(self, type, ext, strict=True):
+ """Add a mapping between a type and an extension.
+
+ When the extension is already known, the new
+ type will replace the old one. When the type
+ is already known the extension will be added
+ to the list of known extensions.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+ self.types_map[strict][ext] = type
+ exts = self.types_map_inv[strict].setdefault(type, [])
+ if ext not in exts:
+ exts.append(ext)
+
+ def guess_type(self, url, strict=True):
+ """Guess the type of a file based on its URL.
+
+ Return value is a tuple (type, encoding) where type is None if
+ the type can't be guessed (no or unknown suffix) or a string
+ of the form type/subtype, usable for a MIME Content-type
+ header; and encoding is None for no encoding or the name of
+ the program used to encode (e.g. compress or gzip). The
+ mappings are table driven. Encoding suffixes are case
+ sensitive; type suffixes are first tried case sensitive, then
+ case insensitive.
+
+ The suffixes .tgz, .taz and .tz (case sensitive!) are all
+ mapped to '.tar.gz'. (This is table-driven too, using the
+ dictionary suffix_map.)
+
+ Optional `strict' argument when False adds a bunch of commonly found,
+ but non-standard types.
+ """
+ scheme, url = urllib.splittype(url)
+ if scheme == 'data':
+ # syntax of data URLs:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ # type/subtype defaults to "text/plain"
+ comma = url.find(',')
+ if comma < 0:
+ # bad data URL
+ return None, None
+ semi = url.find(';', 0, comma)
+ if semi >= 0:
+ type = url[:semi]
+ else:
+ type = url[:comma]
+ if '=' in type or '/' not in type:
+ type = 'text/plain'
+ return type, None # never compressed, so encoding is None
+ base, ext = posixpath.splitext(url)
+ while ext in self.suffix_map:
+ base, ext = posixpath.splitext(base + self.suffix_map[ext])
+ if ext in self.encodings_map:
+ encoding = self.encodings_map[ext]
+ base, ext = posixpath.splitext(base)
+ else:
+ encoding = None
+ types_map = self.types_map[True]
+ if ext in types_map:
+ return types_map[ext], encoding
+ elif ext.lower() in types_map:
+ return types_map[ext.lower()], encoding
+ elif strict:
+ return None, encoding
+ types_map = self.types_map[False]
+ if ext in types_map:
+ return types_map[ext], encoding
+ elif ext.lower() in types_map:
+ return types_map[ext.lower()], encoding
+ else:
+ return None, encoding
+
+ def guess_all_extensions(self, type, strict=True):
+ """Guess the extensions for a file based on its MIME type.
+
+ Return value is a list of strings giving the possible filename
+ extensions, including the leading dot ('.'). The extension is not
+ guaranteed to have been associated with any particular data stream,
+ but would be mapped to the MIME type `type' by guess_type().
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
+ """
+ type = type.lower()
+ extensions = self.types_map_inv[True].get(type, [])
+ if not strict:
+ for ext in self.types_map_inv[False].get(type, []):
+ if ext not in extensions:
+ extensions.append(ext)
+ return extensions
+
+ def guess_extension(self, type, strict=True):
+ """Guess the extension for a file based on its MIME type.
+
+ Return value is a string giving a filename extension,
+ including the leading dot ('.'). The extension is not
+ guaranteed to have been associated with any particular data
+ stream, but would be mapped to the MIME type `type' by
+ guess_type(). If no extension can be guessed for `type', None
+ is returned.
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
+ """
+ extensions = self.guess_all_extensions(type, strict)
+ if not extensions:
+ return None
+ return extensions[0]
+
+ def read(self, filename, strict=True):
+ """
+ Read a single mime.types-format file, specified by pathname.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+ with open(filename) as fp:
+ self.readfp(fp, strict)
+
+ def readfp(self, fp, strict=True):
+ """
+ Read a single mime.types-format file.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+ while 1:
+ line = fp.readline()
+ if not line:
+ break
+ words = line.split()
+ for i in range(len(words)):
+ if words[i][0] == '#':
+ del words[i:]
+ break
+ if not words:
+ continue
+ type, suffixes = words[0], words[1:]
+ for suff in suffixes:
+ self.add_type(type, '.' + suff, strict)
+
+ def read_windows_registry(self, strict=True):
+ """
+ Load the MIME types database from Windows registry.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+
+ # Windows only
+ if not _winreg:
+ return
+
+ def enum_types(mimedb):
+ i = 0
+ while True:
+ try:
+ ctype = _winreg.EnumKey(mimedb, i)
+ except EnvironmentError:
+ break
+ try:
+ ctype = ctype.encode(default_encoding) # omit in 3.x!
+ except UnicodeEncodeError:
+ pass
+ else:
+ yield ctype
+ i += 1
+
+ default_encoding = sys.getdefaultencoding()
+ with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT,
+ r'MIME\Database\Content Type') as mimedb:
+ for ctype in enum_types(mimedb):
+ try:
+ with _winreg.OpenKey(mimedb, ctype) as key:
+ suffix, datatype = _winreg.QueryValueEx(key,
+ 'Extension')
+ except EnvironmentError:
+ continue
+ if datatype != _winreg.REG_SZ:
+ continue
+ try:
+ suffix = suffix.encode(default_encoding) # omit in 3.x!
+ except UnicodeEncodeError:
+ continue
+ self.add_type(ctype, suffix, strict)
+
+
+def guess_type(url, strict=True):
+ """Guess the type of a file based on its URL.
+
+ Return value is a tuple (type, encoding) where type is None if the
+ type can't be guessed (no or unknown suffix) or a string of the
+ form type/subtype, usable for a MIME Content-type header; and
+ encoding is None for no encoding or the name of the program used
+ to encode (e.g. compress or gzip). The mappings are table
+ driven. Encoding suffixes are case sensitive; type suffixes are
+ first tried case sensitive, then case insensitive.
+
+ The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
+ to ".tar.gz". (This is table-driven too, using the dictionary
+ suffix_map).
+
+ Optional `strict' argument when false adds a bunch of commonly found, but
+ non-standard types.
+ """
+ if _db is None:
+ init()
+ return _db.guess_type(url, strict)
+
+
+def guess_all_extensions(type, strict=True):
+ """Guess the extensions for a file based on its MIME type.
+
+ Return value is a list of strings giving the possible filename
+ extensions, including the leading dot ('.'). The extension is not
+ guaranteed to have been associated with any particular data
+ stream, but would be mapped to the MIME type `type' by
+ guess_type(). If no extension can be guessed for `type', None
+ is returned.
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
+ """
+ if _db is None:
+ init()
+ return _db.guess_all_extensions(type, strict)
+
+def guess_extension(type, strict=True):
+ """Guess the extension for a file based on its MIME type.
+
+ Return value is a string giving a filename extension, including the
+ leading dot ('.'). The extension is not guaranteed to have been
+ associated with any particular data stream, but would be mapped to the
+ MIME type `type' by guess_type(). If no extension can be guessed for
+ `type', None is returned.
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
+ """
+ if _db is None:
+ init()
+ return _db.guess_extension(type, strict)
+
+def add_type(type, ext, strict=True):
+ """Add a mapping between a type and an extension.
+
+ When the extension is already known, the new
+ type will replace the old one. When the type
+ is already known the extension will be added
+ to the list of known extensions.
+
+ If strict is true, information will be added to
+ list of standard types, else to the list of non-standard
+ types.
+ """
+ if _db is None:
+ init()
+ return _db.add_type(type, ext, strict)
+
+
+def init(files=None):
+ global suffix_map, types_map, encodings_map, common_types
+ global inited, _db
+ inited = True # so that MimeTypes.__init__() doesn't call us again
+ db = MimeTypes()
+ if files is None:
+ if _winreg:
+ db.read_windows_registry()
+ files = knownfiles
+ for file in files:
+ if os.path.isfile(file):
+ db.read(file)
+ encodings_map = db.encodings_map
+ suffix_map = db.suffix_map
+ types_map = db.types_map[True]
+ common_types = db.types_map[False]
+ # Make the DB a global variable now that it is fully initialized
+ _db = db
+
+
+def read_mime_types(file):
+ try:
+ f = open(file)
+ except IOError:
+ return None
+ db = MimeTypes()
+ db.readfp(f, True)
+ return db.types_map[True]
+
+
+def _default_mime_types():
+ global suffix_map
+ global encodings_map
+ global types_map
+ global common_types
+
+ suffix_map = {
+ '.tgz': '.tar.gz',
+ '.taz': '.tar.gz',
+ '.tz': '.tar.gz',
+ '.tbz2': '.tar.bz2',
+ '.txz': '.tar.xz',
+ }
+
+ encodings_map = {
+ '.gz': 'gzip',
+ '.Z': 'compress',
+ '.bz2': 'bzip2',
+ '.xz': 'xz',
+ }
+
+ # Before adding new types, make sure they are either registered with IANA,
+ # at http://www.isi.edu/in-notes/iana/assignments/media-types
+ # or extensions, i.e. using the x- prefix
+
+ # If you add to these, please keep them sorted!
+ types_map = {
+ '.a' : 'application/octet-stream',
+ '.ai' : 'application/postscript',
+ '.aif' : 'audio/x-aiff',
+ '.aifc' : 'audio/x-aiff',
+ '.aiff' : 'audio/x-aiff',
+ '.au' : 'audio/basic',
+ '.avi' : 'video/x-msvideo',
+ '.bat' : 'text/plain',
+ '.bcpio' : 'application/x-bcpio',
+ '.bin' : 'application/octet-stream',
+ '.bmp' : 'image/x-ms-bmp',
+ '.c' : 'text/plain',
+ # Duplicates :(
+ '.cdf' : 'application/x-cdf',
+ '.cdf' : 'application/x-netcdf',
+ '.cpio' : 'application/x-cpio',
+ '.csh' : 'application/x-csh',
+ '.css' : 'text/css',
+ '.dll' : 'application/octet-stream',
+ '.doc' : 'application/msword',
+ '.dot' : 'application/msword',
+ '.dvi' : 'application/x-dvi',
+ '.eml' : 'message/rfc822',
+ '.eps' : 'application/postscript',
+ '.etx' : 'text/x-setext',
+ '.exe' : 'application/octet-stream',
+ '.gif' : 'image/gif',
+ '.gtar' : 'application/x-gtar',
+ '.h' : 'text/plain',
+ '.hdf' : 'application/x-hdf',
+ '.htm' : 'text/html',
+ '.html' : 'text/html',
+ '.ico' : 'image/vnd.microsoft.icon',
+ '.ief' : 'image/ief',
+ '.jpe' : 'image/jpeg',
+ '.jpeg' : 'image/jpeg',
+ '.jpg' : 'image/jpeg',
+ '.js' : 'application/javascript',
+ '.ksh' : 'text/plain',
+ '.latex' : 'application/x-latex',
+ '.m1v' : 'video/mpeg',
+ '.man' : 'application/x-troff-man',
+ '.me' : 'application/x-troff-me',
+ '.mht' : 'message/rfc822',
+ '.mhtml' : 'message/rfc822',
+ '.mif' : 'application/x-mif',
+ '.mov' : 'video/quicktime',
+ '.movie' : 'video/x-sgi-movie',
+ '.mp2' : 'audio/mpeg',
+ '.mp3' : 'audio/mpeg',
+ '.mp4' : 'video/mp4',
+ '.mpa' : 'video/mpeg',
+ '.mpe' : 'video/mpeg',
+ '.mpeg' : 'video/mpeg',
+ '.mpg' : 'video/mpeg',
+ '.ms' : 'application/x-troff-ms',
+ '.nc' : 'application/x-netcdf',
+ '.nws' : 'message/rfc822',
+ '.o' : 'application/octet-stream',
+ '.obj' : 'application/octet-stream',
+ '.oda' : 'application/oda',
+ '.p12' : 'application/x-pkcs12',
+ '.p7c' : 'application/pkcs7-mime',
+ '.pbm' : 'image/x-portable-bitmap',
+ '.pdf' : 'application/pdf',
+ '.pfx' : 'application/x-pkcs12',
+ '.pgm' : 'image/x-portable-graymap',
+ '.pl' : 'text/plain',
+ '.png' : 'image/png',
+ '.pnm' : 'image/x-portable-anymap',
+ '.pot' : 'application/vnd.ms-powerpoint',
+ '.ppa' : 'application/vnd.ms-powerpoint',
+ '.ppm' : 'image/x-portable-pixmap',
+ '.pps' : 'application/vnd.ms-powerpoint',
+ '.ppt' : 'application/vnd.ms-powerpoint',
+ '.ps' : 'application/postscript',
+ '.pwz' : 'application/vnd.ms-powerpoint',
+ '.py' : 'text/x-python',
+ '.pyc' : 'application/x-python-code',
+ '.pyo' : 'application/x-python-code',
+ '.qt' : 'video/quicktime',
+ '.ra' : 'audio/x-pn-realaudio',
+ '.ram' : 'application/x-pn-realaudio',
+ '.ras' : 'image/x-cmu-raster',
+ '.rdf' : 'application/xml',
+ '.rgb' : 'image/x-rgb',
+ '.roff' : 'application/x-troff',
+ '.rtx' : 'text/richtext',
+ '.sgm' : 'text/x-sgml',
+ '.sgml' : 'text/x-sgml',
+ '.sh' : 'application/x-sh',
+ '.shar' : 'application/x-shar',
+ '.snd' : 'audio/basic',
+ '.so' : 'application/octet-stream',
+ '.src' : 'application/x-wais-source',
+ '.sv4cpio': 'application/x-sv4cpio',
+ '.sv4crc' : 'application/x-sv4crc',
+ '.swf' : 'application/x-shockwave-flash',
+ '.t' : 'application/x-troff',
+ '.tar' : 'application/x-tar',
+ '.tcl' : 'application/x-tcl',
+ '.tex' : 'application/x-tex',
+ '.texi' : 'application/x-texinfo',
+ '.texinfo': 'application/x-texinfo',
+ '.tif' : 'image/tiff',
+ '.tiff' : 'image/tiff',
+ '.tr' : 'application/x-troff',
+ '.tsv' : 'text/tab-separated-values',
+ '.txt' : 'text/plain',
+ '.ustar' : 'application/x-ustar',
+ '.vcf' : 'text/x-vcard',
+ '.wav' : 'audio/x-wav',
+ '.wiz' : 'application/msword',
+ '.wsdl' : 'application/xml',
+ '.xbm' : 'image/x-xbitmap',
+ '.xlb' : 'application/vnd.ms-excel',
+ # Duplicates :(
+ '.xls' : 'application/excel',
+ '.xls' : 'application/vnd.ms-excel',
+ '.xml' : 'text/xml',
+ '.xpdl' : 'application/xml',
+ '.xpm' : 'image/x-xpixmap',
+ '.xsl' : 'application/xml',
+ '.xwd' : 'image/x-xwindowdump',
+ '.zip' : 'application/zip',
+ }
+
+ # These are non-standard types, commonly found in the wild. They will
+ # only match if strict=0 flag is given to the API methods.
+
+ # Please sort these too
+ common_types = {
+ '.jpg' : 'image/jpg',
+ '.mid' : 'audio/midi',
+ '.midi': 'audio/midi',
+ '.pct' : 'image/pict',
+ '.pic' : 'image/pict',
+ '.pict': 'image/pict',
+ '.rtf' : 'application/rtf',
+ '.xul' : 'text/xul'
+ }
+
+
+_default_mime_types()
+
+
+if __name__ == '__main__':
+ import getopt
+
+ USAGE = """\
+Usage: mimetypes.py [options] type
+
+Options:
+ --help / -h -- print this message and exit
+ --lenient / -l -- additionally search of some common, but non-standard
+ types.
+ --extension / -e -- guess extension instead of type
+
+More than one type argument may be given.
+"""
+
+ def usage(code, msg=''):
+ print USAGE
+ if msg: print msg
+ sys.exit(code)
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], 'hle',
+ ['help', 'lenient', 'extension'])
+ except getopt.error, msg:
+ usage(1, msg)
+
+ strict = 1
+ extension = 0
+ for opt, arg in opts:
+ if opt in ('-h', '--help'):
+ usage(0)
+ elif opt in ('-l', '--lenient'):
+ strict = 0
+ elif opt in ('-e', '--extension'):
+ extension = 1
+ for gtype in args:
+ if extension:
+ guess = guess_extension(gtype, strict)
+ if not guess: print "I don't know anything about type", gtype
+ else: print guess
+ else:
+ guess, encoding = guess_type(gtype, strict)
+ if not guess: print "I don't know anything about type", gtype
+ else: print 'type:', guess, 'encoding:', encoding