diff options
author | Trevor Johns <trevorjohns@google.com> | 2010-09-01 03:34:05 -0700 |
---|---|---|
committer | Trevor Johns <trevorjohns@google.com> | 2012-01-27 20:16:15 -0800 |
commit | 662b8f1c9a05bea55187513d6eb7f4ff1125ca1b (patch) | |
tree | 7603c908925e92f73fb7209cd94515931d6cfc59 | |
parent | e52271aff9f4efe9f0eee9ee4fa218f38776aeb4 (diff) | |
download | development-662b8f1c9a05bea55187513d6eb7f4ff1125ca1b.tar.gz |
Adding manual redirect support to developer.android.com.
Internal bug: 2347145
Change-Id: I0cdcec8a23704ab80878e8cc781b735fd2173011
-rw-r--r-- | scripts/app_engine_server/memcache_zipserve.py | 303 | ||||
-rw-r--r-- | scripts/app_engine_server/redirects.yaml | 51 |
2 files changed, 279 insertions, 75 deletions
diff --git a/scripts/app_engine_server/memcache_zipserve.py b/scripts/app_engine_server/memcache_zipserve.py index 34f00c6c1..75d1b9759 100644 --- a/scripts/app_engine_server/memcache_zipserve.py +++ b/scripts/app_engine_server/memcache_zipserve.py @@ -31,7 +31,10 @@ __author__ = 'jmatt@google.com (Justin Mattson)' import email.Utils import logging import mimetypes +import re +import sys import time +import yaml import zipfile from google.appengine.api import memcache @@ -94,9 +97,21 @@ class MemcachedZipHandler(webapp.RequestHandler): PUBLIC = True # public cache setting CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existant URL + REDIRECT_PREFIX = 'redirect://' # memcache key prefix for redirect data + REDIRECT_FILE = 'redirects.yaml' # Name of file that contains redirect table + REDIRECT_SRC = 'src' # Name of the 'source' attribute for a + # redirect table entry + REDIRECT_DST = 'dst' # Name of the 'destination' attribute for + # a redirect table entry + REDIRECT_TYPE = 'type' # Name of the 'type' attribute for a + # redirect table entry + REDIRECT_TYPE_PERM = 'permanent' # Redirect 'type' string indicating a 301 + # redirect should be served + REDIRECT_TYPE_TEMP = 'temporary' # Redirect 'type'string indicate a 302 + # Redirect should be served intlString = 'intl/' validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW'] - + def TrueGet(self, reqUri): """The top-level entry point to serving requests. @@ -118,7 +133,7 @@ class MemcachedZipHandler(webapp.RequestHandler): isStripped = False # Try to retrieve the user's lang pref from the cookie. If there is no - # lang pref cookie in the request, add set-cookie to the response with the + # lang pref cookie in the request, add set-cookie to the response with the # default value of 'en'. try: langName = self.request.cookies['android_developer_pref_lang'] @@ -127,64 +142,201 @@ class MemcachedZipHandler(webapp.RequestHandler): #logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName) logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie) + # Do some prep for handling intl requests. Parse the url and validate + # the intl/lang substring, extract the url lang code (urlLangName) and the + # the uri that follows the intl/lang substring(contentUri) + sections = reqUri.split("/", 2) + isIntl = len(sections) > 2 and (sections[0] == "intl") + if isIntl: + isValidIntl = sections[1] in self.validLangs + urlLangName = sections[1] + contentUri = sections[2] + logging.info(' Content URI is [%s]...', contentUri) + if isValidIntl: + if (langName != urlLangName) or (langName == 'en'): + # if the lang code in the request is different from that in + # the cookie, or if the target lang is en, strip the + # intl/nn substring. It will later be redirected to + # the user's preferred language url. + # logging.info(' Handling a MISMATCHED intl request') + reqUri = contentUri + isStripped = True + isValidIntl = False + isIntl = False + #logging.info('INTL PREP resetting langName to urlLangName [%s]', langName) + #else: + # logging.info('INTL PREP no need to reset langName') + else: + contentUri = reqUri + + # Apply manual redirects from redirects.yaml. This occurs before any + # other mutations are performed, to avoid odd redirect behavior + # (For example, a user may want to redirect a directory without having + # /index.html appended.) + did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl) + if did_redirect: + return + # Preprocess the req url. If it references a directory or the domain itself, # append '/index.html' to the url and 302 redirect. Otherwise, continue # processing the request below. - name = self.PreprocessUrl(reqUri, langName) - if name: - # Do some prep for handling intl requests. Parse the url and validate - # the intl/lang substring, extract the url lang code (urlLangName) and the - # the uri that follows the intl/lang substring(contentUri) - sections = name.split("/", 2) - contentUri = 0 - isIntl = len(sections) > 1 and (sections[0] == "intl") - if isIntl: - isValidIntl = sections[1] in self.validLangs - if isValidIntl: - urlLangName = sections[1] - contentUri = sections[2] - logging.info(' Content URI is [%s]...', contentUri) - if (urlLangName != langName) or (langName == 'en'): - # if the lang code in the request is different from that in - # the cookie, or if the target lang is en, strip the - # intl/nn substring. It will later be redirected to - # the user's preferred language url. - # logging.info(' Handling a MISMATCHED intl request') - name = contentUri - isStripped = True - isValidIntl = False - isIntl = False - - # Send for processing - if self.isCleanUrl(name, langName, isValidIntl, isStripped): - # handle a 'clean' request. - # Try to form a response using the actual request url. - # logging.info(' Request being handled as clean: [%s]', name) - if not self.CreateResponse(name, langName, isValidIntl, resetLangCookie): - # If CreateResponse returns False, there was no such document - # in the intl/lang tree. Before going to 404, see if there is an - # English-language version of the doc in the default - # default tree and return it, else go to 404. - self.CreateResponse(contentUri, langName, False, resetLangCookie) - - elif isIntl: - # handle the case where we need to pass through an invalid intl req - # for processing (so as to get 404 as appropriate). This is needed - # because intl urls are passed through clean and retried in English, - # if necessary. - # logging.info(' Handling an invalid intl request...') - self.CreateResponse(name, langName, isValidIntl, resetLangCookie) + did_redirect = self.PreprocessUrl(reqUri, langName) + if did_redirect: + return + + # Send for processing + if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped): + # handle a 'clean' request. + # Try to form a response using the actual request url. + # logging.info(' Request being handled as clean: [%s]', name) + if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie): + # If CreateResponse returns False, there was no such document + # in the intl/lang tree. Before going to 404, see if there is an + # English-language version of the doc in the default + # default tree and return it, else go to 404. + self.CreateResponse(contentUri, langName, False, resetLangCookie) + + elif isIntl: + # handle the case where we need to pass through an invalid intl req + # for processing (so as to get 404 as appropriate). This is needed + # because intl urls are passed through clean and retried in English, + # if necessary. + # logging.info(' Handling an invalid intl request...') + self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie) - else: - # handle the case where we have a non-clean url (usually a non-intl - # url) that we need to interpret in the context of any lang pref - # that is set. Prepend an intl/lang string to the request url and - # send it as a 302 redirect. After the redirect, the subsequent - # request will be handled as a clean url. - self.RedirToIntl(name, self.intlString, langName) + else: + # handle the case where we have a non-clean url (usually a non-intl + # url) that we need to interpret in the context of any lang pref + # that is set. Prepend an intl/lang string to the request url and + # send it as a 302 redirect. After the redirect, the subsequent + # request will be handled as a clean url. + self.RedirToIntl(reqUri, self.intlString, langName) + + def ProcessManualRedirects(self, contentUri, langName, isIntl): + """Compute any manual redirects for a request and execute them. + + This allows content authors to manually define a set of regex rules which, + when matched, will cause an HTTP redirect to be performed. + + Redirect rules are typically stored in a file named redirects.yaml. See the + comments in that file for more information about formatting. + + Redirect computations are stored in memcache for performance. + + Note that international URIs are handled automatically, and are assumed to + mirror redirects for non-intl requests. + + Args: + contentUri: The relative URI (without leading slash) that was requested. + This should NOT contain an intl-prefix, if otherwise present. + langName: The requested language. + isIntl: True if contentUri originally contained an intl prefix. + + Results: + boolean: True if a redirect has been set, False otherwise. + """ + # Redirect data is stored in memcache for performance + memcache_key = self.REDIRECT_PREFIX + contentUri + redirect_data = memcache.get(memcache_key) + if redirect_data is None: + logging.info('Redirect cache miss. Computing new redirect data.\n' + 'Memcache Key: ' + memcache_key) + redirect_data = self.ComputeManualRedirectUrl(contentUri) + memcache.set(memcache_key, redirect_data) + contentUri = redirect_data[0] + redirectType = redirect_data[1] + + # If this is an international URL, prepend intl path to minimize + # number of redirects + if isIntl: + contentUri = '/%s%s%s' % (self.intlString, langName, contentUri) + + if redirectType is None: + # No redirect necessary + return False + elif redirectType == self.REDIRECT_TYPE_PERM: + logging.info('Sending permanent redirect: ' + contentUri); + self.redirect(contentUri, permanent=True) + return True + elif redirectType == self.REDIRECT_TYPE_TEMP: + logging.info('Sending temporary redirect: ' + contentUri); + self.redirect(contentUri, permanent=False) + return True + else: + # Invalid redirect type + logging.error('Invalid redirect type: %s', redirectType) + raise ('Invalid redirect type: %s', redirectType) + + def ComputeManualRedirectUrl(self, uri): + """Read redirects file and evaluate redirect rules for a given URI. + + Args: + uri: The relative URI (without leading slash) for which redirect data + should be computed. No special handling of intl URIs is pefromed + at this level. + + Returns: + tuple: The computed redirect data. This tuple has two parts: + redirect_uri: The new URI that should be used. (If no redirect rule is + found, the original input to 'uri' will be returned. + redirect_type: Either 'permanent' for an HTTP 301 redirect, 'temporary' + for an HTTP 302 redirect, or None if no redirect should be performed. + """ + # Redircts are defined in a file named redirects.yaml. + try: + f = open(self.REDIRECT_FILE) + data = yaml.load(f) + f.close() + except IOError, e: + logging.warning('Error opening redirect file (' + self.REDIRECT_FILE + + '): ' + e.strerror) + return (uri, None) + + # The incoming path is missing a leading slash. However, many parts of the + # redirect system require leading slashes to distinguish between relative + # and absolute redirects. So, to compensate for this, we'll add a leading + # slash here as well. + uri = '/' + uri + + # Check to make sure we actually got an iterable list out of the YAML file + if data is None: + logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not valid ' + 'YAML.') + elif 'redirects' not in data: + logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not ' + 'properly formatted -- no \'redirects:\' header.') + elif hasattr(data['redirects'], '__iter__'): + # Iterate through redirect data, try to find a redirect that matches. + for redirect in data['redirects']: + # Note: re.search adds an implied '^' to the beginning of the regex + # This means that the regex must match from the beginning of the + # string. + try: + if re.match(redirect[self.REDIRECT_SRC], uri): + # Match found. Apply redirect rule. + redirect_uri = re.sub('^' + redirect[self.REDIRECT_SRC], + redirect[self.REDIRECT_DST], uri) + logging.info('Redirect rule matched.\n' + 'Rule: %s\n' + 'Src: %s\n' + 'Dst: %s', + redirect[self.REDIRECT_SRC], uri, redirect_uri) + if self.REDIRECT_TYPE in redirect: + redirect_type = redirect[self.REDIRECT_TYPE] + else: + # Default redirect type, if unspecified + redirect_type = self.REDIRECT_TYPE_PERM + return (redirect_uri, redirect_type) + except: + e = sys.exc_info()[1] + raise ('Error while processing redirect rule.\n' + 'Rule: %s\n' + 'Error: %s' % (redirect[self.REDIRECT_SRC], e)) + # No redirect found, return URL unchanged + return (uri, None) def isCleanUrl(self, name, langName, isValidIntl, isStripped): - """Determine whether to pass an incoming url straight to processing. + """Determine whether to pass an incoming url straight to processing. Args: name: The incoming URL @@ -208,9 +360,10 @@ class MemcachedZipHandler(webapp.RequestHandler): name: The incoming URL Returns: - False if the request was redirected to '/index.html', or - The processed URL, otherwise + True if the request was redirected to '/index.html'. + Otherewise False. """ + # determine if this is a request for a directory final_path_segment = name final_slash_offset = name.rfind('/') @@ -224,16 +377,16 @@ class MemcachedZipHandler(webapp.RequestHandler): uri = ''.join(['/', name, 'index.html']) # logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName) self.redirect(uri, False) - return False + return True else: - return name + return False def RedirToIntl(self, name, intlString, langName): """Redirect an incoming request to the appropriate intl uri. For non-en langName, builds the intl/lang string from a - base (en) string and redirects (302) the request to look for - a version of the file in langName. For en langName, simply + base (en) string and redirects (302) the request to look for + a version of the file in langName. For en langName, simply redirects a stripped uri string (intl/nn removed). Args: @@ -247,25 +400,25 @@ class MemcachedZipHandler(webapp.RequestHandler): else: builtIntlLangUri = name uri = ''.join(['/', builtIntlLangUri]) - logging.info('-->>REDIRECTING %s to %s', name, uri) + logging.info('-->REDIRECTING %s to %s', name, uri) self.redirect(uri, False) return uri def CreateResponse(self, name, langName, isValidIntl, resetLangCookie): """Process the url and form a response, if appropriate. - Attempts to retrieve the requested file (name) from cache, - negative cache, or store (zip) and form the response. - For intl requests that are not found (in the localized tree), + Attempts to retrieve the requested file (name) from cache, + negative cache, or store (zip) and form the response. + For intl requests that are not found (in the localized tree), returns False rather than forming a response, so that - the request can be retried with the base url (this is the - fallthrough to default language). + the request can be retried with the base url (this is the + fallthrough to default language). For requests that are found, forms the headers and adds the content to the response entity. If the request was - for an intl (localized) url, also resets the language cookie - to the language specified in the url if needed, to ensure that - the client language and response data remain harmonious. + for an intl (localized) url, also resets the language cookie + to the language specified in the url if needed, to ensure that + the client language and response data remain harmonious. Args: name: The incoming, preprocessed URL @@ -281,7 +434,7 @@ class MemcachedZipHandler(webapp.RequestHandler): False: No response was created. """ # see if we have the page in the memcache - logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]', + logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]', name, langName, isValidIntl, resetLangCookie) resp_data = self.GetFromCache(name) if resp_data is None: @@ -314,12 +467,12 @@ class MemcachedZipHandler(webapp.RequestHandler): logging.info(' Resetting android_developer_pref_lang cookie to [%s]', langName) expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10 - self.response.headers.add_header('Set-Cookie', - 'android_developer_pref_lang=%s; path=/; expires=%s' % + self.response.headers.add_header('Set-Cookie', + 'android_developer_pref_lang=%s; path=/; expires=%s' % (langName, strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate)))) mustRevalidate = False if ('.html' in name): - # revalidate html files -- workaround for cache inconsistencies for + # revalidate html files -- workaround for cache inconsistencies for # negotiated responses mustRevalidate = True #logging.info(' Adding [Vary: Cookie] to response...') @@ -391,7 +544,7 @@ class MemcachedZipHandler(webapp.RequestHandler): x = False if resp_data is not None: logging.info('%s read from %s', file_path, archive_name) - + try: archive_name = file_itr.next()[0] except (StopIteration), err: @@ -459,7 +612,7 @@ class MemcachedZipHandler(webapp.RequestHandler): We say that file1 is lexigraphically before file2 if the last non-matching path segment of file1 is alphabetically before file2. - + Args: file1: the first file path file2: the second file path diff --git a/scripts/app_engine_server/redirects.yaml b/scripts/app_engine_server/redirects.yaml new file mode 100644 index 000000000..6bdc2673f --- /dev/null +++ b/scripts/app_engine_server/redirects.yaml @@ -0,0 +1,51 @@ +# Redirect file. +# This file contains the list of rewrite rules that are applied when serving +# pages. +# +# Each redirect has four parts: +# +# - src: The path to redirect. This is a regex rule prefixed with an implied +# '^'. Unless you're doing something advanced, your path should start with +# '/' character. +# +# - dst: The path to redirect to. If the path begins with a slash, +# it is considered a relative redirect. Otherwise, it is an absolute +# redirct (and should probably begin with http: or http://). You may use +# capturing groups to preserve part of the source path. To referece a +# capturing group, use \N, where N is the (1-based) index of desired group. +# +# - type: Either 'permanent' or 'temporary', depending on whether you want an +# HTTP 301 or HTTP 302 redirect, respectiviely. See RFC 2616 for the +# difference between these: +# +# http://tools.ietf.org/html/rfc2616 +# +# If you don't specify a type, 'permanent' will be used by default. Note that +# this is different from the Apache convention (which uses 'temporary' by +# default.) +# +# - comment: Currently ignored by the computer, but useful for humans. +# +# Example: +# +# redirects: +# - src: /foo +# dst: /bar +# # Redirect /foo to /bar. This will also redirect foo/ and +# # foo/test.html. Note that the redirect type is optional. This will be +# # treated as a permanent redirect. +# +# - src: /(.+droid(/.*)?)$ +# dst: /droids/\1 +# type: permanent +# # Redirect /android to /droids/android and /bugdroid to +# # /droids/bugdroid. However, it will not redirect /droid or +# # /bugdroids. +# +# - src: /google +# dst: http://www.google.com +# type: temporary +# # This is an example of a redirect to an absolute URI. +# + +redirects: |