diff options
Diffstat (limited to 'cwp/interpreter/app_engine_pull.py')
-rw-r--r-- | cwp/interpreter/app_engine_pull.py | 253 |
1 files changed, 253 insertions, 0 deletions
diff --git a/cwp/interpreter/app_engine_pull.py b/cwp/interpreter/app_engine_pull.py new file mode 100644 index 00000000..d092e2a3 --- /dev/null +++ b/cwp/interpreter/app_engine_pull.py @@ -0,0 +1,253 @@ +# Copyright 2012 Google Inc. All Rights Reserved. +# Author: mrdmnd@ (Matt Redmond) +"""A client to pull data from Bartlett. + +Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py + +The server houses perf.data.gz, board, chrome version for each upload. +This script first authenticates with a proper @google.com account, then +downloads a sample (if it's not already cached) and unzips perf.data + + Authenticate(): Gets login info and returns an auth token + DownloadSamples(): Download and unzip samples. + _GetServePage(): Pulls /serve page from the app engine server + _DownloadSampleFromServer(): Downloads a local compressed copy of a sample + _UncompressSample(): Decompresses a sample, deleting the compressed version. +""" +import cookielib +import getpass +import gzip +import optparse +import os +import urllib +import urllib2 + +SERVER_NAME = 'http://chromeoswideprofiling.appspot.com' +APP_NAME = 'chromeoswideprofiling' +DELIMITER = '~' + + +def Authenticate(server_name): + """Gets credentials from user and attempts to retrieve auth token. + TODO: Accept OAuth2 instead of password. + Args: + server_name: (string) URL that the app engine code is living on. + Returns: + authtoken: (string) The authorization token that can be used + to grab other pages. + """ + + if server_name.endswith('/'): + server_name = server_name.rstrip('/') + # Grab username and password from user through stdin. + username = raw_input('Email (must be @google.com account): ') + password = getpass.getpass('Password: ') + # Use a cookie to authenticate with GAE. + cookiejar = cookielib.LWPCookieJar() + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)) + urllib2.install_opener(opener) + # Get an AuthToken from Google accounts service. + auth_uri = 'https://www.google.com/accounts/ClientLogin' + authreq_data = urllib.urlencode({'Email': username, + 'Passwd': password, + 'service': 'ah', + 'source': APP_NAME, + 'accountType': 'HOSTED_OR_GOOGLE'}) + auth_req = urllib2.Request(auth_uri, data=authreq_data) + try: + auth_resp = urllib2.urlopen(auth_req) + except urllib2.URLError: + print 'Error logging in to Google accounts service.' + return None + body = auth_resp.read() + # Auth response contains several fields. + # We care about the part after Auth= + auth_resp_dict = dict(x.split('=') for x in body.split('\n') if x) + authtoken = auth_resp_dict['Auth'] + return authtoken + + +def DownloadSamples(server_name, authtoken, output_dir, start, stop): + """Download every sample and write unzipped version + to output directory. + Args: + server_name: (string) URL that the app engine code is living on. + authtoken: (string) Authorization token. + output_dir (string) Filepath to write output to. + start: (int) Index to start downloading from, starting at top. + stop: (int) Index to stop downloading, non-inclusive. -1 for end. + Returns: + None + """ + + if server_name.endswith('/'): + server_name = server_name.rstrip('/') + + serve_page_string = _GetServePage(server_name, authtoken) + if serve_page_string is None: + print 'Error getting /serve page.' + return + + sample_list = serve_page_string.split('</br>') + print 'Will download:' + sample_list_subset = sample_list[start:stop] + for sample in sample_list_subset: + print sample + for sample in sample_list_subset: + assert sample, 'Sample should be valid.' + sample_info = [s.strip() for s in sample.split(DELIMITER)] + key = sample_info[0] + time = sample_info[1] + time = time.replace(' ', '_') # No space between date and time. + # sample_md5 = sample_info[2] + board = sample_info[3] + version = sample_info[4] + + # Put a compressed copy of the samples in output directory. + _DownloadSampleFromServer(server_name, authtoken, key, time, board, version, + output_dir) + _UncompressSample(key, time, board, version, output_dir) + + +def _BuildFilenameFromParams(key, time, board, version): + """Return the filename for our sample. + Args: + key: (string) Key indexing our sample in the datastore. + time: (string) Date that the sample was uploaded. + board: (string) Board that the sample was taken on. + version: (string) Version string from /etc/lsb-release + Returns: + filename (string) + """ + filename = DELIMITER.join([key, time, board, version]) + return filename + + +def _DownloadSampleFromServer(server_name, authtoken, key, time, board, version, + output_dir): + """Downloads sample_$(samplekey).gz to current dir. + Args: + server_name: (string) URL that the app engine code is living on. + authtoken: (string) Authorization token. + key: (string) Key indexing our sample in the datastore + time: (string) Date that the sample was uploaded. + board: (string) Board that the sample was taken on. + version: (string) Version string from /etc/lsb-release + output_dir: (string) Filepath to write to output to. + Returns: + None + """ + filename = _BuildFilenameFromParams(key, time, board, version) + compressed_filename = filename + '.gz' + + if os.path.exists(os.path.join(output_dir, filename)): + print 'Already downloaded %s, skipping.' % filename + return + + serv_uri = server_name + '/serve/' + key + serv_args = {'continue': serv_uri, 'auth': authtoken} + full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args) + serv_req = urllib2.Request(full_serv_uri) + serv_resp = urllib2.urlopen(serv_req) + f = open(os.path.join(output_dir, compressed_filename), 'w+') + f.write(serv_resp.read()) + f.close() + + +def _UncompressSample(key, time, board, version, output_dir): + """Uncompresses a given sample.gz file and deletes the compressed version. + Args: + key: (string) Sample key to uncompress. + time: (string) Date that the sample was uploaded. + board: (string) Board that the sample was taken on. + version: (string) Version string from /etc/lsb-release + output_dir: (string) Filepath to find sample key in. + Returns: + None + """ + filename = _BuildFilenameFromParams(key, time, board, version) + compressed_filename = filename + '.gz' + + if os.path.exists(os.path.join(output_dir, filename)): + print 'Already decompressed %s, skipping.' % filename + return + + out_file = open(os.path.join(output_dir, filename), 'wb') + in_file = gzip.open(os.path.join(output_dir, compressed_filename), 'rb') + out_file.write(in_file.read()) + in_file.close() + out_file.close() + os.remove(os.path.join(output_dir, compressed_filename)) + + +def _DeleteSampleFromServer(server_name, authtoken, key): + """Opens the /delete page with the specified key + to delete the sample off the datastore. + Args: + server_name: (string) URL that the app engine code is living on. + authtoken: (string) Authorization token. + key: (string) Key to delete. + Returns: + None + """ + + serv_uri = server_name + '/del/' + key + serv_args = {'continue': serv_uri, 'auth': authtoken} + full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args) + serv_req = urllib2.Request(full_serv_uri) + urllib2.urlopen(serv_req) + + +def _GetServePage(server_name, authtoken): + """Opens the /serve page and lists all keys. + Args: + server_name: (string) URL the app engine code is living on. + authtoken: (string) Authorization token. + Returns: + The text of the /serve page (including HTML tags) + """ + + serv_uri = server_name + '/serve' + serv_args = {'continue': serv_uri, 'auth': authtoken} + full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args) + serv_req = urllib2.Request(full_serv_uri) + serv_resp = urllib2.urlopen(serv_req) + return serv_resp.read() + + +def main(): + parser = optparse.OptionParser() + parser.add_option('--output_dir', + dest='output_dir', + action='store', + help='Path to output perf data files.') + parser.add_option('--start', + dest='start_ind', + action='store', + default=0, + help='Start index.') + parser.add_option('--stop', + dest='stop_ind', + action='store', + default=-1, + help='Stop index.') + options = parser.parse_args()[0] + if not options.output_dir: + print 'Must specify --output_dir.' + return 1 + if not os.path.exists(options.output_dir): + print 'Specified output_dir does not exist.' + return 1 + + authtoken = Authenticate(SERVER_NAME) + if not authtoken: + print 'Could not obtain authtoken, exiting.' + return 1 + DownloadSamples(SERVER_NAME, authtoken, options.output_dir, options.start_ind, + options.stop_ind) + print 'Downloaded samples.' + return 0 + + +if __name__ == '__main__': + exit(main()) |