aboutsummaryrefslogtreecommitdiff
path: root/cwp/interpreter/app_engine_pull.py
blob: d092e2a391d2a789d558dc1225f1b8dab7a72522 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# Copyright 2012 Google Inc. All Rights Reserved.
# Author: mrdmnd@ (Matt Redmond)
"""A client to pull data from Bartlett.

Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py

The server houses perf.data.gz, board, and chrome version for each upload.
This script first authenticates with a proper @google.com account, then
downloads each sample (if it's not already cached) and unzips perf.data.

  Authenticate(): Gets login info and returns an auth token.
  DownloadSamples(): Downloads and unzips samples.
  _GetServePage(): Pulls the /serve page from the app engine server.
  _DownloadSampleFromServer(): Downloads a local compressed copy of a sample.
  _UncompressSample(): Decompresses a sample, deleting the compressed version.
  _DeleteSampleFromServer(): Opens the /del page for a sample key.
"""
import cookielib
import getpass
import gzip
import optparse
import os
import urllib
import urllib2

# Base URL of the App Engine app; main() passes it to Authenticate() and
# DownloadSamples().
# NOTE(review): this is plain http, and the auth token is sent in the
# /_ah/login query string -- confirm whether https should be used instead.
SERVER_NAME = 'http://chromeoswideprofiling.appspot.com'
# Sent as the 'source' field of the ClientLogin request in Authenticate().
APP_NAME = 'chromeoswideprofiling'
# Separates the fields of each /serve page entry and joins the fields of the
# local sample filenames.
DELIMITER = '~'


def Authenticate(server_name):
  """Prompts for Google credentials and exchanges them for an auth token.

  Reads the email from stdin and the password via getpass, installs a
  process-wide cookie-aware urllib2 opener, and posts the credentials to the
  ClientLogin endpoint.

  TODO: Accept OAuth2 instead of password.

  Args:
    server_name: (string) URL that the app engine code is living on.
                 Not used by the login request itself; kept so existing
                 callers keep working.
  Returns:
    authtoken: (string) The authorization token that can be used
                        to grab other pages, or None if the login request
                        failed or the response carried no Auth field.
  """
  # Grab username and password from user through stdin.
  username = raw_input('Email (must be @google.com account): ')
  password = getpass.getpass('Password: ')
  # Use a cookie to authenticate with GAE. install_opener() makes every later
  # urllib2.urlopen() call in this process go through the same cookie jar.
  cookiejar = cookielib.LWPCookieJar()
  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
  urllib2.install_opener(opener)
  # Get an AuthToken from Google accounts service.
  auth_uri = 'https://www.google.com/accounts/ClientLogin'
  authreq_data = urllib.urlencode({'Email': username,
                                   'Passwd': password,
                                   'service': 'ah',
                                   'source': APP_NAME,
                                   'accountType': 'HOSTED_OR_GOOGLE'})
  auth_req = urllib2.Request(auth_uri, data=authreq_data)
  try:
    auth_resp = urllib2.urlopen(auth_req)
  except urllib2.URLError:
    print('Error logging in to Google accounts service.')
    return None
  try:
    body = auth_resp.read()
  finally:
    auth_resp.close()
  # Auth response contains several key=value fields, one per line.
  # We care about the part after Auth=
  auth_resp_dict = {}
  for line in body.splitlines():
    if '=' not in line:
      continue
    # Split on the first '=' only: the value itself may contain '='.
    name, value = line.split('=', 1)
    auth_resp_dict[name] = value
  # A missing Auth field yields None, which callers treat as a failed login.
  return auth_resp_dict.get('Auth')


def DownloadSamples(server_name, authtoken, output_dir, start, stop):
  """Downloads the selected samples and writes unzipped copies to output_dir.

  The /serve page is split on '</br>' into one entry per sample. Each entry
  holds DELIMITER-separated fields: field 0 is the key, 1 the upload time,
  3 the board and 4 the version (field 2, the sample md5 according to the
  original comment, is not used). Blank entries are skipped.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    output_dir   (string) Filepath to write output to.
    start:       (int)    Index to start downloading from, starting at top.
    stop:        (int)    Index to stop downloading, non-inclusive. -1 for end.
                          start and stop are applied as a Python slice over
                          the page entries; numeric strings are accepted and
                          None means "no bound".
  Returns:
    None
  Raises:
    IndexError: if a non-blank entry has fewer than five fields.
  """
  server_name = server_name.rstrip('/')

  serve_page_string = _GetServePage(server_name, authtoken)
  if serve_page_string is None:
    print('Error getting /serve page.')
    return

  sample_list = serve_page_string.split('</br>')
  # Command-line parsers may hand the bounds over as strings; slicing needs
  # ints (or None), so coerce here instead of failing with a TypeError.
  first = None if start is None else int(start)
  last = None if stop is None else int(stop)
  # Splitting on '</br>' leaves an empty string after a trailing tag. Such
  # blank entries carry no sample, so drop them rather than abort the run.
  selected = [entry for entry in sample_list[first:last] if entry.strip()]

  print('Will download:')
  for sample in selected:
    print(sample)
  for sample in selected:
    sample_info = [s.strip() for s in sample.split(DELIMITER)]
    key = sample_info[0]
    # No space between date and time.
    timestamp = sample_info[1].replace(' ', '_')
    board = sample_info[3]
    version = sample_info[4]

    # Put a compressed copy of the sample in the output directory, then
    # replace it with the uncompressed version.
    _DownloadSampleFromServer(server_name, authtoken, key, timestamp, board,
                              version, output_dir)
    _UncompressSample(key, timestamp, board, version, output_dir)


def _BuildFilenameFromParams(key, time, board, version):
  """Builds the local filename that identifies one sample.

  Args:
    key:  (string) Key indexing our sample in the datastore.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
  Returns:
    (string) The four fields joined with DELIMITER, in the order
    key, time, board, version.
  """
  fields = (key, time, board, version)
  return DELIMITER.join(fields)


def _DownloadSampleFromServer(server_name, authtoken, key, time, board, version,
                              output_dir):
  """Downloads the compressed sample for key into output_dir.

  The data is saved as <filename>.gz, where <filename> comes from
  _BuildFilenameFromParams(). Nothing is downloaded when the uncompressed
  file <filename> already exists in output_dir.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    key:  (string) Key indexing our sample in the datastore
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir:  (string) Filepath to write to output to.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  compressed_path = os.path.join(output_dir, filename + '.gz')

  if os.path.exists(os.path.join(output_dir, filename)):
    print('Already downloaded %s, skipping.' % filename)
    return

  # The sample is requested through /_ah/login, with the real /serve/<key>
  # URL passed as the 'continue' target.
  serv_uri = server_name + '/serve/' + key
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))
  chunk_size = 64 * 1024
  try:
    # The payload is gzip data, so the file must be opened in binary mode.
    out_file = open(compressed_path, 'wb')
    try:
      # Copy in chunks so a large sample is never held in memory at once.
      while True:
        chunk = serv_resp.read(chunk_size)
        if not chunk:
          break
        out_file.write(chunk)
    finally:
      out_file.close()
  finally:
    serv_resp.close()


def _UncompressSample(key, time, board, version, output_dir):
  """Uncompresses a given sample.gz file and deletes the compressed version.

  Does nothing when the uncompressed file already exists. If decompression
  fails, any partially written output is removed so that a later run does not
  mistake it for a finished sample; the compressed file is left in place.

  Args:
    key: (string) Sample key to uncompress.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to find sample key in.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  output_path = os.path.join(output_dir, filename)
  compressed_path = os.path.join(output_dir, filename + '.gz')

  if os.path.exists(output_path):
    print('Already decompressed %s, skipping.' % filename)
    return

  chunk_size = 64 * 1024
  # Open the input first: if the .gz is missing, no output file is created.
  in_file = gzip.open(compressed_path, 'rb')
  finished = False
  try:
    out_file = open(output_path, 'wb')
    try:
      while True:
        chunk = in_file.read(chunk_size)
        if not chunk:
          break
        out_file.write(chunk)
    finally:
      out_file.close()
    finished = True
  finally:
    in_file.close()
    # The existence of output_path is what marks a sample as done, so a
    # partial file must not survive a failed decompression.
    if not finished and os.path.exists(output_path):
      os.remove(output_path)
  os.remove(compressed_path)


def _DeleteSampleFromServer(server_name, authtoken, key):
  """Requests the server's /del page for the given sample key.

  The request goes through /_ah/login, carrying the auth token and the
  /del/<key> URL as the 'continue' target. The response is not inspected.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    key:  (string) Key to delete.
  Returns:
    None
  """
  login_params = {'continue': server_name + '/del/' + key, 'auth': authtoken}
  login_uri = server_name + '/_ah/login?%s' % urllib.urlencode(login_params)
  urllib2.urlopen(urllib2.Request(login_uri))


def _GetServePage(server_name, authtoken):
  """Opens the /serve page and lists all keys.

  The page is requested through /_ah/login, with the /serve URL passed as the
  'continue' target.

  Args:
    server_name: (string) URL the app engine code is living on.
    authtoken:   (string) Authorization token.
  Returns:
    The text of the /serve page (including HTML tags), or None if the
    request could not be opened.
  """
  serv_uri = server_name + '/serve'
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  try:
    serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))
  except urllib2.URLError:
    # Report failure as None, matching how Authenticate() reports a failed
    # request, so callers can check the result instead of catching.
    return None
  try:
    return serv_resp.read()
  finally:
    serv_resp.close()


def main():
  """Parses flags, authenticates, and downloads the requested samples.

  Flags:
    --output_dir: existing directory to write perf data files to (required).
    --start: index of the first sample to download (default 0).
    --stop: index to stop at, non-inclusive (default -1).

  Returns:
    (int) 0 on success; 1 if --output_dir is missing or does not exist, or if
    no auth token could be obtained.
  """
  parser = optparse.OptionParser()
  parser.add_option('--output_dir',
                    dest='output_dir',
                    action='store',
                    help='Path to output perf data files.')
  # type='int' is required: without it optparse stores command-line values as
  # strings, which cannot be used as slice bounds in DownloadSamples().
  parser.add_option('--start',
                    dest='start_ind',
                    action='store',
                    type='int',
                    default=0,
                    help='Start index.')
  parser.add_option('--stop',
                    dest='stop_ind',
                    action='store',
                    type='int',
                    default=-1,
                    help='Stop index.')
  options = parser.parse_args()[0]
  if not options.output_dir:
    print('Must specify --output_dir.')
    return 1
  if not os.path.exists(options.output_dir):
    print('Specified output_dir does not exist.')
    return 1

  authtoken = Authenticate(SERVER_NAME)
  if not authtoken:
    print('Could not obtain authtoken, exiting.')
    return 1
  DownloadSamples(SERVER_NAME, authtoken, options.output_dir, options.start_ind,
                  options.stop_ind)
  print('Downloaded samples.')
  return 0


if __name__ == '__main__':
  # Use main()'s return value as the process exit status. SystemExit is raised
  # directly because the exit() builtin is only injected by the site module
  # and is intended for interactive use.
  raise SystemExit(main())