# Copyright (c) 2011 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Library for interacting with gdata (i.e. Google Docs, Tracker, etc).""" from __future__ import print_function import functools import getpass import os import pickle import re import urllib import xml.dom.minidom import gdata.projecthosting.client import gdata.service import gdata.spreadsheet import gdata.spreadsheet.service from chromite.lib import operation # pylint: disable=attribute-defined-outside-init,access-member-before-definition TOKEN_FILE = os.path.join(os.environ['HOME'], '.gdata_token') CRED_FILE = os.path.join(os.environ['HOME'], '.gdata_cred.txt') oper = operation.Operation('gdata_lib') _BAD_COL_CHARS_REGEX = re.compile(r'[ /_]') def PrepColNameForSS(col): """Translate a column name for spreadsheet interface.""" # Spreadsheet interface requires column names to be # all lowercase and with no spaces or other special characters. return _BAD_COL_CHARS_REGEX.sub('', col.lower()) # TODO(mtennant): Rename PrepRowValuesForSS def PrepRowForSS(row): """Make sure spreadsheet handles all values in row as strings.""" return dict((key, PrepValForSS(val)) for key, val in row.items()) # Regex to detect values that the spreadsheet will auto-format as numbers. _NUM_REGEX = re.compile(r'^[\d\.]+$') def PrepValForSS(val): """Make sure spreadsheet handles this value as a string.""" # The main reason for this is version strings (e.g. for portage packages), # which Sheets automatically interprets as numbers and mangles. if val and _NUM_REGEX.match(val): return "'" + val return val def ScrubValFromSS(val): """Remove string indicator prefix if found.""" if val and val[0] == "'": return val[1:] return val class Creds(object): """Class to manage user/password credentials.""" __slots__ = ( 'docs_auth_token', # Docs Client auth token string 'creds_dirty', # True if user/password set and not, yet, saved 'password', # User password 'token_dirty', # True if auth token(s) set and not, yet, saved 'tracker_auth_token', # Tracker Client auth token string 'user', # User account (foo@chromium.org) ) SAVED_TOKEN_ATTRS = ('docs_auth_token', 'tracker_auth_token', 'user') def __init__(self): self.user = None self.password = None self.docs_auth_token = None self.tracker_auth_token = None self.token_dirty = False self.creds_dirty = False def SetDocsAuthToken(self, auth_token): """Set the Docs auth_token string.""" self.docs_auth_token = auth_token self.token_dirty = True def SetTrackerAuthToken(self, auth_token): """Set the Tracker auth_token string.""" self.tracker_auth_token = auth_token self.token_dirty = True def LoadAuthToken(self, filepath): """Load previously saved auth token(s) from |filepath|. This first clears both docs_auth_token and tracker_auth_token. """ self.docs_auth_token = None self.tracker_auth_token = None try: f = open(filepath, 'r') obj = pickle.load(f) f.close() if obj.has_key('auth_token'): # Backwards compatability. Default 'auth_token' is what # docs_auth_token used to be saved as. self.docs_auth_token = obj['auth_token'] self.token_dirty = True for attr in self.SAVED_TOKEN_ATTRS: if obj.has_key(attr): setattr(self, attr, obj[attr]) oper.Notice('Loaded Docs/Tracker auth token(s) from "%s"' % filepath) except IOError: oper.Error('Unable to load auth token file at "%s"' % filepath) def StoreAuthTokenIfNeeded(self, filepath): """Store auth token(s) to |filepath| if anything changed.""" if self.token_dirty: self.StoreAuthToken(filepath) def StoreAuthToken(self, filepath): """Store auth token(s) to |filepath|.""" obj = {} for attr in self.SAVED_TOKEN_ATTRS: val = getattr(self, attr) if val: obj[attr] = val try: oper.Notice('Storing Docs and/or Tracker auth token to "%s"' % filepath) f = open(filepath, 'w') pickle.dump(obj, f) f.close() self.token_dirty = False except IOError: oper.Error('Unable to store auth token to file at "%s"' % filepath) def SetCreds(self, user, password=None): if not '@' in user: user = '%s@chromium.org' % user if not password: password = getpass.getpass('Docs password for %s:' % user) self.user = user self.password = password self.creds_dirty = True def LoadCreds(self, filepath): """Load email/password credentials from |filepath|.""" # Read email from first line and password from second. with open(filepath, 'r') as f: (self.user, self.password) = (l.strip() for l in f.readlines()) oper.Notice('Loaded Docs/Tracker login credentials from "%s"' % filepath) def StoreCredsIfNeeded(self, filepath): """Store email/password credentials to |filepath| if anything changed.""" if self.creds_dirty: self.StoreCreds(filepath) def StoreCreds(self, filepath): """Store email/password credentials to |filepath|.""" oper.Notice('Storing Docs/Tracker login credentials to "%s"' % filepath) # Simply write email on first line and password on second. with open(filepath, 'w') as f: f.write(self.user + '\n') f.write(self.password + '\n') self.creds_dirty = False class IssueComment(object): """Represent a Tracker issue comment.""" __slots__ = ['title', 'text'] def __init__(self, title, text): self.title = title self.text = text def __str__(self): text = '' if self.text: text = '\n '.join(self.text.split('\n')) return '%s:\n %s' % (self.title, text) class Issue(object): """Represents one Tracker Issue.""" SlotDefaults = { 'comments': [], # List of IssueComment objects 'id': 0, # Issue id number (int) 'labels': [], # List of text labels 'owner': None, # Current owner (text, chromium.org account) 'status': None, # Current issue status (text) (e.g. Assigned) 'summary': None,# Issue summary (first comment) 'title': None, # Title text 'ccs': [], # Cc list } __slots__ = SlotDefaults.keys() def __init__(self, **kwargs): """Init for one Issue object. |kwargs| - key/value arguments to give initial values to any additional attributes on |self|. """ # Use SlotDefaults overwritten by kwargs for starting slot values. slotvals = self.SlotDefaults.copy() slotvals.update(kwargs) for slot in self.__slots__: setattr(self, slot, slotvals.pop(slot)) if slotvals: raise ValueError('I do not know what to do with %r' % slotvals) def __str__(self): """Pretty print of issue.""" lines = [ 'Issue %d - %s' % (self.id, self.title), 'Status: %s, Owner: %s' % (self.status, self.owner), 'Labels: %s' % ', '.join(self.labels), ] if self.summary: lines.append('Summary: %s' % self.summary) if self.comments: lines.extend(self.comments) return '\n'.join(lines) def InitFromTracker(self, t_issue, project_name): """Initialize |self| from tracker issue |t_issue|""" # The __slots__ logic above confuses pylint. # https://bitbucket.org/logilab/pylint/issue/380/ # pylint: disable=assigning-non-slot self.id = int(t_issue.id.text.split('/')[-1]) self.labels = [label.text for label in t_issue.label] if t_issue.owner: self.owner = t_issue.owner.username.text self.status = t_issue.status.text self.summary = t_issue.content.text self.title = t_issue.title.text self.comments = self.GetTrackerIssueComments(self.id, project_name) def GetTrackerIssueComments(self, issue_id, project_name): """Retrieve comments for |issue_id| from comments URL""" comments = [] feeds = 'http://code.google.com/feeds' url = '%s/issues/p/%s/issues/%d/comments/full' % (feeds, project_name, issue_id) doc = xml.dom.minidom.parse(urllib.urlopen(url)) entries = doc.getElementsByTagName('entry') for entry in entries: title_text_list = [] for key in ('title', 'content'): child = entry.getElementsByTagName(key)[0].firstChild title_text_list.append(child.nodeValue if child else None) comments.append(IssueComment(*title_text_list)) return comments def __eq__(self, other): return (self.id == other.id and self.labels == other.labels and self.owner == other.owner and self.status == other.status and self.summary == other.summary and self.title == other.title) def __ne__(self, other): return not self == other class TrackerError(RuntimeError): """Error class for tracker communication errors.""" class TrackerInvalidUserError(TrackerError): """Error class for when user not recognized by Tracker.""" class TrackerComm(object): """Class to manage communication with Tracker.""" __slots__ = ( 'author', # Author when creating/editing Tracker issues 'it_client', # Issue Tracker client 'project_name', # Tracker project name ) def __init__(self): self.author = None self.it_client = None self.project_name = None def Connect(self, creds, project_name, source='chromiumos'): self.project_name = project_name it_client = gdata.projecthosting.client.ProjectHostingClient() it_client.source = source if creds.tracker_auth_token: oper.Notice('Logging into Tracker using previous auth token.') it_client.auth_token = gdata.gauth.ClientLoginToken( creds.tracker_auth_token) else: oper.Notice('Logging into Tracker as "%s".' % creds.user) it_client.ClientLogin(creds.user, creds.password, source=source, service='code', account_type='GOOGLE') creds.SetTrackerAuthToken(it_client.auth_token.token_string) self.author = creds.user self.it_client = it_client def _QueryTracker(self, query): """Query the tracker for a list of issues. Return |None| on failure.""" try: return self.it_client.get_issues(self.project_name, query=query) except gdata.client.RequestError: return None def _CreateIssue(self, t_issue): """Create an Issue from a Tracker Issue.""" issue = Issue() issue.InitFromTracker(t_issue, self.project_name) return issue # TODO(mtennant): This method works today, but is not being actively used. # Leaving it in, because a logical use of the method is for to verify # that a Tracker issue in the package spreadsheet is open, and to add # comments to it when new upstream versions become available. def GetTrackerIssueById(self, tid): """Get tracker issue given |tid| number. Return Issue object if found.""" query = gdata.projecthosting.client.Query(issue_id=str(tid)) feed = self._QueryTracker(query) if feed.entry: return self._CreateIssue(feed.entry[0]) return None def GetTrackerIssuesByText(self, search_text, full_text=True, only_open=True): """Find all Tracker Issues that contain the text search_text.""" if not full_text: search_text = 'summary:"%s"' % search_text if only_open: search_text += ' is:open' query = gdata.projecthosting.client.Query(text_query=search_text) feed = self._QueryTracker(query) if feed: return [self._CreateIssue(tissue) for tissue in feed.entry] else: return [] def CreateTrackerIssue(self, issue): """Create a new issue in Tracker according to |issue|.""" try: created = self.it_client.add_issue(project_name=self.project_name, title=issue.title, content=issue.summary, author=self.author, status=issue.status, owner=issue.owner, labels=issue.labels, ccs=issue.ccs) issue.id = int(created.id.text.split('/')[-1]) return issue.id except gdata.client.RequestError as ex: if ex.body and ex.body.lower() == 'user not found': raise TrackerInvalidUserError('Tracker user %s not found' % issue.owner) if ex.body and ex.body.lower() == 'issue owner must be a member': raise TrackerInvalidUserError('Tracker user %s not a member' % issue.owner) raise def AppendTrackerIssueById(self, issue_id, comment, owner=None): """Append |comment| to issue |issue_id| in Tracker""" self.it_client.update_issue(project_name=self.project_name, issue_id=issue_id, author=self.author, comment=comment, owner=owner) return issue_id class SpreadsheetRow(dict): """Minor semi-immutable extension of dict to hold spreadsheet data. This lets us keep the original spreadsheet row object and spreadsheet row number as attributes. No changes are made to equality checking or anything else, so client code that wishes to handle this as a pure dict can. """ def __init__(self, ss_row_obj, ss_row_num, mapping=None): if mapping: dict.__init__(self, mapping) self.ss_row_obj = ss_row_obj self.ss_row_num = ss_row_num def __setitem__(self, key, val): raise TypeError('setting item in SpreadsheetRow not supported') def __delitem__(self, key): raise TypeError('deleting item in SpreadsheetRow not supported') class SpreadsheetError(RuntimeError): """Error class for spreadsheet communication errors.""" def ReadWriteDecorator(func): """Raise SpreadsheetError if appropriate.""" def f(self, *args, **kwargs): try: return func(self, *args, **kwargs) except gdata.service.RequestError as ex: raise SpreadsheetError(str(ex)) f.__name__ = func.__name__ return f class SpreadsheetComm(object): """Class to manage communication with one Google Spreadsheet worksheet.""" # Row numbering in spreadsheets effectively starts at 2 because row 1 # has the column headers. ROW_NUMBER_OFFSET = 2 # Spreadsheet column numbers start at 1. COLUMN_NUMBER_OFFSET = 1 __slots__ = ( '_columns', # Tuple of translated column names, filled in as needed '_rows', # Tuple of Row dicts in order, filled in as needed 'gd_client', # Google Data client 'ss_key', # Spreadsheet key 'ws_name', # Worksheet name 'ws_key', # Worksheet key ) @property def columns(self): """The columns property is filled in on demand. It is a tuple of column names, each run through PrepColNameForSS. """ if self._columns is None: query = gdata.spreadsheet.service.CellQuery() query['max-row'] = '1' feed = self.gd_client.GetCellsFeed(self.ss_key, self.ws_key, query=query) # The use of PrepColNameForSS here looks weird, but the values # in row 1 are the unaltered column names, rather than the restricted # column names used for interface purposes. In other words, if the # spreadsheet looks like it has a column called "Foo Bar", then the # first row will have a value "Foo Bar" but all interaction with that # column for other rows will use column key "foobar". Translate to # restricted names now with PrepColNameForSS. cols = [PrepColNameForSS(entry.content.text) for entry in feed.entry] self._columns = tuple(cols) return self._columns @property def rows(self): """The rows property is filled in on demand. It is a tuple of SpreadsheetRow objects. """ if self._rows is None: rows = [] feed = self.gd_client.GetListFeed(self.ss_key, self.ws_key) for rowIx, rowObj in enumerate(feed.entry, start=self.ROW_NUMBER_OFFSET): row_dict = dict((key, ScrubValFromSS(val.text)) for key, val in rowObj.custom.iteritems()) rows.append(SpreadsheetRow(rowObj, rowIx, row_dict)) self._rows = tuple(rows) return self._rows def __init__(self): for slot in self.__slots__: setattr(self, slot, None) def Connect(self, creds, ss_key, ws_name, source='chromiumos'): """Login to spreadsheet service and set current worksheet. |creds| Credentials object for Google Docs |ss_key| Spreadsheet key |ws_name| Worksheet name |source| Name to associate with connecting service """ self._Login(creds, source) self.SetCurrentWorksheet(ws_name, ss_key=ss_key) def SetCurrentWorksheet(self, ws_name, ss_key=None): """Change the current worksheet. This clears all caches.""" if ss_key and ss_key != self.ss_key: self.ss_key = ss_key self._ClearCache() self.ws_name = ws_name ws_key = self._GetWorksheetKey(self.ss_key, self.ws_name) if ws_key != self.ws_key: self.ws_key = ws_key self._ClearCache() def _ClearCache(self, keep_columns=False): """Called whenever column/row data might be stale.""" self._rows = None if not keep_columns: self._columns = None def _Login(self, creds, source): """Login to Google doc client using given |creds|.""" gd_client = RetrySpreadsheetsService() gd_client.source = source # Login using previous auth token if available, otherwise # use email/password from creds. if creds.docs_auth_token: oper.Notice('Logging into Docs using previous auth token.') gd_client.SetClientLoginToken(creds.docs_auth_token) else: oper.Notice('Logging into Docs as "%s".' % creds.user) gd_client.email = creds.user gd_client.password = creds.password gd_client.ProgrammaticLogin() creds.SetDocsAuthToken(gd_client.GetClientLoginToken()) self.gd_client = gd_client def _GetWorksheetKey(self, ss_key, ws_name): """Get the worksheet key with name |ws_name| in spreadsheet |ss_key|.""" feed = self.gd_client.GetWorksheetsFeed(ss_key) # The worksheet key is the last component in the URL (after last '/') for entry in feed.entry: if ws_name == entry.title.text: return entry.id.text.split('/')[-1] oper.Die('Unable to find worksheet "%s" in spreadsheet "%s"' % (ws_name, ss_key)) @ReadWriteDecorator def GetColumns(self): """Return tuple of column names in worksheet. Note that each returned name has been run through PrepColNameForSS. """ return self.columns @ReadWriteDecorator def GetColumnIndex(self, colName): """Get the column index (starting at 1) for column |colName|""" try: # Spreadsheet column indices start at 1, so +1. return self.columns.index(colName) + self.COLUMN_NUMBER_OFFSET except ValueError: return None @ReadWriteDecorator def GetRows(self): """Return tuple of SpreadsheetRow objects in order.""" return self.rows @ReadWriteDecorator def GetRowCacheByCol(self, column): """Return a dict for looking up rows by value in |column|. Each row value is a SpreadsheetRow object. If more than one row has the same value for |column|, then the row objects will be in a list in the returned dict. """ row_cache = {} for row in self.GetRows(): col_val = row[column] current_entry = row_cache.get(col_val, None) if current_entry and type(current_entry) is list: current_entry.append(row) elif current_entry: current_entry = [current_entry, row] else: current_entry = row row_cache[col_val] = current_entry return row_cache @ReadWriteDecorator def InsertRow(self, row): """Insert |row| at end of spreadsheet.""" self.gd_client.InsertRow(row, self.ss_key, self.ws_key) self._ClearCache(keep_columns=True) @ReadWriteDecorator def UpdateRowCellByCell(self, rowIx, row): """Replace cell values in row at |rowIx| with those in |row| dict.""" for colName in row: colIx = self.GetColumnIndex(colName) if colIx is not None: self.ReplaceCellValue(rowIx, colIx, row[colName]) self._ClearCache(keep_columns=True) @ReadWriteDecorator def DeleteRow(self, ss_row): """Delete the given |ss_row| (must be original spreadsheet row object.""" self.gd_client.DeleteRow(ss_row) self._ClearCache(keep_columns=True) @ReadWriteDecorator def ReplaceCellValue(self, rowIx, colIx, val): """Replace cell value at |rowIx| and |colIx| with |val|""" self.gd_client.UpdateCell(rowIx, colIx, val, self.ss_key, self.ws_key) self._ClearCache(keep_columns=True) @ReadWriteDecorator def ClearCellValue(self, rowIx, colIx): """Clear cell value at |rowIx| and |colIx|""" self.ReplaceCellValue(rowIx, colIx, None) @ReadWriteDecorator def ClearColumnWorksheet(self, colIx): """Clear column with index |colIX| from current worksheet.""" query = gdata.spreadsheet.service.CellQuery() query.min_col = str(colIx) query.max_col = str(colIx) cells = self.gd_client.GetCellsFeed(self.ss_key, wksht_id=self.ws_key, query=query) batchRequest = gdata.spreadsheet.SpreadsheetsCellsFeed() for entry in cells.entry: entry.cell.inputValue = None batchRequest.AddUpdate(entry) self.gd_client.ExecuteBatch(batchRequest, cells.GetBatchLink().href) @ReadWriteDecorator def WriteColumnToWorksheet(self, colIx, data): """Clear column index |colIx| from worksheet and write |data| to it.""" self.ClearColumnWorksheet(colIx) query = gdata.spreadsheet.service.CellQuery() query.min_col = str(colIx) query.max_col = str(colIx) query.min_row = '1' query.max_row = str(len(data)) query.return_empty = 'true' cells = self.gd_client.GetCellsFeed(self.ss_key, wksht_id=self.ws_key, query=query) batchRequest = gdata.spreadsheet.SpreadsheetsCellsFeed() for entry, value in zip(cells.entry, data): entry.cell.inputValue = str(value) batchRequest.AddUpdate(entry) self.gd_client.ExecuteBatch(batchRequest, cells.GetBatchLink().href) class RetrySpreadsheetsService(gdata.spreadsheet.service.SpreadsheetsService): """Extend SpreadsheetsService to put retry logic around http request method. The entire purpose of this class is to remove some flakiness from interactions with Google Drive spreadsheet service, in the form of certain 40* and 50* http error responses to http requests. This is documented in https://code.google.com/p/chromium/issues/detail?id=206798. There are two "request" methods that need to be wrapped in retry logic. 1) The request method on self. Original implementation is in base class atom.service.AtomService. 2) The request method on self.http_client. The class of self.http_client can actually vary, so the original implementation of the request method can also vary. """ TRY_MAX = 5 RETRYABLE_STATUSES = ( 403, # Forbidden (but retries still seem to help). 500, # Internal server error. ) def __init__(self, *args, **kwargs): gdata.spreadsheet.service.SpreadsheetsService.__init__(self, *args, **kwargs) # Wrap self.http_client.request with retry wrapper. This request method # is used by ProgrammaticLogin(), at least. if hasattr(self, 'http_client'): self.http_client.request = functools.partial(self._RetryRequest, self.http_client.request) self.request = functools.partial(self._RetryRequest, self.request) def _RetryRequest(self, func, *args, **kwargs): """Retry wrapper for bound |func|, passing |args| and |kwargs|. This retry wrapper can be used for any http request |func| that provides an http status code via the .status attribute of the returned value. Retry when the status value on the return object is in RETRYABLE_STATUSES, and run up to TRY_MAX times. If successful (whether or not retries were necessary) return the last return value returned from base method. If unsuccessful return the first return value returned from base method. """ first_retval = None for try_ix in xrange(1, self.TRY_MAX + 1): retval = func(*args, **kwargs) if retval.status not in self.RETRYABLE_STATUSES: return retval else: oper.Warning('Retry-able HTTP request failure (status=%d), try %d/%d' % (retval.status, try_ix, self.TRY_MAX)) if not first_retval: first_retval = retval oper.Warning('Giving up on HTTP request after %d tries' % self.TRY_MAX) return first_retval