diff options
Diffstat (limited to 'catapult/common/py_utils/py_utils/cloud_storage.py')
-rw-r--r-- | catapult/common/py_utils/py_utils/cloud_storage.py | 74 |
1 files changed, 71 insertions, 3 deletions
diff --git a/catapult/common/py_utils/py_utils/cloud_storage.py b/catapult/common/py_utils/py_utils/cloud_storage.py index b4988c58..a359a065 100644 --- a/catapult/common/py_utils/py_utils/cloud_storage.py +++ b/catapult/common/py_utils/py_utils/cloud_storage.py @@ -198,6 +198,37 @@ def List(bucket): return [url[len(query):] for url in stdout.splitlines()] +def ListDirs(bucket, path=''): + """Returns only directories matching the given path in bucket. + + Args: + bucket: Name of cloud storage bucket to look at. + path: Path within the bucket to filter to. Path can include wildcards. + path = 'foo*' will return ['mybucket/foo1/', 'mybucket/foo2/', ... ] but + not mybucket/foo1/file.txt or mybucket/foo-file.txt. + + Returns: + A list of directories. All returned paths are relative to the bucket root + directory. For example, ListDirs('my-bucket', path='foo/') will return results + of the form ['/foo/123', '/foo/124', ...], as opposed to ['123', '124', + ...]. + """ + bucket_prefix = 'gs://%s' % bucket + full_path = '%s/%s' % (bucket_prefix, path) + # Note that -d only ensures we don't recurse into subdirectories + # unnecessarily. It still lists all non-directory files matching the path + # followed by a blank line. Adding -d here is a performance optimization. + stdout = _RunCommand(['ls', '-d', full_path]) + dirs = [] + for url in stdout.splitlines(): + if len(url) == 0: + continue + # The only way to identify directories is by filtering for trailing slash. + # See https://github.com/GoogleCloudPlatform/gsutil/issues/466 + if url[-1] == '/': + dirs.append(url[len(bucket_prefix):]) + return dirs + def Exists(bucket, remote_path): try: _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)]) @@ -362,7 +393,8 @@ def _GetLocked(bucket, remote_path, local_path): def Insert(bucket, remote_path, local_path, publicly_readable=False): - """ Upload file in |local_path| to cloud storage. + """Upload file in |local_path| to cloud storage.
+ Args: bucket: the google cloud storage bucket name. remote_path: the remote file path in |bucket|. @@ -373,6 +405,43 @@ def Insert(bucket, remote_path, local_path, publicly_readable=False): Returns: The url where the file is uploaded to. """ + cloud_filepath = Upload(bucket, remote_path, local_path, publicly_readable) + return cloud_filepath.view_url + + +class CloudFilepath(object): + def __init__(self, bucket, remote_path): + self.bucket = bucket + self.remote_path = remote_path + + @property + def view_url(self): + """Get a human viewable url for the cloud file.""" + return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % ( + self.bucket, self.remote_path) + + @property + def fetch_url(self): + """Get a machine fetchable url for the cloud file.""" + return 'gs://%s/%s' % (self.bucket, self.remote_path) + + +def Upload(bucket, remote_path, local_path, publicly_readable=False): + """Upload file in |local_path| to cloud storage. + + Newer version of 'Insert()' returns an object instead of a string. + + Args: + bucket: the google cloud storage bucket name. + remote_path: the remote file path in |bucket|. + local_path: path of the local file to be uploaded. + publicly_readable: whether the uploaded file has publicly readable + permission. + + Returns: + A CloudFilepath object providing the location of the object in various + formats. + """ url = 'gs://%s/%s' % (bucket, remote_path) command_and_args = ['cp'] extra_info = '' @@ -382,8 +451,7 @@ def Insert(bucket, remote_path, local_path, publicly_readable=False): command_and_args += [local_path, url] logger.info('Uploading %s to %s%s', local_path, url, extra_info) _RunCommand(command_and_args) - return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % ( - bucket, remote_path) + return CloudFilepath(bucket, remote_path) def GetIfHashChanged(cs_path, download_path, bucket, file_hash): |