Since this is a regular URL, you can use `urlparse` to get all the parts of the URL.
>>> from urlparse import urlparse
>>> o = urlparse('s3://bucket_name/folder1/folder2/file1.json', allow_fragments=False)
>>> o
ParseResult(scheme='s3', netloc='bucket_name', path='/folder1/folder2/file1.json', params='', query='', fragment='')
>>> o.netloc
'bucket_name'
>>> o.path
'/folder1/folder2/file1.json'
You may need to remove the leading slash from the key, as the following answer suggests.
o.path.lstrip('/')
In Python 3, `urlparse` moved to `urllib.parse`, so use:
from urllib.parse import urlparse
Here is a class that takes care of all the details.
try:
    from urlparse import urlparse  # Python 2
except ImportError:
    from urllib.parse import urlparse  # Python 3


class S3Url(object):
    """Split an ``s3://bucket/key`` URL into its bucket name and object key.

    The URL is parsed once with ``urlparse``; ``bucket``, ``key`` and ``url``
    are read-only views over that parse result.

    ``?`` and ``#`` are treated as part of the key rather than as URL
    query/fragment delimiters:

    >>> s = S3Url("s3://bucket/hello/world")
    >>> s.bucket
    'bucket'
    >>> s.key
    'hello/world'
    >>> s.url
    's3://bucket/hello/world'

    >>> s = S3Url("s3://bucket/hello/world?qwe1=3#ddd")
    >>> s.bucket
    'bucket'
    >>> s.key
    'hello/world?qwe1=3#ddd'
    >>> s.url
    's3://bucket/hello/world?qwe1=3#ddd'

    >>> s = S3Url("s3://bucket/hello/world#foo?bar=2")
    >>> s.key
    'hello/world#foo?bar=2'
    >>> s.url
    's3://bucket/hello/world#foo?bar=2'

    Only the single slash separating bucket from key is removed, so a key
    that itself begins with a slash is preserved:

    >>> S3Url("s3://bucket//leading/slash").key
    '/leading/slash'
    >>> S3Url("s3://bucket").key
    ''

    NOTE: a bare trailing ``?`` (``s3://bucket/name?``) parses to an empty
    query string and is therefore not reproduced by ``key``.
    """

    def __init__(self, url):
        """Parse *url* (a string such as ``"s3://bucket/some/key"``)."""
        # allow_fragments=False keeps '#' inside the path/query instead of
        # splitting it off into a separate fragment component.
        self._parsed = urlparse(url, allow_fragments=False)

    @property
    def bucket(self):
        """The network-location part of the URL, i.e. the bucket name."""
        return self._parsed.netloc

    @property
    def key(self):
        """The object key: everything after ``s3://bucket/``."""
        path = self._parsed.path
        # Drop exactly one leading '/', the bucket/key separator. Using
        # lstrip('/') here would also eat slashes that belong to the key.
        if path.startswith('/'):
            path = path[1:]
        # urlparse splits at the first '?'; glue the remainder back on so
        # the key is returned in one piece.
        if self._parsed.query:
            return path + '?' + self._parsed.query
        return path

    @property
    def url(self):
        """The URL reassembled from its parsed components."""
        return self._parsed.geturl()
source share