2014-08-27 11:17:06 -04:00
|
|
|
from __future__ import unicode_literals
|
2018-01-30 16:10:43 -08:00
|
|
|
import logging
|
2018-03-21 16:33:09 +00:00
|
|
|
import os
|
2014-12-07 12:43:14 -05:00
|
|
|
|
|
|
|
from boto.s3.key import Key
|
2013-02-18 21:22:03 -05:00
|
|
|
import re
|
2014-12-07 12:43:14 -05:00
|
|
|
import six
|
2019-09-24 17:07:58 -05:00
|
|
|
from six.moves.urllib.parse import urlparse, unquote, quote
|
2014-12-07 12:43:14 -05:00
|
|
|
import sys
|
2013-02-18 21:22:03 -05:00
|
|
|
|
2018-01-30 16:10:43 -08:00
|
|
|
|
|
|
|
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2014-12-10 20:20:43 -05:00
|
|
|
# Matches hosts of the form "<bucket>.s3<suffix>.amazonaws.com"; the first
# capture group is the bucket name. The dots are escaped so that they only
# match a literal "." (an unescaped "." would accept any character and let a
# host such as "docs3.amazonaws.com" produce a bogus bucket name).
bucket_name_regex = re.compile(r"(.+)\.s3(.*)\.amazonaws\.com")


def bucket_name_from_url(url):
    """Return the bucket name encoded in the host part of ``url``.

    Returns ``None`` when:

    * the ``S3_IGNORE_SUBDOMAIN_BUCKETNAME`` environment variable is set to
      ``"1"`` or ``"true"``;
    * the host contains ``amazonaws.com`` but does not match
      ``bucket_name_regex``;
    * the host is not an ``amazonaws.com`` host and contains no ``"."``.

    For any other host containing a ``"."``, the text before the first dot
    is returned. A leading ``"www."`` is stripped before any of the checks.
    """
    if os.environ.get("S3_IGNORE_SUBDOMAIN_BUCKETNAME", "") in ["1", "true"]:
        return None

    domain = urlparse(url).netloc
    if domain.startswith("www."):
        domain = domain[4:]

    if "amazonaws.com" in domain:
        bucket_result = bucket_name_regex.search(domain)
        if bucket_result:
            return bucket_result.groups()[0]
        # An amazonaws.com host without a "<bucket>.s3" prefix.
        return None

    if "." in domain:
        return domain.split(".")[0]

    # No subdomain found.
    return None
|
2013-04-13 19:00:37 -04:00
|
|
|
|
|
|
|
|
2020-03-18 13:02:07 +00:00
|
|
|
# 'owi-common-cf', 'snippets/test.json' = bucket_and_name_from_url('s3://owi-common-cf/snippets/test.json')
def bucket_and_name_from_url(url):
    """Split an ``s3://bucket/key`` URL into ``(bucket_name, key)``.

    Returns ``(None, None)`` when ``url`` does not start with ``s3://``.
    The key is everything after the first ``/`` that follows the bucket
    name. When there is no such ``/`` (``s3://bucket``) the key is the
    empty string, the same result as for ``s3://bucket/``.
    """
    prefix = "s3://"
    if not url.startswith(prefix):
        return None, None

    # partition() tolerates a missing "/" instead of raising ValueError.
    bucket_name, _, key = url[len(prefix) :].partition("/")
    return bucket_name, key
|
|
|
|
|
|
|
|
|
2018-01-30 16:10:43 -08:00
|
|
|
# Two alternatives, both anchored at the start of an http(s) URL:
#   s3[-.]<region1>.amazonaws.com/<rest>
#   <name>.s3[-.]<region2>.amazonaws.com
# The region is captured in the named group "region1" or "region2".
REGION_URL_REGEX = re.compile(
    r"^https?://(s3[-\.](?P<region1>.+)\.amazonaws\.com/(.+)|"
    r"(.+)\.s3[-\.](?P<region2>.+)\.amazonaws\.com)/?"
)


def parse_region_from_url(url):
    """Return the region captured by ``REGION_URL_REGEX`` in ``url``.

    Falls back to ``"us-east-1"`` when the URL does not match the pattern.
    """
    found = REGION_URL_REGEX.search(url)
    if not found:
        return "us-east-1"
    return found.group("region1") or found.group("region2")
|
|
|
|
|
|
|
|
|
2014-12-07 12:43:14 -05:00
|
|
|
def metadata_from_headers(headers):
    """Build a metadata dict from the entries of ``headers``.

    * A header name matching ``x-amz-meta-<name>`` (case-insensitive) is
      stored under the lower-cased full header name.
    * Otherwise, a header whose lower-cased name appears in
      ``Key.base_user_settable_fields`` is stored under the name as given.
    * Header names that are not strings, and all other headers, are skipped.

    Values are read back with ``headers[header]``.
    """
    pattern = re.compile(r"^x-amz-meta-([a-zA-Z0-9\-_]+)$", flags=re.IGNORECASE)
    collected = {}

    for header, _ in headers.items():
        if not isinstance(header, six.string_types):
            continue

        hit = pattern.match(header)
        if hit:
            # Extra (user-defined) metadata.
            name = hit.group(0).lower()
        elif header.lower() in Key.base_user_settable_fields:
            # Special metadata that doesn't start with x-amz-meta.
            name = header
        else:
            continue

        if name:
            collected[name] = headers[header]

    return collected
|
|
|
|
|
|
|
|
|
2019-09-24 17:07:58 -05:00
|
|
|
def clean_key_name(key_name):
    """Return ``key_name`` with percent-escapes decoded via ``unquote``.

    On Python 2 the name is encoded to UTF-8 before unquoting and decoded
    back to text afterwards.
    """
    if not six.PY2:
        return unquote(key_name)

    encoded = key_name.encode("utf-8")
    return unquote(encoded).decode("utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
def undo_clean_key_name(key_name):
    """Return ``key_name`` percent-encoded via ``quote``.

    On Python 2 the name is encoded to UTF-8 before quoting and decoded
    back to text afterwards.
    """
    if not six.PY2:
        return quote(key_name)

    encoded = key_name.encode("utf-8")
    return quote(encoded).decode("utf-8")
|
2014-06-27 15:37:51 -06:00
|
|
|
|
|
|
|
|
|
|
|
class _VersionedKeyStore(dict):
|
|
|
|
|
|
|
|
""" A simplified/modified version of Django's `MultiValueDict` taken from:
|
|
|
|
https://github.com/django/django/blob/70576740b0bb5289873f5a9a9a4e1a26b2c330e5/django/utils/datastructures.py#L282
|
|
|
|
"""
|
|
|
|
|
|
|
|
def __sgetitem__(self, key):
|
|
|
|
return super(_VersionedKeyStore, self).__getitem__(key)
|
|
|
|
|
|
|
|
def __getitem__(self, key):
|
|
|
|
return self.__sgetitem__(key)[-1]
|
|
|
|
|
|
|
|
def __setitem__(self, key, value):
|
|
|
|
try:
|
|
|
|
current = self.__sgetitem__(key)
|
|
|
|
current.append(value)
|
|
|
|
except (KeyError, IndexError):
|
|
|
|
current = [value]
|
|
|
|
|
|
|
|
super(_VersionedKeyStore, self).__setitem__(key, current)
|
|
|
|
|
|
|
|
def get(self, key, default=None):
|
|
|
|
try:
|
|
|
|
return self[key]
|
|
|
|
except (KeyError, IndexError):
|
|
|
|
pass
|
|
|
|
return default
|
|
|
|
|
|
|
|
def getlist(self, key, default=None):
|
|
|
|
try:
|
|
|
|
return self.__sgetitem__(key)
|
|
|
|
except (KeyError, IndexError):
|
|
|
|
pass
|
|
|
|
return default
|
|
|
|
|
|
|
|
def setlist(self, key, list_):
|
|
|
|
if isinstance(list_, tuple):
|
|
|
|
list_ = list(list_)
|
|
|
|
elif not isinstance(list_, list):
|
|
|
|
list_ = [list_]
|
|
|
|
|
|
|
|
super(_VersionedKeyStore, self).__setitem__(key, list_)
|
|
|
|
|
|
|
|
def _iteritems(self):
|
|
|
|
for key in self:
|
|
|
|
yield key, self[key]
|
|
|
|
|
|
|
|
def _itervalues(self):
|
|
|
|
for key in self:
|
|
|
|
yield self[key]
|
|
|
|
|
|
|
|
def _iterlists(self):
|
|
|
|
for key in self:
|
|
|
|
yield key, self.getlist(key)
|
|
|
|
|
|
|
|
items = iteritems = _iteritems
|
|
|
|
lists = iterlists = _iterlists
|
|
|
|
values = itervalues = _itervalues
|
|
|
|
|
|
|
|
if sys.version_info[0] < 3:
|
2019-10-31 08:44:26 -07:00
|
|
|
|
2014-06-27 15:37:51 -06:00
|
|
|
def items(self):
|
|
|
|
return list(self.iteritems())
|
|
|
|
|
|
|
|
def values(self):
|
|
|
|
return list(self.itervalues())
|
|
|
|
|
|
|
|
def lists(self):
|
|
|
|
return list(self.iterlists())
|