Optimize content length for large files

This commit is contained in:
Bert Blommers 2020-03-17 09:16:12 +00:00
parent 3802767817
commit d8423b5de0
3 changed files with 9 additions and 26 deletions

View File

@@ -120,11 +120,9 @@ class FakeKey(BaseModel):
@property @property
def value(self): def value(self):
self.lock.acquire() self.lock.acquire()
print("===>value")
self._value_buffer.seek(0) self._value_buffer.seek(0)
print("===>seek")
r = self._value_buffer.read() r = self._value_buffer.read()
print("===>read") r = copy.copy(r)
self.lock.release() self.lock.release()
return r return r
@@ -138,6 +136,7 @@ class FakeKey(BaseModel):
if isinstance(new_value, six.text_type): if isinstance(new_value, six.text_type):
new_value = new_value.encode(DEFAULT_TEXT_ENCODING) new_value = new_value.encode(DEFAULT_TEXT_ENCODING)
self._value_buffer.write(new_value) self._value_buffer.write(new_value)
self.contentsize = len(new_value)
def copy(self, new_name=None, new_is_versioned=None): def copy(self, new_name=None, new_is_versioned=None):
r = copy.deepcopy(self) r = copy.deepcopy(self)
@@ -165,6 +164,7 @@ class FakeKey(BaseModel):
self.acl = acl self.acl = acl
def append_to_value(self, value): def append_to_value(self, value):
self.contentsize += len(value)
self._value_buffer.seek(0, os.SEEK_END) self._value_buffer.seek(0, os.SEEK_END)
self._value_buffer.write(value) self._value_buffer.write(value)
@@ -237,8 +237,7 @@ class FakeKey(BaseModel):
@property @property
def size(self): def size(self):
self._value_buffer.seek(0, os.SEEK_END) return self.contentsize
return self._value_buffer.tell()
@property @property
def storage_class(self): def storage_class(self):
@@ -257,6 +256,7 @@ class FakeKey(BaseModel):
state = self.__dict__.copy() state = self.__dict__.copy()
state["value"] = self.value state["value"] = self.value
del state["_value_buffer"] del state["_value_buffer"]
del state["lock"]
return state return state
def __setstate__(self, state): def __setstate__(self, state):
@@ -266,6 +266,7 @@ class FakeKey(BaseModel):
max_size=self._max_buffer_size max_size=self._max_buffer_size
) )
self.value = state["value"] self.value = state["value"]
self.lock = threading.Lock()
class FakeMultipart(BaseModel): class FakeMultipart(BaseModel):
@@ -292,7 +293,7 @@ class FakeMultipart(BaseModel):
etag = etag.replace('"', "") etag = etag.replace('"', "")
if part is None or part_etag != etag: if part is None or part_etag != etag:
raise InvalidPart() raise InvalidPart()
if last is not None and len(last.value) < UPLOAD_PART_MIN_SIZE: if last is not None and last.contentsize < UPLOAD_PART_MIN_SIZE:
raise EntityTooSmall() raise EntityTooSmall()
md5s.extend(decode_hex(part_etag)[0]) md5s.extend(decode_hex(part_etag)[0])
total.extend(part.value) total.extend(part.value)
@@ -1327,7 +1328,6 @@ class S3Backend(BaseBackend):
return key return key
def get_key(self, bucket_name, key_name, version_id=None, part_number=None): def get_key(self, bucket_name, key_name, version_id=None, part_number=None):
print("get_key("+str(bucket_name)+","+str(key_name)+","+str(version_id)+","+str(part_number)+")")
key_name = clean_key_name(key_name) key_name = clean_key_name(key_name)
bucket = self.get_bucket(bucket_name) bucket = self.get_bucket(bucket_name)
key = None key = None

View File

@@ -859,7 +859,6 @@ class ResponseObject(_TemplateEnvironmentMixin, ActionAuthenticatorMixin):
def _handle_range_header(self, request, headers, response_content): def _handle_range_header(self, request, headers, response_content):
response_headers = {} response_headers = {}
length = len(response_content) length = len(response_content)
print("Length: " + str(length) + " Range: " + str(request.headers.get("range")))
last = length - 1 last = length - 1
_, rspec = request.headers.get("range").split("=") _, rspec = request.headers.get("range").split("=")
if "," in rspec: if "," in rspec:
@@ -877,7 +876,6 @@ class ResponseObject(_TemplateEnvironmentMixin, ActionAuthenticatorMixin):
else: else:
return 400, response_headers, "" return 400, response_headers, ""
if begin < 0 or end > last or begin > min(end, last): if begin < 0 or end > last or begin > min(end, last):
print(str(begin)+ " < 0 or " + str(end) + " > " + str(last) + " or " + str(begin) + " > min("+str(end)+","+str(last)+")")
return 416, response_headers, "" return 416, response_headers, ""
response_headers["content-range"] = "bytes {0}-{1}/{2}".format( response_headers["content-range"] = "bytes {0}-{1}/{2}".format(
begin, end, length begin, end, length
@@ -907,8 +905,6 @@ class ResponseObject(_TemplateEnvironmentMixin, ActionAuthenticatorMixin):
response_content = response response_content = response
else: else:
status_code, response_headers, response_content = response status_code, response_headers, response_content = response
print("response received: " + str(len(response_content)))
print(request.headers)
if status_code == 200 and "range" in request.headers: if status_code == 200 and "range" in request.headers:
self.lock.acquire() self.lock.acquire()
@@ -920,7 +916,6 @@ class ResponseObject(_TemplateEnvironmentMixin, ActionAuthenticatorMixin):
return status_code, response_headers, response_content return status_code, response_headers, response_content
def _control_response(self, request, full_url, headers): def _control_response(self, request, full_url, headers):
print("_control_response")
parsed_url = urlparse(full_url) parsed_url = urlparse(full_url)
query = parse_qs(parsed_url.query, keep_blank_values=True) query = parse_qs(parsed_url.query, keep_blank_values=True)
method = request.method method = request.method
@@ -1068,14 +1063,12 @@ class ResponseObject(_TemplateEnvironmentMixin, ActionAuthenticatorMixin):
) )
def _key_response_get(self, bucket_name, query, key_name, headers): def _key_response_get(self, bucket_name, query, key_name, headers):
print("_key_response_get("+str(key_name)+","+str(headers)+")")
self._set_action("KEY", "GET", query) self._set_action("KEY", "GET", query)
self._authenticate_and_authorize_s3_action() self._authenticate_and_authorize_s3_action()
response_headers = {} response_headers = {}
if query.get("uploadId"): if query.get("uploadId"):
upload_id = query["uploadId"][0] upload_id = query["uploadId"][0]
print("UploadID: " + str(upload_id))
parts = self.backend.list_multipart(bucket_name, upload_id) parts = self.backend.list_multipart(bucket_name, upload_id)
template = self.response_template(S3_MULTIPART_LIST_RESPONSE) template = self.response_template(S3_MULTIPART_LIST_RESPONSE)
return ( return (
@@ -1107,7 +1100,6 @@ class ResponseObject(_TemplateEnvironmentMixin, ActionAuthenticatorMixin):
response_headers.update(key.metadata) response_headers.update(key.metadata)
response_headers.update(key.response_dict) response_headers.update(key.response_dict)
print("returning 200, " + str(headers) + ", " + str(len(key.value)) + " ( " + str(key_name) + ")")
return 200, response_headers, key.value return 200, response_headers, key.value
def _key_response_put(self, request, body, bucket_name, query, key_name, headers): def _key_response_put(self, request, body, bucket_name, query, key_name, headers):

View File

@@ -4398,24 +4398,17 @@ def test_s3_config_dict():
@mock_s3 @mock_s3
def test_delete_downloaded_file(): def test_delete_downloaded_file():
# SET UP # SET UP
filename = '...' filename = 'some_large_file.pdf'
file = open(filename, 'rb') file = open(filename, 'rb')
uploader = PdfFileUploader(file) uploader = PdfFileUploader(file)
boto3.client('s3').create_bucket(Bucket=uploader.bucket_name()) boto3.client('s3').create_bucket(Bucket=uploader.bucket_name())
uploader.upload() uploader.upload()
print("================\nUPLOADED\n=================")
# DOWNLOAD
# the following two lines are basically
# boto3.client('s3').download_file(bucket_name, file_name, local_path)
# where bucket_name, file_name and local_path are retrieved from PdfFileUploader
# e.g. boto3.client('s3').download_file("bucket_name", "asdf.pdf", "/tmp/asdf.pdf")
downloader = PdfFileDownloader(uploader.full_bucket_file_name()) downloader = PdfFileDownloader(uploader.full_bucket_file_name())
downloader.download() downloader.download()
downloader.delete_downloaded_file() downloader.delete_downloaded_file()
print("Done!")
from pathlib import Path from pathlib import Path
import re import re
@@ -4431,8 +4424,6 @@ class PdfFileDownloader:
return self.local_path() return self.local_path()
except ClientError as exc: except ClientError as exc:
print("=======")
print(exc)
raise exc raise exc
def local_path(self): def local_path(self):