Textract: Addressed comments
This commit is contained in:
parent
64c16c715d
commit
7dc5adb5d5
45
docs/docs/services/textract.rst
Normal file
45
docs/docs/services/textract.rst
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
.. _implementedservice_textract:
|
||||||
|
|
||||||
|
.. |start-h3| raw:: html
|
||||||
|
|
||||||
|
<h3>
|
||||||
|
|
||||||
|
.. |end-h3| raw:: html
|
||||||
|
|
||||||
|
</h3>
|
||||||
|
|
||||||
|
==========
|
||||||
|
textract
|
||||||
|
==========
|
||||||
|
|
||||||
|
.. autoclass:: moto.textract.models.TextractBackend
|
||||||
|
|
||||||
|
|start-h3| Example usage |end-h3|
|
||||||
|
|
||||||
|
.. sourcecode:: python
|
||||||
|
|
||||||
|
@mock_textract
|
||||||
|
def test_textract_behaviour():
|
||||||
|
boto3.client("textract")
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|start-h3| Implemented features for this service |end-h3|
|
||||||
|
|
||||||
|
- [ ] analyze_document
|
||||||
|
- [ ] analyze_expense
|
||||||
|
- [ ] analyze_id
|
||||||
|
- [ ] can_paginate
|
||||||
|
- [ ] detect_document_text
|
||||||
|
- [ ] get_document_analysis
|
||||||
|
- [X] get_document_text_detection
|
||||||
|
- [ ] get_expense_analysis
|
||||||
|
- [ ] get_paginator
|
||||||
|
- [ ] get_waiter
|
||||||
|
- [ ] start_document_analysis
|
||||||
|
- [X] start_document_text_detection
|
||||||
|
- [ ] start_expense_analysis
|
||||||
|
|
||||||
|
Pagination has not yet been implemented
|
||||||
|
|
@ -2,4 +2,4 @@
|
|||||||
from .models import textract_backends
|
from .models import textract_backends
|
||||||
from ..core.models import base_decorator
|
from ..core.models import base_decorator
|
||||||
|
|
||||||
mock_textract = base_decorator(textract_backends)
|
mock_textract = base_decorator(textract_backends)
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
"""Exceptions raised by the textract service."""
|
"""Exceptions raised by the textract service."""
|
||||||
from moto.core.exceptions import JsonRESTError
|
from moto.core.exceptions import JsonRESTError
|
||||||
|
|
||||||
|
|
||||||
class InvalidJobIdException(JsonRESTError):
|
class InvalidJobIdException(JsonRESTError):
|
||||||
code = 400
|
code = 400
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
__class__.__name__,
|
__class__.__name__, "An invalid job identifier was passed.",
|
||||||
"An invalid job identifier was passed.",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class InvalidS3ObjectException(JsonRESTError):
|
class InvalidS3ObjectException(JsonRESTError):
|
||||||
code = 400
|
code = 400
|
||||||
|
|
||||||
@ -19,6 +20,7 @@ class InvalidS3ObjectException(JsonRESTError):
|
|||||||
"Amazon Textract is unable to access the S3 object that's specified in the request.",
|
"Amazon Textract is unable to access the S3 object that's specified in the request.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class InvalidParameterException(JsonRESTError):
|
class InvalidParameterException(JsonRESTError):
|
||||||
code = 400
|
code = 400
|
||||||
|
|
||||||
@ -27,4 +29,3 @@ class InvalidParameterException(JsonRESTError):
|
|||||||
__class__.__name__,
|
__class__.__name__,
|
||||||
"An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again.",
|
"An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -9,18 +9,25 @@ from moto.core.utils import BackendDict
|
|||||||
|
|
||||||
from .exceptions import InvalidParameterException, InvalidJobIdException
|
from .exceptions import InvalidParameterException, InvalidJobIdException
|
||||||
|
|
||||||
|
|
||||||
class TextractJobStatus:
|
class TextractJobStatus:
|
||||||
in_progress = "IN_PROGRESS"
|
in_progress = "IN_PROGRESS"
|
||||||
succeeded = "SUCCEEDED"
|
succeeded = "SUCCEEDED"
|
||||||
failed = "FAILED"
|
failed = "FAILED"
|
||||||
partial_success = "PARTIAL_SUCCESS"
|
partial_success = "PARTIAL_SUCCESS"
|
||||||
|
|
||||||
|
|
||||||
class TextractJob(BaseModel):
|
class TextractJob(BaseModel):
|
||||||
def __init__(self, job):
|
def __init__(self, job):
|
||||||
self.job = job
|
self.job = job
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
return self.job
|
||||||
|
|
||||||
|
|
||||||
class TextractBackend(BaseBackend):
|
class TextractBackend(BaseBackend):
|
||||||
"""Implementation of Textract APIs."""
|
"""Implementation of Textract APIs."""
|
||||||
|
|
||||||
JOB_STATUS = TextractJobStatus.succeeded
|
JOB_STATUS = TextractJobStatus.succeeded
|
||||||
PAGES = {"Pages": randint(5, 500)}
|
PAGES = {"Pages": randint(5, 500)}
|
||||||
BLOCKS = []
|
BLOCKS = []
|
||||||
@ -54,12 +61,14 @@ class TextractBackend(BaseBackend):
|
|||||||
if not document_location:
|
if not document_location:
|
||||||
raise InvalidParameterException()
|
raise InvalidParameterException()
|
||||||
job_id = str(uuid.uuid4())
|
job_id = str(uuid.uuid4())
|
||||||
self.async_text_detection_jobs[job_id] = TextractJob({
|
self.async_text_detection_jobs[job_id] = TextractJob(
|
||||||
"Blocks": TextractBackend.BLOCKS,
|
{
|
||||||
"DetectDocumentTextModelVersion": "1.0",
|
"Blocks": TextractBackend.BLOCKS,
|
||||||
"DocumentMetadata": {"Pages": TextractBackend.PAGES},
|
"DetectDocumentTextModelVersion": "1.0",
|
||||||
"JobStatus": TextractBackend.JOB_STATUS,
|
"DocumentMetadata": {"Pages": TextractBackend.PAGES},
|
||||||
})
|
"JobStatus": TextractBackend.JOB_STATUS,
|
||||||
|
}
|
||||||
|
)
|
||||||
return job_id
|
return job_id
|
||||||
|
|
||||||
|
|
||||||
|
@ -14,18 +14,15 @@ class TextractResponse(BaseResponse):
|
|||||||
return textract_backends[self.region]
|
return textract_backends[self.region]
|
||||||
|
|
||||||
def get_document_text_detection(self):
|
def get_document_text_detection(self):
|
||||||
params = self._get_params()
|
params = json.loads(self.body)
|
||||||
job_id = params.get("JobId")
|
job_id = params.get("JobId")
|
||||||
max_results = params.get("MaxResults")
|
max_results = params.get("MaxResults")
|
||||||
next_token = params.get("NextToken")
|
next_token = params.get("NextToken")
|
||||||
job = self.textract_backend.get_document_text_detection(
|
job = self.textract_backend.get_document_text_detection(
|
||||||
job_id=job_id,
|
job_id=job_id, max_results=max_results, next_token=next_token,
|
||||||
max_results=max_results,
|
).to_dict()
|
||||||
next_token=next_token,
|
|
||||||
)
|
|
||||||
return json.dumps(job)
|
return json.dumps(job)
|
||||||
|
|
||||||
|
|
||||||
def start_document_text_detection(self):
|
def start_document_text_detection(self):
|
||||||
params = json.loads(self.body)
|
params = json.loads(self.body)
|
||||||
document_location = params.get("DocumentLocation")
|
document_location = params.get("DocumentLocation")
|
||||||
|
@ -5,8 +5,6 @@ url_bases = [
|
|||||||
r"https?://textract\.(.+)\.amazonaws\.com",
|
r"https?://textract\.(.+)\.amazonaws\.com",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
url_paths = {
|
url_paths = {
|
||||||
"{0}/$": TextractResponse.dispatch,
|
"{0}/$": TextractResponse.dispatch,
|
||||||
}
|
}
|
||||||
|
@ -1,13 +1,81 @@
|
|||||||
"""Test different server responses."""
|
"""Test different server responses."""
|
||||||
import sure # noqa # pylint: disable=unused-import
|
import sure # noqa # pylint: disable=unused-import
|
||||||
|
|
||||||
|
import json
|
||||||
import moto.server as server
|
import moto.server as server
|
||||||
|
from moto import mock_textract
|
||||||
|
|
||||||
|
|
||||||
def test_textract_list():
|
@mock_textract
|
||||||
|
def test_textract_start_text_detection():
|
||||||
backend = server.create_backend_app("textract")
|
backend = server.create_backend_app("textract")
|
||||||
test_client = backend.test_client()
|
test_client = backend.test_client()
|
||||||
|
|
||||||
resp = test_client.get("/")
|
headers = {"X-Amz-Target": "X-Amz-Target=Textract.StartDocumentTextDetection"}
|
||||||
|
request_body = {
|
||||||
|
"DocumentLocation": {
|
||||||
|
"S3Object": {"Bucket": "bucket", "Name": "name", "Version": "version",}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp = test_client.post("/", headers=headers, json=request_body)
|
||||||
|
data = json.loads(resp.data.decode("utf-8"))
|
||||||
resp.status_code.should.equal(200)
|
resp.status_code.should.equal(200)
|
||||||
str(resp.data).should.contain("?")
|
data["JobId"].should.be.an(str)
|
||||||
|
|
||||||
|
|
||||||
|
@mock_textract
|
||||||
|
def test_textract_start_text_detection_without_document_location():
|
||||||
|
backend = server.create_backend_app("textract")
|
||||||
|
test_client = backend.test_client()
|
||||||
|
|
||||||
|
headers = {"X-Amz-Target": "X-Amz-Target=Textract.StartDocumentTextDetection"}
|
||||||
|
resp = test_client.post("/", headers=headers, json={})
|
||||||
|
data = json.loads(resp.data.decode("utf-8"))
|
||||||
|
resp.status_code.should.equal(400)
|
||||||
|
data["__type"].should.equal("InvalidParameterException")
|
||||||
|
data["message"].should.equal(
|
||||||
|
"An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@mock_textract
|
||||||
|
def test_textract_get_text_detection():
|
||||||
|
backend = server.create_backend_app("textract")
|
||||||
|
test_client = backend.test_client()
|
||||||
|
|
||||||
|
headers = {"X-Amz-Target": "X-Amz-Target=Textract.StartDocumentTextDetection"}
|
||||||
|
request_body = {
|
||||||
|
"DocumentLocation": {
|
||||||
|
"S3Object": {"Bucket": "bucket", "Name": "name", "Version": "version",}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp = test_client.post("/", headers=headers, json=request_body)
|
||||||
|
start_job_data = json.loads(resp.data.decode("utf-8"))
|
||||||
|
|
||||||
|
headers = {"X-Amz-Target": "X-Amz-Target=Textract.GetDocumentTextDetection"}
|
||||||
|
request_body = {
|
||||||
|
"JobId": start_job_data["JobId"],
|
||||||
|
}
|
||||||
|
resp = test_client.post("/", headers=headers, json=request_body)
|
||||||
|
resp.status_code.should.equal(200)
|
||||||
|
data = json.loads(resp.data.decode("utf-8"))
|
||||||
|
data["JobStatus"].should.equal("SUCCEEDED")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_textract
|
||||||
|
def test_textract_get_text_detection_without_job_id():
|
||||||
|
backend = server.create_backend_app("textract")
|
||||||
|
test_client = backend.test_client()
|
||||||
|
|
||||||
|
headers = {"X-Amz-Target": "X-Amz-Target=Textract.GetDocumentTextDetection"}
|
||||||
|
request_body = {
|
||||||
|
"JobId": "invalid_job_id",
|
||||||
|
}
|
||||||
|
resp = test_client.post("/", headers=headers, json=request_body)
|
||||||
|
resp.status_code.should.equal(400)
|
||||||
|
data = json.loads(resp.data.decode("utf-8"))
|
||||||
|
print(data)
|
||||||
|
data["__type"].should.equal("InvalidJobIdException")
|
||||||
|
data["message"].should.equal("An invalid job identifier was passed.")
|
||||||
|
@ -1,31 +1,81 @@
|
|||||||
"""Unit tests for textract-supported APIs."""
|
"""Unit tests for textract-supported APIs."""
|
||||||
|
from random import randint
|
||||||
|
from botocore.exceptions import ClientError, ParamValidationError
|
||||||
|
import pytest
|
||||||
import boto3
|
import boto3
|
||||||
|
|
||||||
|
from moto.textract.models import TextractBackend
|
||||||
from moto import mock_textract
|
from moto import mock_textract
|
||||||
|
|
||||||
# See our Development Tips on writing tests for hints on how to write good tests:
|
# See our Development Tips on writing tests for hints on how to write good tests:
|
||||||
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
|
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
|
||||||
|
|
||||||
|
|
||||||
|
@mock_textract
|
||||||
|
def test_get_document_text_detection():
|
||||||
|
TextractBackend.JOB_STATUS = "SUCCEEDED"
|
||||||
|
TextractBackend.PAGES = randint(5, 500)
|
||||||
|
TextractBackend.BLOCKS = [
|
||||||
|
{
|
||||||
|
"Text": "This is a test",
|
||||||
|
"Id": "0",
|
||||||
|
"Confidence": "100",
|
||||||
|
"Geometry": {
|
||||||
|
"BoundingBox": {
|
||||||
|
"Width": "0.5",
|
||||||
|
"Height": "0.5",
|
||||||
|
"Left": "0.5",
|
||||||
|
"Top": "0.5",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
# @mock_textract
|
client = boto3.client("textract", region_name="us-east-1")
|
||||||
# def test_get_document_text_detection():
|
job = client.start_document_text_detection(
|
||||||
# client = boto3.client("textract", region_name="us-east-1")
|
DocumentLocation={"S3Object": {"Bucket": "bucket", "Name": "name"}},
|
||||||
# resp = client.get_document_text_detection()
|
)
|
||||||
|
|
||||||
# raise Exception("NotYetImplemented")
|
resp = client.get_document_text_detection(JobId=job["JobId"])
|
||||||
|
|
||||||
|
resp["Blocks"][0]["Text"].should.equal("This is a test")
|
||||||
|
resp["Blocks"][0]["Id"].should.equal("0")
|
||||||
|
resp["Blocks"][0]["Confidence"].should.equal("100")
|
||||||
|
resp["Blocks"][0]["Geometry"]["BoundingBox"]["Width"].should.equal("0.5")
|
||||||
|
resp["Blocks"][0]["Geometry"]["BoundingBox"]["Height"].should.equal("0.5")
|
||||||
|
resp["Blocks"][0]["Geometry"]["BoundingBox"]["Left"].should.equal("0.5")
|
||||||
|
resp["Blocks"][0]["Geometry"]["BoundingBox"]["Top"].should.equal("0.5")
|
||||||
|
resp["JobStatus"].should.equal("SUCCEEDED")
|
||||||
|
resp["DocumentMetadata"]["Pages"].should.equal(TextractBackend.PAGES)
|
||||||
|
|
||||||
|
|
||||||
@mock_textract
|
@mock_textract
|
||||||
def test_start_document_text_detection():
|
def test_start_document_text_detection():
|
||||||
client = boto3.client("textract", region_name="us-east-1")
|
client = boto3.client("textract", region_name="us-east-1")
|
||||||
resp = client.start_document_text_detection(
|
resp = client.start_document_text_detection(
|
||||||
DocumentLocation={
|
DocumentLocation={"S3Object": {"Bucket": "bucket", "Name": "name",}},
|
||||||
'S3Object': {
|
|
||||||
'Bucket': 'bucket',
|
|
||||||
'Name': 'name',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
resp.should.have.key("JobId")
|
resp.should.have.key("JobId")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_textract
|
||||||
|
def test_get_document_text_detection_without_job_id():
|
||||||
|
client = boto3.client("textract", region_name="us-east-1")
|
||||||
|
with pytest.raises(ClientError) as e:
|
||||||
|
client.get_document_text_detection(JobId="Invalid Job Id")
|
||||||
|
|
||||||
|
e.value.response["Error"]["Code"].should.equal("InvalidJobIdException")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_textract
|
||||||
|
def test_get_document_text_detection_without_document_location():
|
||||||
|
client = boto3.client("textract", region_name="us-east-1")
|
||||||
|
with pytest.raises(ParamValidationError) as e:
|
||||||
|
client.start_document_text_detection()
|
||||||
|
|
||||||
|
assert e.typename == "ParamValidationError"
|
||||||
|
assert (
|
||||||
|
'Parameter validation failed:\nMissing required parameter in input: "DocumentLocation"'
|
||||||
|
in e.value.args
|
||||||
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user