Merge pull request #4869 from ecumene/master
This commit is contained in:
commit
412153aeb7
45
docs/docs/services/textract.rst
Normal file
45
docs/docs/services/textract.rst
Normal file
@ -0,0 +1,45 @@
|
||||
.. _implementedservice_textract:
|
||||
|
||||
.. |start-h3| raw:: html
|
||||
|
||||
<h3>
|
||||
|
||||
.. |end-h3| raw:: html
|
||||
|
||||
</h3>
|
||||
|
||||
==========
|
||||
textract
|
||||
==========
|
||||
|
||||
.. autoclass:: moto.textract.models.TextractBackend
|
||||
|
||||
|start-h3| Example usage |end-h3|
|
||||
|
||||
.. sourcecode:: python
|
||||
|
||||
@mock_textract
|
||||
def test_textract_behaviour():
|
||||
boto3.client("textract")
|
||||
...
|
||||
|
||||
|
||||
|
||||
|start-h3| Implemented features for this service |end-h3|
|
||||
|
||||
- [ ] analyze_document
|
||||
- [ ] analyze_expense
|
||||
- [ ] analyze_id
|
||||
- [ ] can_paginate
|
||||
- [ ] detect_document_text
|
||||
- [ ] get_document_analysis
|
||||
- [X] get_document_text_detection
|
||||
- [ ] get_expense_analysis
|
||||
- [ ] get_paginator
|
||||
- [ ] get_waiter
|
||||
- [ ] start_document_analysis
|
||||
- [X] start_document_text_detection
|
||||
- [ ] start_expense_analysis
|
||||
|
||||
Pagination has not yet been implemented
|
||||
|
@ -149,6 +149,7 @@ XRaySegment = lazy_load(".xray", "XRaySegment")
|
||||
mock_xray = lazy_load(".xray", "mock_xray")
|
||||
mock_xray_client = lazy_load(".xray", "mock_xray_client")
|
||||
mock_wafv2 = lazy_load(".wafv2", "mock_wafv2")
|
||||
mock_textract = lazy_load(".textract", "mock_textract")
|
||||
|
||||
|
||||
class MockAll(ContextDecorator):
|
||||
|
@ -139,6 +139,7 @@ backend_url_patterns = [
|
||||
("sts", re.compile("https?://sts\\.(.*\\.)?amazonaws\\.com")),
|
||||
("support", re.compile("https?://support\\.(.+)\\.amazonaws\\.com")),
|
||||
("swf", re.compile("https?://swf\\.(.+)\\.amazonaws\\.com")),
|
||||
("textract", re.compile("https?://textract\\.(.+)\\.amazonaws\\.com")),
|
||||
(
|
||||
"timestream-write",
|
||||
re.compile("https?://ingest\\.timestream\\.(.+)\\.amazonaws\\.com"),
|
||||
|
5
moto/textract/__init__.py
Normal file
5
moto/textract/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""textract module initialization; sets value for base decorator."""
|
||||
from .models import textract_backends
|
||||
from ..core.models import base_decorator
|
||||
|
||||
# Decorator applied to tests as ``@mock_textract``; built by handing the
# textract backends (indexed by region in responses.py) to base_decorator.
mock_textract = base_decorator(textract_backends)
|
31
moto/textract/exceptions.py
Normal file
31
moto/textract/exceptions.py
Normal file
@ -0,0 +1,31 @@
|
||||
"""Exceptions raised by the textract service."""
|
||||
from moto.core.exceptions import JsonRESTError
|
||||
|
||||
|
||||
class InvalidJobIdException(JsonRESTError):
    """Error raised when a request names a job id the backend does not hold.

    Reaches clients with error type ``InvalidJobIdException`` and HTTP
    status 400.
    """

    code = 400

    def __init__(self):
        # ``__class__`` is the class this method is defined in, so the error
        # type reported to the client is always this class's own name.
        error_type = __class__.__name__
        message = "An invalid job identifier was passed."
        super().__init__(error_type, message)
|
||||
|
||||
|
||||
class InvalidS3ObjectException(JsonRESTError):
    """Error describing an S3 object that Textract cannot access.

    Carries ``code = 400``; the error type passed to the base class is this
    class's name.
    """

    code = 400

    def __init__(self):
        error_type = __class__.__name__
        message = "Amazon Textract is unable to access the S3 object that's specified in the request."
        super().__init__(error_type, message)
|
||||
|
||||
|
||||
class InvalidParameterException(JsonRESTError):
    """Error raised when a request parameter violates a constraint.

    The backend raises it when ``start_document_text_detection`` is called
    without a document location. Reaches clients with error type
    ``InvalidParameterException`` and HTTP status 400.
    """

    code = 400

    def __init__(self):
        error_type = __class__.__name__
        message = "An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again."
        super().__init__(error_type, message)
|
75
moto/textract/models.py
Normal file
75
moto/textract/models.py
Normal file
@ -0,0 +1,75 @@
|
||||
"""TextractBackend class with methods for supported APIs."""
|
||||
|
||||
import uuid
|
||||
from random import randint
|
||||
from collections import defaultdict
|
||||
|
||||
from moto.core import BaseBackend, BaseModel
|
||||
from moto.core.utils import BackendDict
|
||||
|
||||
from .exceptions import InvalidParameterException, InvalidJobIdException
|
||||
|
||||
|
||||
class TextractJobStatus:
    """String constants used for the ``JobStatus`` field of a Textract job."""

    # Job has been started and has not finished yet.
    in_progress = "IN_PROGRESS"

    # Finished outcomes.
    succeeded = "SUCCEEDED"
    failed = "FAILED"
    partial_success = "PARTIAL_SUCCESS"
|
||||
|
||||
|
||||
class TextractJob(BaseModel):
    """Holder for the response payload of a single Textract job."""

    def __init__(self, job):
        # The payload is stored by reference; no copy is made.
        self.job = job

    def to_dict(self):
        """Return the stored job payload, i.e. the object given to ``__init__``."""
        payload = self.job
        return payload
|
||||
|
||||
|
||||
class TextractBackend(BaseBackend):
    """Implementation of Textract APIs.

    The class attributes below are canned values copied into every job
    created by ``start_document_text_detection``. Tests can rebind them on
    the class before starting a job to control what the job reports.
    """

    # Reported as ``JobStatus``.
    JOB_STATUS = TextractJobStatus.succeeded
    # Reported as ``DocumentMetadata.Pages``. Kept as a plain integer: the job
    # payload already wraps it in ``{"Pages": ...}``, so a dict here would be
    # emitted as ``{"Pages": {"Pages": n}}``.
    PAGES = randint(5, 500)
    # Reported as ``Blocks``.
    BLOCKS = []

    def __init__(self, region_name=None):
        self.region_name = region_name
        # Maps job id -> TextractJob. No default_factory is given, so looking
        # up an unknown id never creates an entry.
        self.async_text_detection_jobs = defaultdict()

    def reset(self):
        """Re-initialize all attributes for this instance."""
        region_name = self.region_name
        # Drop every instance attribute, then rebuild the initial state.
        self.__dict__ = {}
        self.__init__(region_name)

    def get_document_text_detection(self, job_id, max_results, next_token):
        """Return the job stored under ``job_id``.

        ``max_results`` and ``next_token`` are accepted but not used;
        pagination has not yet been implemented.

        Raises:
            InvalidJobIdException: if no job is stored under ``job_id``.
        """
        job = self.async_text_detection_jobs.get(job_id)
        if job is None:
            raise InvalidJobIdException()
        return job

    def start_document_text_detection(
        self,
        document_location,
        client_request_token,
        job_tag,
        notification_channel,
        output_config,
        kms_key_id,
    ):
        """Register a new text-detection job and return its id.

        Only ``document_location`` is inspected, and only for presence; the
        remaining arguments are accepted but not used. The job payload is
        built from the class-level ``BLOCKS``, ``PAGES`` and ``JOB_STATUS``
        values at the time of the call.

        Returns:
            The id of the new job, a random UUID4 rendered as a string.

        Raises:
            InvalidParameterException: if ``document_location`` is missing
                or empty.
        """
        if not document_location:
            raise InvalidParameterException()
        job_id = str(uuid.uuid4())
        self.async_text_detection_jobs[job_id] = TextractJob(
            {
                "Blocks": TextractBackend.BLOCKS,
                "DetectDocumentTextModelVersion": "1.0",
                "DocumentMetadata": {"Pages": TextractBackend.PAGES},
                "JobStatus": TextractBackend.JOB_STATUS,
            }
        )
        return job_id
|
||||
|
||||
|
||||
# Module-level collection of TextractBackend instances for the "textract"
# service; the response handler picks one with ``textract_backends[region]``.
textract_backends = BackendDict(TextractBackend, "textract")
|
42
moto/textract/responses.py
Normal file
42
moto/textract/responses.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""Handles incoming textract requests, invokes methods, returns responses."""
|
||||
import json
|
||||
|
||||
from moto.core.responses import BaseResponse
|
||||
from .models import textract_backends
|
||||
|
||||
|
||||
class TextractResponse(BaseResponse):
    """Handler for Textract requests and responses."""

    @property
    def textract_backend(self):
        """Return backend instance specific for this region."""
        region = self.region
        return textract_backends[region]

    def get_document_text_detection(self):
        """Fetch a text-detection job and return its payload as a JSON string.

        Reads ``JobId``, ``MaxResults`` and ``NextToken`` from the JSON
        request body; keys that are absent are passed on as ``None``.
        """
        payload = json.loads(self.body)
        job = self.textract_backend.get_document_text_detection(
            job_id=payload.get("JobId"),
            max_results=payload.get("MaxResults"),
            next_token=payload.get("NextToken"),
        )
        return json.dumps(job.to_dict())

    def start_document_text_detection(self):
        """Start a text-detection job and return ``{"JobId": ...}`` as JSON.

        Every supported request key is forwarded to the backend; keys that
        are absent from the request body are passed on as ``None``.
        """
        payload = json.loads(self.body)
        # Backend keyword argument -> key in the JSON request body.
        request_keys = {
            "document_location": "DocumentLocation",
            "client_request_token": "ClientRequestToken",
            "job_tag": "JobTag",
            "notification_channel": "NotificationChannel",
            "output_config": "OutputConfig",
            "kms_key_id": "KMSKeyId",
        }
        backend_kwargs = {
            argument: payload.get(key) for argument, key in request_keys.items()
        }
        job_id = self.textract_backend.start_document_text_detection(**backend_kwargs)
        return json.dumps({"JobId": job_id})
|
10
moto/textract/urls.py
Normal file
10
moto/textract/urls.py
Normal file
@ -0,0 +1,10 @@
|
||||
"""textract base URL and path."""
|
||||
from .responses import TextractResponse
|
||||
|
||||
# Hosts handled by this service: textract.*.amazonaws.com over http or https.
url_bases = [r"https?://textract\.(.+)\.amazonaws\.com"]

# Only the root path is routed, and it goes to TextractResponse.dispatch.
# NOTE(review): "{0}" looks like a placeholder for each url base - confirm.
url_paths = {"{0}/$": TextractResponse.dispatch}
|
0
tests/test_textract/__init__.py
Normal file
0
tests/test_textract/__init__.py
Normal file
80
tests/test_textract/test_server.py
Normal file
80
tests/test_textract/test_server.py
Normal file
@ -0,0 +1,80 @@
|
||||
"""Test different server responses."""
|
||||
import sure # noqa # pylint: disable=unused-import
|
||||
|
||||
import json
|
||||
import moto.server as server
|
||||
from moto import mock_textract
|
||||
|
||||
|
||||
@mock_textract
def test_textract_start_text_detection():
    """Starting a job with an S3 document location returns 200 and a string JobId."""
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()

    # The header value is just "Service.Operation"; the header name is not
    # repeated inside the value.
    headers = {"X-Amz-Target": "Textract.StartDocumentTextDetection"}
    request_body = {
        "DocumentLocation": {
            "S3Object": {"Bucket": "bucket", "Name": "name", "Version": "version"}
        }
    }

    resp = test_client.post("/", headers=headers, json=request_body)
    data = json.loads(resp.data.decode("utf-8"))
    resp.status_code.should.equal(200)
    data["JobId"].should.be.an(str)
|
||||
|
||||
|
||||
@mock_textract
def test_textract_start_text_detection_without_document_location():
    """Starting a job with an empty body returns 400 InvalidParameterException."""
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()

    # The header value is just "Service.Operation"; the header name is not
    # repeated inside the value.
    headers = {"X-Amz-Target": "Textract.StartDocumentTextDetection"}
    resp = test_client.post("/", headers=headers, json={})
    data = json.loads(resp.data.decode("utf-8"))
    resp.status_code.should.equal(400)
    data["__type"].should.equal("InvalidParameterException")
    data["message"].should.equal(
        "An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again."
    )
|
||||
|
||||
|
||||
@mock_textract
def test_textract_get_text_detection():
    """A job that was just started can be fetched and reports SUCCEEDED."""
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()

    # The header value is just "Service.Operation"; the header name is not
    # repeated inside the value.
    headers = {"X-Amz-Target": "Textract.StartDocumentTextDetection"}
    request_body = {
        "DocumentLocation": {
            "S3Object": {"Bucket": "bucket", "Name": "name", "Version": "version"}
        }
    }

    resp = test_client.post("/", headers=headers, json=request_body)
    start_job_data = json.loads(resp.data.decode("utf-8"))

    # Fetch the job using the id returned by the start call.
    headers = {"X-Amz-Target": "Textract.GetDocumentTextDetection"}
    request_body = {
        "JobId": start_job_data["JobId"],
    }
    resp = test_client.post("/", headers=headers, json=request_body)
    resp.status_code.should.equal(200)
    data = json.loads(resp.data.decode("utf-8"))
    data["JobStatus"].should.equal("SUCCEEDED")
|
||||
|
||||
|
||||
@mock_textract
def test_textract_get_text_detection_without_job_id():
    """Fetching a job id that was never issued returns 400 InvalidJobIdException."""
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()

    # The header value is just "Service.Operation"; the header name is not
    # repeated inside the value.
    headers = {"X-Amz-Target": "Textract.GetDocumentTextDetection"}
    request_body = {
        "JobId": "invalid_job_id",
    }
    resp = test_client.post("/", headers=headers, json=request_body)
    resp.status_code.should.equal(400)
    data = json.loads(resp.data.decode("utf-8"))
    data["__type"].should.equal("InvalidJobIdException")
    data["message"].should.equal("An invalid job identifier was passed.")
|
85
tests/test_textract/test_textract.py
Normal file
85
tests/test_textract/test_textract.py
Normal file
@ -0,0 +1,85 @@
|
||||
"""Unit tests for textract-supported APIs."""
|
||||
from random import randint
|
||||
from botocore.exceptions import ClientError, ParamValidationError
|
||||
import pytest
|
||||
import boto3
|
||||
|
||||
from unittest import SkipTest
|
||||
from moto.textract.models import TextractBackend
|
||||
from moto import settings, mock_textract
|
||||
|
||||
# See our Development Tips on writing tests for hints on how to write good tests:
|
||||
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
|
||||
|
||||
|
||||
@mock_textract
def test_get_document_text_detection():
    """A started job reports the blocks, status and page count set on the backend class."""
    if settings.TEST_SERVER_MODE:
        raise SkipTest("Cannot set textract backend values in server mode")

    # Remember the class-level values so they can be put back afterwards;
    # otherwise the overrides below stay in place for every later test.
    saved = (
        TextractBackend.JOB_STATUS,
        TextractBackend.PAGES,
        TextractBackend.BLOCKS,
    )

    TextractBackend.JOB_STATUS = "SUCCEEDED"
    TextractBackend.PAGES = randint(5, 500)
    TextractBackend.BLOCKS = [
        {
            "Text": "This is a test",
            "Id": "0",
            "Confidence": "100",
            "Geometry": {
                "BoundingBox": {
                    "Width": "0.5",
                    "Height": "0.5",
                    "Left": "0.5",
                    "Top": "0.5",
                },
            },
        }
    ]

    try:
        client = boto3.client("textract", region_name="us-east-1")
        job = client.start_document_text_detection(
            DocumentLocation={"S3Object": {"Bucket": "bucket", "Name": "name"}},
        )

        resp = client.get_document_text_detection(JobId=job["JobId"])

        resp["Blocks"][0]["Text"].should.equal("This is a test")
        resp["Blocks"][0]["Id"].should.equal("0")
        resp["Blocks"][0]["Confidence"].should.equal("100")
        resp["Blocks"][0]["Geometry"]["BoundingBox"]["Width"].should.equal("0.5")
        resp["Blocks"][0]["Geometry"]["BoundingBox"]["Height"].should.equal("0.5")
        resp["Blocks"][0]["Geometry"]["BoundingBox"]["Left"].should.equal("0.5")
        resp["Blocks"][0]["Geometry"]["BoundingBox"]["Top"].should.equal("0.5")
        resp["JobStatus"].should.equal("SUCCEEDED")
        resp["DocumentMetadata"]["Pages"].should.equal(TextractBackend.PAGES)
    finally:
        (
            TextractBackend.JOB_STATUS,
            TextractBackend.PAGES,
            TextractBackend.BLOCKS,
        ) = saved
|
||||
|
||||
|
||||
@mock_textract
def test_start_document_text_detection():
    """Starting a job with an S3 document location yields a response with a ``JobId`` key."""
    s3_object = {"Bucket": "bucket", "Name": "name"}
    textract = boto3.client("textract", region_name="us-east-1")

    response = textract.start_document_text_detection(
        DocumentLocation={"S3Object": s3_object}
    )

    response.should.have.key("JobId")
|
||||
|
||||
|
||||
@mock_textract
def test_get_document_text_detection_without_job_id():
    """Fetching a job id that was never issued raises a ClientError coded InvalidJobIdException."""
    textract = boto3.client("textract", region_name="us-east-1")

    with pytest.raises(ClientError) as exc_info:
        textract.get_document_text_detection(JobId="Invalid Job Id")

    error = exc_info.value.response["Error"]
    error["Code"].should.equal("InvalidJobIdException")
|
||||
|
||||
|
||||
@mock_textract
def test_get_document_text_detection_without_document_location():
    """Calling start_document_text_detection with no arguments raises ParamValidationError.

    Despite the test's name, the call under test is
    ``start_document_text_detection``, not ``get_document_text_detection``.
    """
    expected_message = 'Parameter validation failed:\nMissing required parameter in input: "DocumentLocation"'
    textract = boto3.client("textract", region_name="us-east-1")

    with pytest.raises(ParamValidationError) as exc_info:
        textract.start_document_text_detection()

    assert exc_info.typename == "ParamValidationError"
    # The message must appear as one of the exception's args, unmodified.
    assert expected_message in exc_info.value.args
|
Loading…
Reference in New Issue
Block a user