Merge pull request #4869 from ecumene/master

This commit is contained in:
Bert Blommers 2022-02-21 20:20:02 -01:00 committed by GitHub
commit 412153aeb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 375 additions and 0 deletions

View File

@ -0,0 +1,45 @@
.. _implementedservice_textract:
.. |start-h3| raw:: html
<h3>
.. |end-h3| raw:: html
</h3>
==========
textract
==========
.. autoclass:: moto.textract.models.TextractBackend
|start-h3| Example usage |end-h3|
.. sourcecode:: python
@mock_textract
def test_textract_behaviour():
boto3.client("textract")
...
|start-h3| Implemented features for this service |end-h3|
- [ ] analyze_document
- [ ] analyze_expense
- [ ] analyze_id
- [ ] can_paginate
- [ ] detect_document_text
- [ ] get_document_analysis
- [X] get_document_text_detection
- [ ] get_expense_analysis
- [ ] get_paginator
- [ ] get_waiter
- [ ] start_document_analysis
- [X] start_document_text_detection
- [ ] start_expense_analysis
Pagination has not yet been implemented

View File

@ -149,6 +149,7 @@ XRaySegment = lazy_load(".xray", "XRaySegment")
mock_xray = lazy_load(".xray", "mock_xray")
mock_xray_client = lazy_load(".xray", "mock_xray_client")
mock_wafv2 = lazy_load(".wafv2", "mock_wafv2")
# Textract mock, registered through lazy_load like the services above.
mock_textract = lazy_load(".textract", "mock_textract")
class MockAll(ContextDecorator):

View File

@ -139,6 +139,7 @@ backend_url_patterns = [
("sts", re.compile("https?://sts\\.(.*\\.)?amazonaws\\.com")),
("support", re.compile("https?://support\\.(.+)\\.amazonaws\\.com")),
("swf", re.compile("https?://swf\\.(.+)\\.amazonaws\\.com")),
("textract", re.compile("https?://textract\\.(.+)\\.amazonaws\\.com")),
(
"timestream-write",
re.compile("https?://ingest\\.timestream\\.(.+)\\.amazonaws\\.com"),

View File

@ -0,0 +1,5 @@
"""textract module initialization; sets value for base decorator."""
from .models import textract_backends
from ..core.models import base_decorator
# Entry point for tests: built by base_decorator from the Textract backend
# registry imported from .models.
mock_textract = base_decorator(textract_backends)

View File

@ -0,0 +1,31 @@
"""Exceptions raised by the textract service."""
from moto.core.exceptions import JsonRESTError
class InvalidJobIdException(JsonRESTError):
    """HTTP 400 error reported when an invalid job identifier is passed."""

    code = 400

    def __init__(self):
        # The error type sent to the client is this class's own name.
        error_type = "InvalidJobIdException"
        message = "An invalid job identifier was passed."
        super().__init__(error_type, message)
class InvalidS3ObjectException(JsonRESTError):
    """HTTP 400 error reported when the requested S3 object is inaccessible."""

    code = 400

    def __init__(self):
        # The error type sent to the client is this class's own name.
        error_type = "InvalidS3ObjectException"
        message = "Amazon Textract is unable to access the S3 object that's specified in the request."
        super().__init__(error_type, message)
class InvalidParameterException(JsonRESTError):
    """HTTP 400 error reported when an input parameter violates a constraint."""

    code = 400

    def __init__(self):
        # The error type sent to the client is this class's own name.
        error_type = "InvalidParameterException"
        message = "An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again."
        super().__init__(error_type, message)

75
moto/textract/models.py Normal file
View File

@ -0,0 +1,75 @@
"""TextractBackend class with methods for supported APIs."""
import uuid
from random import randint
from collections import defaultdict
from moto.core import BaseBackend, BaseModel
from moto.core.utils import BackendDict
from .exceptions import InvalidParameterException, InvalidJobIdException
class TextractJobStatus:
    """Namespace of the status strings a Textract job can carry."""

    # Each attribute holds the exact string placed in a job's "JobStatus".
    in_progress = "IN_PROGRESS"
    succeeded = "SUCCEEDED"
    failed = "FAILED"
    partial_success = "PARTIAL_SUCCESS"
class TextractJob(BaseModel):
    """Holder for the response payload of a single text-detection job."""

    def __init__(self, job):
        # The payload is stored by reference; no copy is made.
        self.job = job

    def to_dict(self):
        """Return the stored job payload unchanged."""
        payload = self.job
        return payload
class TextractBackend(BaseBackend):
    """Implementation of Textract APIs.

    The class attributes ``JOB_STATUS``, ``PAGES`` and ``BLOCKS`` are the
    canned values copied into every job created by
    ``start_document_text_detection``. Tests may override them on the class
    to control what ``get_document_text_detection`` returns.
    """

    JOB_STATUS = TextractJobStatus.succeeded
    # Kept in its original ``{"Pages": n}`` form for backward compatibility;
    # it may also be overridden with a plain integer page count.
    PAGES = {"Pages": randint(5, 500)}
    BLOCKS = []

    def __init__(self, region_name=None):
        self.region_name = region_name
        # Maps job id -> TextractJob. No default factory is configured, so
        # missing keys are not created implicitly.
        self.async_text_detection_jobs = defaultdict()

    def reset(self):
        """Re-initialize all attributes for this instance."""
        region_name = self.region_name
        # Wiping __dict__ drops every attribute, including stored jobs;
        # __init__ then rebuilds the initial state for the same region.
        self.__dict__ = {}
        self.__init__(region_name)

    @staticmethod
    def _document_metadata():
        """Build the ``DocumentMetadata`` structure from ``PAGES``.

        ``PAGES`` is accepted either as a ready-made ``{"Pages": n}`` mapping
        (the class default) or as a bare page count. Wrapping the mapping a
        second time would produce ``{"Pages": {"Pages": n}}``, so a mapping
        is used as the metadata directly.
        """
        pages = TextractBackend.PAGES
        if isinstance(pages, dict):
            return dict(pages)
        return {"Pages": pages}

    def get_document_text_detection(self, job_id, max_results, next_token):
        """Return the job stored under ``job_id``.

        ``max_results`` and ``next_token`` are accepted but not used.

        Raises:
            InvalidJobIdException: if no job is stored under ``job_id``.
        """
        job = self.async_text_detection_jobs.get(job_id)
        if job is None:
            raise InvalidJobIdException()
        return job

    def start_document_text_detection(
        self,
        document_location,
        client_request_token,
        job_tag,
        notification_channel,
        output_config,
        kms_key_id,
    ):
        """Create a text-detection job and return its generated id.

        Only ``document_location`` is inspected; the remaining parameters are
        accepted but not used. The job payload is filled from the class-level
        ``BLOCKS``, ``PAGES`` and ``JOB_STATUS`` values.

        Raises:
            InvalidParameterException: if ``document_location`` is empty.
        """
        if not document_location:
            raise InvalidParameterException()
        job_id = str(uuid.uuid4())
        self.async_text_detection_jobs[job_id] = TextractJob(
            {
                "Blocks": TextractBackend.BLOCKS,
                "DetectDocumentTextModelVersion": "1.0",
                "DocumentMetadata": self._document_metadata(),
                "JobStatus": TextractBackend.JOB_STATUS,
            }
        )
        return job_id
# Registry of TextractBackend instances; the response handler indexes it by
# region name.
textract_backends = BackendDict(TextractBackend, "textract")

View File

@ -0,0 +1,42 @@
"""Handles incoming textract requests, invokes methods, returns responses."""
import json
from moto.core.responses import BaseResponse
from .models import textract_backends
class TextractResponse(BaseResponse):
    """Turn Textract requests into backend calls and JSON responses."""

    @property
    def textract_backend(self):
        """Backend serving the region of the current request."""
        return textract_backends[self.region]

    def get_document_text_detection(self):
        """Fetch the job named by ``JobId`` and return its payload as JSON."""
        body = json.loads(self.body)
        job = self.textract_backend.get_document_text_detection(
            job_id=body.get("JobId"),
            max_results=body.get("MaxResults"),
            next_token=body.get("NextToken"),
        )
        return json.dumps(job.to_dict())

    def start_document_text_detection(self):
        """Start a job from the request body and return its ``JobId`` as JSON."""
        body = json.loads(self.body)
        # Absent request fields are forwarded to the backend as None.
        backend_kwargs = {
            "document_location": body.get("DocumentLocation"),
            "client_request_token": body.get("ClientRequestToken"),
            "job_tag": body.get("JobTag"),
            "notification_channel": body.get("NotificationChannel"),
            "output_config": body.get("OutputConfig"),
            "kms_key_id": body.get("KMSKeyId"),
        }
        job_id = self.textract_backend.start_document_text_detection(
            **backend_kwargs
        )
        return json.dumps({"JobId": job_id})

10
moto/textract/urls.py Normal file
View File

@ -0,0 +1,10 @@
"""textract base URL and path."""
from .responses import TextractResponse
# Host patterns handled by this service: textract.<anything>.amazonaws.com
# over http or https.
url_bases = [
    r"https?://textract\.(.+)\.amazonaws\.com",
]
# Requests to the root path are handed to TextractResponse.dispatch.
# NOTE(review): "{0}" is presumably replaced with each url_bases entry by the
# URL registration code — confirm.
url_paths = {
    "{0}/$": TextractResponse.dispatch,
}

View File

View File

@ -0,0 +1,80 @@
"""Test different server responses."""
import sure # noqa # pylint: disable=unused-import
import json
import moto.server as server
from moto import mock_textract
@mock_textract
def test_textract_start_text_detection():
    """Starting a job through the server returns 200 and a string JobId."""
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()

    # The target header value has the form "<Service>.<Operation>"; it must
    # not repeat the header name.
    headers = {"X-Amz-Target": "Textract.StartDocumentTextDetection"}
    request_body = {
        "DocumentLocation": {
            "S3Object": {"Bucket": "bucket", "Name": "name", "Version": "version"}
        }
    }

    resp = test_client.post("/", headers=headers, json=request_body)
    data = json.loads(resp.data.decode("utf-8"))

    resp.status_code.should.equal(200)
    data["JobId"].should.be.an(str)
@mock_textract
def test_textract_start_text_detection_without_document_location():
    """Starting a job with an empty body yields InvalidParameterException."""
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()

    # The target header value has the form "<Service>.<Operation>"; it must
    # not repeat the header name.
    headers = {"X-Amz-Target": "Textract.StartDocumentTextDetection"}

    resp = test_client.post("/", headers=headers, json={})
    data = json.loads(resp.data.decode("utf-8"))

    resp.status_code.should.equal(400)
    data["__type"].should.equal("InvalidParameterException")
    data["message"].should.equal(
        "An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again."
    )
@mock_textract
def test_textract_get_text_detection():
    """A job started through the server can be fetched and is SUCCEEDED."""
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()

    # Step 1: start a job to obtain a valid JobId. The target header value
    # has the form "<Service>.<Operation>"; it must not repeat the header name.
    headers = {"X-Amz-Target": "Textract.StartDocumentTextDetection"}
    request_body = {
        "DocumentLocation": {
            "S3Object": {"Bucket": "bucket", "Name": "name", "Version": "version"}
        }
    }
    resp = test_client.post("/", headers=headers, json=request_body)
    start_job_data = json.loads(resp.data.decode("utf-8"))

    # Step 2: fetch the job that was just created.
    headers = {"X-Amz-Target": "Textract.GetDocumentTextDetection"}
    request_body = {
        "JobId": start_job_data["JobId"],
    }
    resp = test_client.post("/", headers=headers, json=request_body)

    resp.status_code.should.equal(200)
    data = json.loads(resp.data.decode("utf-8"))
    data["JobStatus"].should.equal("SUCCEEDED")
@mock_textract
def test_textract_get_text_detection_without_job_id():
    """Fetching a job with an unknown JobId yields InvalidJobIdException."""
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()

    # The target header value has the form "<Service>.<Operation>"; it must
    # not repeat the header name.
    headers = {"X-Amz-Target": "Textract.GetDocumentTextDetection"}
    request_body = {
        "JobId": "invalid_job_id",
    }

    resp = test_client.post("/", headers=headers, json=request_body)

    resp.status_code.should.equal(400)
    data = json.loads(resp.data.decode("utf-8"))
    data["__type"].should.equal("InvalidJobIdException")
    data["message"].should.equal("An invalid job identifier was passed.")

View File

@ -0,0 +1,85 @@
"""Unit tests for textract-supported APIs."""
from random import randint
from botocore.exceptions import ClientError, ParamValidationError
import pytest
import boto3
from unittest import SkipTest
from moto.textract.models import TextractBackend
from moto import settings, mock_textract
# See our Development Tips on writing tests for hints on how to write good tests:
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
@mock_textract
def test_get_document_text_detection():
    """A started job reports the canned blocks, status and page count."""
    if settings.TEST_SERVER_MODE:
        raise SkipTest("Cannot set textract backend values in server mode")

    # Remember the class-level values so the overrides below cannot leak
    # into other tests that share TextractBackend.
    saved_values = (
        TextractBackend.JOB_STATUS,
        TextractBackend.PAGES,
        TextractBackend.BLOCKS,
    )
    TextractBackend.JOB_STATUS = "SUCCEEDED"
    TextractBackend.PAGES = randint(5, 500)
    TextractBackend.BLOCKS = [
        {
            "Text": "This is a test",
            "Id": "0",
            "Confidence": "100",
            "Geometry": {
                "BoundingBox": {
                    "Width": "0.5",
                    "Height": "0.5",
                    "Left": "0.5",
                    "Top": "0.5",
                },
            },
        }
    ]
    try:
        client = boto3.client("textract", region_name="us-east-1")
        job = client.start_document_text_detection(
            DocumentLocation={"S3Object": {"Bucket": "bucket", "Name": "name"}},
        )

        resp = client.get_document_text_detection(JobId=job["JobId"])

        # Plain asserts: this module does not import ``sure``, so ``.should``
        # would only work if another module had imported it first.
        block = resp["Blocks"][0]
        assert block["Text"] == "This is a test"
        assert block["Id"] == "0"
        assert block["Confidence"] == "100"
        bounding_box = block["Geometry"]["BoundingBox"]
        assert bounding_box["Width"] == "0.5"
        assert bounding_box["Height"] == "0.5"
        assert bounding_box["Left"] == "0.5"
        assert bounding_box["Top"] == "0.5"
        assert resp["JobStatus"] == "SUCCEEDED"
        assert resp["DocumentMetadata"]["Pages"] == TextractBackend.PAGES
    finally:
        (
            TextractBackend.JOB_STATUS,
            TextractBackend.PAGES,
            TextractBackend.BLOCKS,
        ) = saved_values
@mock_textract
def test_start_document_text_detection():
    """Starting a job returns a response that contains a JobId."""
    client = boto3.client("textract", region_name="us-east-1")

    resp = client.start_document_text_detection(
        DocumentLocation={"S3Object": {"Bucket": "bucket", "Name": "name"}},
    )

    # Plain assert: this module does not import ``sure``, so ``.should``
    # would only work if another module had imported it first.
    assert "JobId" in resp
@mock_textract
def test_get_document_text_detection_without_job_id():
    """Fetching an unknown JobId raises InvalidJobIdException."""
    client = boto3.client("textract", region_name="us-east-1")

    with pytest.raises(ClientError) as e:
        client.get_document_text_detection(JobId="Invalid Job Id")

    # Plain assert: this module does not import ``sure``, so ``.should``
    # would only work if another module had imported it first.
    assert e.value.response["Error"]["Code"] == "InvalidJobIdException"
@mock_textract
def test_get_document_text_detection_without_document_location():
    """Starting a job without DocumentLocation raises ParamValidationError."""
    client = boto3.client("textract", region_name="us-east-1")

    with pytest.raises(ParamValidationError) as e:
        client.start_document_text_detection()

    assert e.typename == "ParamValidationError"
    expected_message = 'Parameter validation failed:\nMissing required parameter in input: "DocumentLocation"'
    assert expected_message in e.value.args