Textract: Addressed comments

2022-02-21 11:43:36 -03:30 · 2022-02-21 11:43:36 -03:30 · 7dc5adb5d5
commit 7dc5adb5d5
parent 64c16c715d
8 changed files with 200 additions and 32 deletions
--- a/docs/docs/services/textract.rst
+++ b/docs/docs/services/textract.rst
@ -0,0 +1,45 @@
 .. _implementedservice_textract:
 .. |start-h3| raw:: html
    <h3>
 .. |end-h3| raw:: html
    </h3>
 ==========
 textract
 ==========
 .. autoclass:: moto.textract.models.TextractBackend
 |start-h3| Example usage |end-h3|
 .. sourcecode:: python
            @mock_textract
            def test_textract_behaviour():
                boto3.client("textract")
                ...
 |start-h3| Implemented features for this service |end-h3|
 - [ ] analyze_document
 - [ ] analyze_expense
 - [ ] analyze_id
 - [ ] can_paginate
 - [ ] detect_document_text
 - [ ] get_document_analysis
 - [X] get_document_text_detection
 - [ ] get_expense_analysis
 - [ ] get_paginator
 - [ ] get_waiter
 - [ ] start_document_analysis
 - [X] start_document_text_detection
 - [ ] start_expense_analysis
        Pagination has not yet been implemented
--- a/moto/textract/init.py
+++ b/moto/textract/init.py
@ -2,4 +2,4 @@
 from .models import textract_backends
 from ..core.models import base_decorator
-mock_textract = base_decorator(textract_backends)
+mock_textract = base_decorator(textract_backends)
--- a/moto/textract/exceptions.py
+++ b/moto/textract/exceptions.py
@ -1,15 +1,16 @@
 """Exceptions raised by the textract service."""
 from moto.core.exceptions import JsonRESTError
 class InvalidJobIdException(JsonRESTError):
    code = 400
    def __init__(self):
        super().__init__(
-            __class__.__name__,
+            __class__.__name__, "An invalid job identifier was passed.",
            "An invalid job identifier was passed.",
        )
 class InvalidS3ObjectException(JsonRESTError):
    code = 400
@ -19,6 +20,7 @@ class InvalidS3ObjectException(JsonRESTError):
            "Amazon Textract is unable to access the S3 object that's specified in the request.",
        )
 class InvalidParameterException(JsonRESTError):
    code = 400
@ -27,4 +29,3 @@ class InvalidParameterException(JsonRESTError):
            __class__.__name__,
            "An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again.",
        )
--- a/moto/textract/models.py
+++ b/moto/textract/models.py
@ -9,18 +9,25 @@ from moto.core.utils import BackendDict
 from .exceptions import InvalidParameterException, InvalidJobIdException
 class TextractJobStatus:
    in_progress = "IN_PROGRESS"
    succeeded = "SUCCEEDED"
    failed = "FAILED"
    partial_success = "PARTIAL_SUCCESS"
 class TextractJob(BaseModel):
    def __init__(self, job):
        self.job = job
    def to_dict(self):
        return self.job
 class TextractBackend(BaseBackend):
    """Implementation of Textract APIs."""
    JOB_STATUS = TextractJobStatus.succeeded
    PAGES = {"Pages": randint(5, 500)}
    BLOCKS = []
@ -54,12 +61,14 @@ class TextractBackend(BaseBackend):
        if not document_location:
            raise InvalidParameterException()
        job_id = str(uuid.uuid4())
-        self.async_text_detection_jobs[job_id] = TextractJob({
+        self.async_text_detection_jobs[job_id] = TextractJob(
-            "Blocks": TextractBackend.BLOCKS,
+            {
-            "DetectDocumentTextModelVersion": "1.0",
+                "Blocks": TextractBackend.BLOCKS,
-            "DocumentMetadata": {"Pages": TextractBackend.PAGES},
+                "DetectDocumentTextModelVersion": "1.0",
-            "JobStatus": TextractBackend.JOB_STATUS,
+                "DocumentMetadata": {"Pages": TextractBackend.PAGES},
-        })
+                "JobStatus": TextractBackend.JOB_STATUS,
            }
        )
        return job_id
--- a/moto/textract/responses.py
+++ b/moto/textract/responses.py
@ -14,18 +14,15 @@ class TextractResponse(BaseResponse):
        return textract_backends[self.region]
    def get_document_text_detection(self):
-        params = self._get_params()
+        params = json.loads(self.body)
        job_id = params.get("JobId")
        max_results = params.get("MaxResults")
        next_token = params.get("NextToken")
        job = self.textract_backend.get_document_text_detection(
-            job_id=job_id,
+            job_id=job_id, max_results=max_results, next_token=next_token,
-            max_results=max_results,
+        ).to_dict()
            next_token=next_token,
        )
        return json.dumps(job)
    def start_document_text_detection(self):
        params = json.loads(self.body)
        document_location = params.get("DocumentLocation")
--- a/moto/textract/urls.py
+++ b/moto/textract/urls.py
@ -5,8 +5,6 @@ url_bases = [
    r"https?://textract\.(.+)\.amazonaws\.com",
 ]
 url_paths = {
    "{0}/$": TextractResponse.dispatch,
 }
--- a/tests/test_textract/test_server.py
+++ b/tests/test_textract/test_server.py
@ -1,13 +1,81 @@
 """Test different server responses."""
 import sure  # noqa # pylint: disable=unused-import
 import json
 import moto.server as server
 from moto import mock_textract
-def test_textract_list():
+@mock_textract
 def test_textract_start_text_detection():
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()
-    resp = test_client.get("/")
+    headers = {"X-Amz-Target": "X-Amz-Target=Textract.StartDocumentTextDetection"}
    request_body = {
        "DocumentLocation": {
            "S3Object": {"Bucket": "bucket", "Name": "name", "Version": "version",}
        }
    }
    resp = test_client.post("/", headers=headers, json=request_body)
    data = json.loads(resp.data.decode("utf-8"))
    resp.status_code.should.equal(200)
-    str(resp.data).should.contain("?")
+    data["JobId"].should.be.an(str)
@mock_textract
 def test_textract_start_text_detection_without_document_location():
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()
    headers = {"X-Amz-Target": "X-Amz-Target=Textract.StartDocumentTextDetection"}
    resp = test_client.post("/", headers=headers, json={})
    data = json.loads(resp.data.decode("utf-8"))
    resp.status_code.should.equal(400)
    data["__type"].should.equal("InvalidParameterException")
    data["message"].should.equal(
        "An input parameter violated a constraint. For example, in synchronous operations, an InvalidParameterException exception occurs when neither of the S3Object or Bytes values are supplied in the Document request parameter. Validate your parameter before calling the API operation again."
    )
@mock_textract
 def test_textract_get_text_detection():
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()
    headers = {"X-Amz-Target": "X-Amz-Target=Textract.StartDocumentTextDetection"}
    request_body = {
        "DocumentLocation": {
            "S3Object": {"Bucket": "bucket", "Name": "name", "Version": "version",}
        }
    }
    resp = test_client.post("/", headers=headers, json=request_body)
    start_job_data = json.loads(resp.data.decode("utf-8"))
    headers = {"X-Amz-Target": "X-Amz-Target=Textract.GetDocumentTextDetection"}
    request_body = {
        "JobId": start_job_data["JobId"],
    }
    resp = test_client.post("/", headers=headers, json=request_body)
    resp.status_code.should.equal(200)
    data = json.loads(resp.data.decode("utf-8"))
    data["JobStatus"].should.equal("SUCCEEDED")
@mock_textract
 def test_textract_get_text_detection_without_job_id():
    backend = server.create_backend_app("textract")
    test_client = backend.test_client()
    headers = {"X-Amz-Target": "X-Amz-Target=Textract.GetDocumentTextDetection"}
    request_body = {
        "JobId": "invalid_job_id",
    }
    resp = test_client.post("/", headers=headers, json=request_body)
    resp.status_code.should.equal(400)
    data = json.loads(resp.data.decode("utf-8"))
    print(data)
    data["__type"].should.equal("InvalidJobIdException")
    data["message"].should.equal("An invalid job identifier was passed.")
--- a/tests/test_textract/test_textract.py
+++ b/tests/test_textract/test_textract.py
@ -1,31 +1,81 @@
 """Unit tests for textract-supported APIs."""
 from random import randint
 from botocore.exceptions import ClientError, ParamValidationError
 import pytest
 import boto3
 from moto.textract.models import TextractBackend
 from moto import mock_textract
 # See our Development Tips on writing tests for hints on how to write good tests:
 # http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
@mock_textract
 def test_get_document_text_detection():
    TextractBackend.JOB_STATUS = "SUCCEEDED"
    TextractBackend.PAGES = randint(5, 500)
    TextractBackend.BLOCKS = [
        {
            "Text": "This is a test",
            "Id": "0",
            "Confidence": "100",
            "Geometry": {
                "BoundingBox": {
                    "Width": "0.5",
                    "Height": "0.5",
                    "Left": "0.5",
                    "Top": "0.5",
                },
            },
        }
    ]
-# @mock_textract
+    client = boto3.client("textract", region_name="us-east-1")
-# def test_get_document_text_detection():
+    job = client.start_document_text_detection(
-#     client = boto3.client("textract", region_name="us-east-1")
+        DocumentLocation={"S3Object": {"Bucket": "bucket", "Name": "name"}},
-#     resp = client.get_document_text_detection()
+    )
-#     raise Exception("NotYetImplemented")
+    resp = client.get_document_text_detection(JobId=job["JobId"])
    resp["Blocks"][0]["Text"].should.equal("This is a test")
    resp["Blocks"][0]["Id"].should.equal("0")
    resp["Blocks"][0]["Confidence"].should.equal("100")
    resp["Blocks"][0]["Geometry"]["BoundingBox"]["Width"].should.equal("0.5")
    resp["Blocks"][0]["Geometry"]["BoundingBox"]["Height"].should.equal("0.5")
    resp["Blocks"][0]["Geometry"]["BoundingBox"]["Left"].should.equal("0.5")
    resp["Blocks"][0]["Geometry"]["BoundingBox"]["Top"].should.equal("0.5")
    resp["JobStatus"].should.equal("SUCCEEDED")
    resp["DocumentMetadata"]["Pages"].should.equal(TextractBackend.PAGES)
@mock_textract
 def test_start_document_text_detection():
    client = boto3.client("textract", region_name="us-east-1")
    resp = client.start_document_text_detection(
-        DocumentLocation={
+        DocumentLocation={"S3Object": {"Bucket": "bucket", "Name": "name",}},
            'S3Object': {
                'Bucket': 'bucket',
                'Name': 'name',
            }
        },
    )
    resp.should.have.key("JobId")
@mock_textract
 def test_get_document_text_detection_without_job_id():
    client = boto3.client("textract", region_name="us-east-1")
    with pytest.raises(ClientError) as e:
        client.get_document_text_detection(JobId="Invalid Job Id")
    e.value.response["Error"]["Code"].should.equal("InvalidJobIdException")
@mock_textract
 def test_get_document_text_detection_without_document_location():
    client = boto3.client("textract", region_name="us-east-1")
    with pytest.raises(ParamValidationError) as e:
        client.start_document_text_detection()
    assert e.typename == "ParamValidationError"
    assert (
        'Parameter validation failed:\nMissing required parameter in input: "DocumentLocation"'
        in e.value.args
    )