2022-02-21 15:13:36 +00:00
|
|
|
from random import randint
|
2023-11-30 15:55:51 +00:00
|
|
|
from unittest import SkipTest
|
|
|
|
|
2022-02-17 19:00:48 +00:00
|
|
|
import boto3
|
2023-11-30 15:55:51 +00:00
|
|
|
import pytest
|
|
|
|
from botocore.exceptions import ClientError, ParamValidationError
|
2022-02-17 19:00:48 +00:00
|
|
|
|
2024-01-07 12:03:33 +00:00
|
|
|
from moto import mock_aws, settings
|
2022-02-21 15:13:36 +00:00
|
|
|
from moto.textract.models import TextractBackend
|
2022-02-17 19:00:48 +00:00
|
|
|
|
|
|
|
# See our Development Tips on writing tests for hints on how to write good tests:
|
|
|
|
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
|
|
|
|
|
|
|
|
|
2024-01-07 12:03:33 +00:00
|
|
|
@mock_aws
def test_get_document_text_detection():
    """Check that job results reflect the values set on ``TextractBackend``.

    ``JOB_STATUS``, ``PAGES`` and ``BLOCKS`` are overridden on the class only
    for the duration of this test, so the overrides do not leak into tests
    that run afterwards in the same process.

    Raises:
        SkipTest: in server mode, where the backend class cannot be reached
            from the test process.
    """
    if settings.TEST_SERVER_MODE:
        raise SkipTest("Cannot set textract backend values in server mode")

    # Imported locally so this test does not rely on the file-level imports
    # being extended.
    from unittest.mock import patch

    pages = randint(5, 500)
    blocks = [
        {
            "Text": "This is a test",
            "Id": "0",
            "Confidence": "100",
            "Geometry": {
                "BoundingBox": {
                    "Width": "0.5",
                    "Height": "0.5",
                    "Left": "0.5",
                    "Top": "0.5",
                },
            },
        }
    ]

    # patch.multiple puts the previous attribute values back on exit, even
    # when an assertion below fails. create=True mirrors the plain assignment
    # used before: it works whether or not the class already defines them.
    with patch.multiple(
        TextractBackend,
        create=True,
        JOB_STATUS="SUCCEEDED",
        PAGES=pages,
        BLOCKS=blocks,
    ):
        client = boto3.client("textract", region_name="us-east-1")
        job = client.start_document_text_detection(
            DocumentLocation={"S3Object": {"Bucket": "bucket", "Name": "name"}}
        )

        resp = client.get_document_text_detection(JobId=job["JobId"])

        assert resp["Blocks"][0]["Text"] == "This is a test"
        assert resp["Blocks"][0]["Id"] == "0"
        assert resp["Blocks"][0]["Confidence"] == "100"
        assert resp["Blocks"][0]["Geometry"]["BoundingBox"]["Width"] == "0.5"
        assert resp["Blocks"][0]["Geometry"]["BoundingBox"]["Height"] == "0.5"
        assert resp["Blocks"][0]["Geometry"]["BoundingBox"]["Left"] == "0.5"
        assert resp["Blocks"][0]["Geometry"]["BoundingBox"]["Top"] == "0.5"
        assert resp["JobStatus"] == "SUCCEEDED"
        assert resp["DocumentMetadata"]["Pages"] == pages
|
2022-02-17 19:00:48 +00:00
|
|
|
|
|
|
|
|
2024-01-07 12:03:33 +00:00
|
|
|
@mock_aws
def test_start_document_text_detection():
    """Starting a text-detection job returns a response containing a JobId."""
    textract = boto3.client("textract", region_name="us-east-1")
    document_location = {"S3Object": {"Bucket": "bucket", "Name": "name"}}

    response = textract.start_document_text_detection(
        DocumentLocation=document_location
    )

    assert "JobId" in response
|
2022-02-21 15:13:36 +00:00
|
|
|
|
|
|
|
|
2024-01-07 12:03:33 +00:00
|
|
|
@mock_aws
def test_get_document_text_detection_without_job_id():
    """Requesting results for an unknown job id fails with InvalidJobIdException."""
    textract = boto3.client("textract", region_name="us-east-1")

    with pytest.raises(ClientError) as exc_info:
        textract.get_document_text_detection(JobId="Invalid Job Id")

    error = exc_info.value.response["Error"]
    assert error["Code"] == "InvalidJobIdException"
|
2022-02-21 15:13:36 +00:00
|
|
|
|
|
|
|
|
2024-01-07 12:03:33 +00:00
|
|
|
@mock_aws
def test_get_document_text_detection_without_document_location():
    """Starting a job with no DocumentLocation raises ParamValidationError."""
    expected_message = 'Parameter validation failed:\nMissing required parameter in input: "DocumentLocation"'
    textract = boto3.client("textract", region_name="us-east-1")

    with pytest.raises(ParamValidationError) as exc_info:
        textract.start_document_text_detection()

    assert exc_info.typename == "ParamValidationError"
    assert expected_message in exc_info.value.args
|
2023-12-30 14:24:42 +00:00
|
|
|
|
|
|
|
|
2024-01-07 12:03:33 +00:00
|
|
|
@mock_aws
def test_detect_document_text():
    """detect_document_text returns a list of blocks and model version "1.0"."""
    textract = boto3.client("textract", region_name="us-east-1")
    document = {"S3Object": {"Bucket": "bucket", "Name": "name.jpg"}}

    response = textract.detect_document_text(Document=document)

    assert isinstance(response["Blocks"], list)
    assert response["DetectDocumentTextModelVersion"] == "1.0"
|