Comprehend: implement canned responses for detect_* functions (#6319)

This commit is contained in:
rafcio19 2023-05-25 11:46:40 +01:00 committed by GitHub
parent 25a0fee17b
commit 7379b46460
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 326 additions and 8 deletions

View File

@ -1197,7 +1197,7 @@
## comprehend ## comprehend
<details> <details>
<summary>9% implemented</summary> <summary>13% implemented</summary>
- [ ] batch_detect_dominant_language - [ ] batch_detect_dominant_language
- [ ] batch_detect_entities - [ ] batch_detect_entities
@ -1235,9 +1235,9 @@
- [ ] describe_topics_detection_job - [ ] describe_topics_detection_job
- [ ] detect_dominant_language - [ ] detect_dominant_language
- [ ] detect_entities - [ ] detect_entities
- [ ] detect_key_phrases - [X] detect_key_phrases
- [ ] detect_pii_entities - [X] detect_pii_entities
- [ ] detect_sentiment - [X] detect_sentiment
- [ ] detect_syntax - [ ] detect_syntax
- [ ] detect_targeted_sentiment - [ ] detect_targeted_sentiment
- [ ] import_model - [ ] import_model

View File

@ -67,9 +67,9 @@ comprehend
- [ ] describe_topics_detection_job - [ ] describe_topics_detection_job
- [ ] detect_dominant_language - [ ] detect_dominant_language
- [ ] detect_entities - [ ] detect_entities
- [ ] detect_key_phrases - [X] detect_key_phrases
- [ ] detect_pii_entities - [X] detect_pii_entities
- [ ] detect_sentiment - [X] detect_sentiment
- [ ] detect_syntax - [ ] detect_syntax
- [ ] detect_targeted_sentiment - [ ] detect_targeted_sentiment
- [ ] import_model - [ ] import_model

View File

@ -1,5 +1,6 @@
"""Exceptions raised by the comprehend service.""" """Exceptions raised by the comprehend service."""
from moto.core.exceptions import JsonRESTError from moto.core.exceptions import JsonRESTError
from typing import List
class ResourceNotFound(JsonRESTError): class ResourceNotFound(JsonRESTError):
@ -8,3 +9,22 @@ class ResourceNotFound(JsonRESTError):
"ResourceNotFoundException", "ResourceNotFoundException",
"RESOURCE_NOT_FOUND: Could not find specified resource.", "RESOURCE_NOT_FOUND: Could not find specified resource.",
) )
class DetectPIIValidationException(JsonRESTError):
    """ValidationException reported when a request's language code is rejected.

    Despite the PII-specific name, the key-phrase and sentiment backend
    methods raise this exception for an unsupported language as well.
    """

    def __init__(self, language: str, all_languages: List[str]) -> None:
        """Build the validation message.

        :param language: the language code that was rejected.
        :param all_languages: the language codes the operation accepts; they
            are rendered in the message as ``[a, b, c]``.
        """
        # Join the codes explicitly rather than stripping quotes from the
        # list's repr, which would mangle a value containing a quote or a
        # backslash.
        all_languages_str = f"[{', '.join(all_languages)}]"
        super().__init__(
            "ValidationException",
            # NOTE: there is no space before "failed"; it is kept as-is because
            # the accompanying tests compare against this exact text.
            f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
            f"Member must satisfy enum value set: {all_languages_str}",
        )
class TextSizeLimitExceededException(JsonRESTError):
    """Error reported when the request text is larger than an operation allows."""

    def __init__(self, size: int, limit: int = 100000) -> None:
        """Build the size-limit message.

        :param size: size of the rejected text, reported in the message.
        :param limit: maximum size reported in the message. Defaults to
            100000, which keeps the message identical for callers that pass
            only ``size``; operations with a different limit can pass their
            own value.
        """
        super().__init__(
            "TextSizeLimitExceededException",
            f"Input text size exceeds limit. Max length of request text allowed is {limit} bytes while in "
            f"this request the text size is {size} bytes",
        )

View File

@ -2,9 +2,62 @@
from moto.core import BaseBackend, BackendDict, BaseModel from moto.core import BaseBackend, BackendDict, BaseModel
from moto.utilities.tagging_service import TaggingService from moto.utilities.tagging_service import TaggingService
from .exceptions import ResourceNotFound from .exceptions import (
ResourceNotFound,
DetectPIIValidationException,
TextSizeLimitExceededException,
)
from typing import Any, Dict, List, Iterable from typing import Any, Dict, List, Iterable
# Fixed payloads handed back by the detect_* backend methods; the request text
# is never analysed, so every call yields exactly these values.

# Entities reported by detect_pii_entities.
CANNED_DETECT_RESPONSE = [
    {"Score": 0.9999890923500061, "Type": "NAME", "BeginOffset": 50, "EndOffset": 58},
    {"Score": 0.9999966621398926, "Type": "EMAIL", "BeginOffset": 230, "EndOffset": 259},
    {
        "Score": 0.9999954700469971,
        "Type": "BANK_ACCOUNT_NUMBER",
        "BeginOffset": 334,
        "EndOffset": 349,
    },
]

# Key phrases reported by detect_key_phrases (same scores and offsets as the
# entities above, without a "Type").
CANNED_PHRASES_RESPONSE = [
    {"Score": 0.9999890923500061, "BeginOffset": 50, "EndOffset": 58},
    {"Score": 0.9999966621398926, "BeginOffset": 230, "EndOffset": 259},
    {"Score": 0.9999954700469971, "BeginOffset": 334, "EndOffset": 349},
]

# Sentiment reported by detect_sentiment.
CANNED_SENTIMENT_RESPONSE = {
    "Sentiment": "NEUTRAL",
    "SentimentScore": {
        "Positive": 0.008101312443614006,
        "Negative": 0.0002824589901138097,
        "Neutral": 0.9916020035743713,
        "Mixed": 1.4156351426208857e-05,
    },
}
class EntityRecognizer(BaseModel): class EntityRecognizer(BaseModel):
def __init__( def __init__(
@ -53,6 +106,24 @@ class EntityRecognizer(BaseModel):
class ComprehendBackend(BaseBackend): class ComprehendBackend(BaseBackend):
"""Implementation of Comprehend APIs.""" """Implementation of Comprehend APIs."""
# Language codes accepted by detect_key_phrases; detect_sentiment validates
# against this same list. The order is the order shown in validation errors.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend/client/detect_key_phrases.html
detect_key_phrases_languages = [
    "ar", "hi", "ko", "zh-TW", "ja", "zh",
    "de", "pt", "en", "it", "fr", "es",
]  # fmt: skip

# Language codes accepted by detect_pii_entities.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend/client/detect_pii_entities.html
detect_pii_entities_languages = ["en"]
def __init__(self, region_name: str, account_id: str): def __init__(self, region_name: str, account_id: str):
super().__init__(region_name, account_id) super().__init__(region_name, account_id)
self.recognizers: Dict[str, EntityRecognizer] = dict() self.recognizers: Dict[str, EntityRecognizer] = dict()
@ -130,5 +201,35 @@ class ComprehendBackend(BaseBackend):
def untag_resource(self, resource_arn: str, tag_keys: List[str]) -> None: def untag_resource(self, resource_arn: str, tag_keys: List[str]) -> None:
self.tagger.untag_resource_using_names(resource_arn, tag_keys) self.tagger.untag_resource_using_names(resource_arn, tag_keys)
def detect_pii_entities(self, text: str, language: str) -> List[Dict[str, Any]]:
    """Validate the request and return the canned PII-entity list.

    The text itself is never analysed; only its size is checked.

    :param text: the document to inspect.
    :param language: language code; must be in ``detect_pii_entities_languages``.
    :raises DetectPIIValidationException: if ``language`` is not supported.
    :raises TextSizeLimitExceededException: if ``text`` is larger than
        100000 bytes when UTF-8 encoded.
    :returns: ``CANNED_DETECT_RESPONSE`` (the shared module-level list, not a copy).
    """
    if language not in self.detect_pii_entities_languages:
        raise DetectPIIValidationException(
            language, self.detect_pii_entities_languages
        )
    # The limit and the error message are expressed in bytes, so measure the
    # UTF-8 encoded size rather than the number of characters.
    text_size = len(text.encode("utf-8"))
    if text_size > 100000:
        raise TextSizeLimitExceededException(text_size)
    return CANNED_DETECT_RESPONSE
def detect_key_phrases(self, text: str, language: str) -> List[Dict[str, Any]]:
    """Validate the request and return the canned key-phrase list.

    The text itself is never analysed; only its size is checked.

    :param text: the document to inspect.
    :param language: language code; must be in ``detect_key_phrases_languages``.
    :raises DetectPIIValidationException: if ``language`` is not supported.
    :raises TextSizeLimitExceededException: if ``text`` is larger than
        100000 bytes when UTF-8 encoded.
    :returns: ``CANNED_PHRASES_RESPONSE`` (the shared module-level list, not a copy).
    """
    if language not in self.detect_key_phrases_languages:
        raise DetectPIIValidationException(
            language, self.detect_key_phrases_languages
        )
    # The limit and the error message are expressed in bytes, so measure the
    # UTF-8 encoded size rather than the number of characters.
    text_size = len(text.encode("utf-8"))
    if text_size > 100000:
        raise TextSizeLimitExceededException(text_size)
    return CANNED_PHRASES_RESPONSE
def detect_sentiment(self, text: str, language: str) -> Dict[str, Any]:
    """Validate the request and return the canned sentiment result.

    The text itself is never analysed; only its size is checked.

    :param text: the document to inspect.
    :param language: language code; validated against
        ``detect_key_phrases_languages``.
    :raises DetectPIIValidationException: if ``language`` is not supported.
    :raises TextSizeLimitExceededException: if ``text`` is larger than
        5000 bytes when UTF-8 encoded.
    :returns: ``CANNED_SENTIMENT_RESPONSE`` (the shared module-level dict, not a copy).
    """
    if language not in self.detect_key_phrases_languages:
        raise DetectPIIValidationException(
            language, self.detect_key_phrases_languages
        )
    # The limit and the error message are expressed in bytes, so measure the
    # UTF-8 encoded size rather than the number of characters.
    text_size = len(text.encode("utf-8"))
    if text_size > 5000:
        # NOTE(review): only the size is passed on, so the resulting message
        # still states a 100000-byte maximum although this check uses 5000.
        raise TextSizeLimitExceededException(text_size)
    return CANNED_SENTIMENT_RESPONSE
comprehend_backends = BackendDict(ComprehendBackend, "comprehend") comprehend_backends = BackendDict(ComprehendBackend, "comprehend")

View File

@ -95,3 +95,24 @@ class ComprehendResponse(BaseResponse):
tag_keys = params.get("TagKeys") tag_keys = params.get("TagKeys")
self.comprehend_backend.untag_resource(resource_arn, tag_keys) self.comprehend_backend.untag_resource(resource_arn, tag_keys)
return "{}" return "{}"
def detect_pii_entities(self) -> str:
    """Handle a DetectPiiEntities call.

    Reads ``Text`` and ``LanguageCode`` from the JSON request body, passes
    them to the backend and returns its result as JSON under ``Entities``.
    """
    request = json.loads(self.body)
    entities = self.comprehend_backend.detect_pii_entities(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps({"Entities": entities})
def detect_key_phrases(self) -> str:
    """Handle a DetectKeyPhrases call.

    Reads ``Text`` and ``LanguageCode`` from the JSON request body, passes
    them to the backend and returns its result as JSON under ``KeyPhrases``.
    """
    request = json.loads(self.body)
    key_phrases = self.comprehend_backend.detect_key_phrases(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps({"KeyPhrases": key_phrases})
def detect_sentiment(self) -> str:
    """Handle a DetectSentiment call.

    Reads ``Text`` and ``LanguageCode`` from the JSON request body, passes
    them to the backend and returns the backend's result serialised as JSON.
    """
    request = json.loads(self.body)
    sentiment = self.comprehend_backend.detect_sentiment(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps(sentiment)

View File

@ -4,6 +4,11 @@ import pytest
import sure # noqa # pylint: disable=unused-import import sure # noqa # pylint: disable=unused-import
from botocore.exceptions import ClientError from botocore.exceptions import ClientError
from moto import mock_comprehend from moto import mock_comprehend
from moto.comprehend.models import (
CANNED_DETECT_RESPONSE,
CANNED_PHRASES_RESPONSE,
CANNED_SENTIMENT_RESPONSE,
)
# See our Development Tips on writing tests for hints on how to write good tests: # See our Development Tips on writing tests for hints on how to write good tests:
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html # http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
@ -198,3 +203,174 @@ def test_delete_entity_recognizer():
err["Message"].should.equal( err["Message"].should.equal(
"RESOURCE_NOT_FOUND: Could not find specified resource." "RESOURCE_NOT_FOUND: Could not find specified resource."
) )
@mock_comprehend
def test_detect_pii_entities():
    """detect_pii_entities returns the canned entity list for an English request."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"

    # Execute
    response = comprehend.detect_pii_entities(Text=text, LanguageCode="en")

    # Verify
    assert "Entities" in response
    assert response["Entities"] == CANNED_DETECT_RESPONSE
@mock_comprehend
def test_detect_pii_entities_invalid_languages():
    """detect_pii_entities rejects a language code other than "en"."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"
    language = "es"
    expected_message = (
        f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [en]"
    )

    # Execute
    with pytest.raises(ClientError) as exc_info:
        comprehend.detect_pii_entities(Text=text, LanguageCode=language)

    # Verify
    error = exc_info.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
@mock_comprehend
def test_detect_pii_entities_text_too_large():
    """detect_pii_entities rejects text one character over the 100000 limit."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    size = 100001
    text = "x" * size
    language = "en"
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes "
        f"while in this request the text size is {size} bytes"
    )

    # Execute
    with pytest.raises(ClientError) as exc_info:
        comprehend.detect_pii_entities(Text=text, LanguageCode=language)

    # Verify
    error = exc_info.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message
@mock_comprehend
def test_detect_key_phrases():
    """detect_key_phrases returns the canned key-phrase list for an English request."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"

    # Execute
    response = comprehend.detect_key_phrases(Text=text, LanguageCode="en")

    # Verify
    assert "KeyPhrases" in response
    assert response["KeyPhrases"] == CANNED_PHRASES_RESPONSE
@mock_comprehend
def test_detect_key_phrases_invalid_languages():
    """detect_key_phrases rejects an unknown language code and lists the valid ones."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"
    language = "blah"
    expected_message = (
        f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [ar, hi, ko, zh-TW, ja, zh, de, pt, en, it, fr, es]"
    )

    # Execute
    with pytest.raises(ClientError) as exc_info:
        comprehend.detect_key_phrases(Text=text, LanguageCode=language)

    # Verify
    error = exc_info.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
@mock_comprehend
def test_detect_detect_key_phrases_text_too_large():
    """detect_key_phrases rejects text larger than the 100000 limit."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    size = 100002
    text = "x" * size
    language = "en"
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes "
        f"while in this request the text size is {size} bytes"
    )

    # Execute
    with pytest.raises(ClientError) as exc_info:
        comprehend.detect_key_phrases(Text=text, LanguageCode=language)

    # Verify
    error = exc_info.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message
@mock_comprehend
def test_detect_sentiment():
    """detect_sentiment returns exactly the canned sentiment payload."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"

    # Execute
    response = comprehend.detect_sentiment(Text=text, LanguageCode="en")

    # Verify
    # Drop the response metadata so that only the service payload is compared.
    response.pop("ResponseMetadata")
    assert response == CANNED_SENTIMENT_RESPONSE
@mock_comprehend
def test_detect_sentiment_invalid_languages():
    """detect_sentiment rejects an unknown language code and lists the valid ones."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"
    language = "blah"
    expected_message = (
        f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [ar, hi, ko, zh-TW, ja, zh, de, pt, en, it, fr, es]"
    )

    # Execute
    with pytest.raises(ClientError) as exc_info:
        comprehend.detect_sentiment(Text=text, LanguageCode=language)

    # Verify
    error = exc_info.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
@mock_comprehend
def test_detect_sentiment_text_too_large():
    """detect_sentiment rejects a 5001-character text."""
    # Setup
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    size = 5001
    text = "x" * size
    language = "en"
    # NOTE: the expected message states a 100000-byte maximum even though the
    # rejected text is only 5001 characters long.
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes while "
        f"in this request the text size is {size} bytes"
    )

    # Execute
    with pytest.raises(ClientError) as exc_info:
        comprehend.detect_sentiment(Text=text, LanguageCode=language)

    # Verify
    error = exc_info.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message