Comprehend: implement canned responses for detect_* functions (#6319)
This commit is contained in:
parent
25a0fee17b
commit
7379b46460
@ -1197,7 +1197,7 @@
|
||||
|
||||
## comprehend
|
||||
<details>
|
||||
<summary>9% implemented</summary>
|
||||
<summary>13% implemented</summary>
|
||||
|
||||
- [ ] batch_detect_dominant_language
|
||||
- [ ] batch_detect_entities
|
||||
@ -1235,9 +1235,9 @@
|
||||
- [ ] describe_topics_detection_job
|
||||
- [ ] detect_dominant_language
|
||||
- [ ] detect_entities
|
||||
- [ ] detect_key_phrases
|
||||
- [ ] detect_pii_entities
|
||||
- [ ] detect_sentiment
|
||||
- [X] detect_key_phrases
|
||||
- [X] detect_pii_entities
|
||||
- [X] detect_sentiment
|
||||
- [ ] detect_syntax
|
||||
- [ ] detect_targeted_sentiment
|
||||
- [ ] import_model
|
||||
|
@ -67,9 +67,9 @@ comprehend
|
||||
- [ ] describe_topics_detection_job
|
||||
- [ ] detect_dominant_language
|
||||
- [ ] detect_entities
|
||||
- [ ] detect_key_phrases
|
||||
- [ ] detect_pii_entities
|
||||
- [ ] detect_sentiment
|
||||
- [X] detect_key_phrases
|
||||
- [X] detect_pii_entities
|
||||
- [X] detect_sentiment
|
||||
- [ ] detect_syntax
|
||||
- [ ] detect_targeted_sentiment
|
||||
- [ ] import_model
|
||||
|
@ -1,5 +1,6 @@
|
||||
"""Exceptions raised by the comprehend service."""
|
||||
from moto.core.exceptions import JsonRESTError
|
||||
from typing import List
|
||||
|
||||
|
||||
class ResourceNotFound(JsonRESTError):
|
||||
@ -8,3 +9,22 @@ class ResourceNotFound(JsonRESTError):
|
||||
"ResourceNotFoundException",
|
||||
"RESOURCE_NOT_FOUND: Could not find specified resource.",
|
||||
)
|
||||
|
||||
|
||||
class DetectPIIValidationException(JsonRESTError):
    """ValidationException raised for a language code outside the allowed set.

    :param language: the rejected language code, echoed in the message.
    :param all_languages: the accepted codes, rendered as ``[a, b, c]``.
    """

    def __init__(self, language: str, all_languages: List[str]) -> None:
        # The list repr minus its quotes gives the "[en, es]" form.
        allowed = str(all_languages).replace("'", "")
        # The message text (including the missing space before "failed") is
        # asserted verbatim by the tests, so it is reproduced exactly.
        message = (
            f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
            f"Member must satisfy enum value set: {allowed}"
        )
        super().__init__("ValidationException", message)
|
||||
|
||||
|
||||
class TextSizeLimitExceededException(JsonRESTError):
    """Error raised when the request text is larger than an operation allows.

    :param size: size of the submitted text, reported in the message.
    :param limit: maximum size accepted by the operation, reported in the
        message. Defaults to 100000 so callers that pass only ``size`` get
        exactly the same message as before.
    """

    def __init__(self, size: int, limit: int = 100000) -> None:
        super().__init__(
            "TextSizeLimitExceededException",
            f"Input text size exceeds limit. Max length of request text allowed is {limit} bytes while in "
            f"this request the text size is {size} bytes",
        )
|
||||
|
@ -2,9 +2,62 @@
|
||||
|
||||
from moto.core import BaseBackend, BackendDict, BaseModel
|
||||
from moto.utilities.tagging_service import TaggingService
|
||||
from .exceptions import ResourceNotFound
|
||||
from .exceptions import (
|
||||
ResourceNotFound,
|
||||
DetectPIIValidationException,
|
||||
TextSizeLimitExceededException,
|
||||
)
|
||||
from typing import Any, Dict, List, Iterable
|
||||
|
||||
# Canned payloads returned by the detect_* backend methods once the request
# has passed validation; the submitted text does not influence them.

# Entities reported by detect_pii_entities.
CANNED_DETECT_RESPONSE = [
    {"Score": 0.9999890923500061, "Type": "NAME", "BeginOffset": 50, "EndOffset": 58},
    {"Score": 0.9999966621398926, "Type": "EMAIL", "BeginOffset": 230, "EndOffset": 259},
    {
        "Score": 0.9999954700469971,
        "Type": "BANK_ACCOUNT_NUMBER",
        "BeginOffset": 334,
        "EndOffset": 349,
    },
]

# Key phrases reported by detect_key_phrases (same scores/offsets, no "Type").
CANNED_PHRASES_RESPONSE = [
    {"Score": 0.9999890923500061, "BeginOffset": 50, "EndOffset": 58},
    {"Score": 0.9999966621398926, "BeginOffset": 230, "EndOffset": 259},
    {"Score": 0.9999954700469971, "BeginOffset": 334, "EndOffset": 349},
]

# Full response body reported by detect_sentiment.
CANNED_SENTIMENT_RESPONSE = {
    "Sentiment": "NEUTRAL",
    "SentimentScore": {
        "Positive": 0.008101312443614006,
        "Negative": 0.0002824589901138097,
        "Neutral": 0.9916020035743713,
        "Mixed": 1.4156351426208857e-05,
    },
}
|
||||
|
||||
|
||||
class EntityRecognizer(BaseModel):
|
||||
def __init__(
|
||||
@ -53,6 +106,24 @@ class EntityRecognizer(BaseModel):
|
||||
class ComprehendBackend(BaseBackend):
|
||||
"""Implementation of Comprehend APIs."""
|
||||
|
||||
# Language codes accepted by detect_key_phrases (detect_sentiment validates
# against the same list). Order matters: it is echoed in validation errors.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend/client/detect_key_phrases.html
detect_key_phrases_languages = [
    "ar", "hi", "ko", "zh-TW", "ja", "zh",
    "de", "pt", "en", "it", "fr", "es",
]  # fmt: skip

# Language codes accepted by detect_pii_entities.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend/client/detect_pii_entities.html
detect_pii_entities_languages = ["en"]
|
||||
|
||||
def __init__(self, region_name: str, account_id: str):
|
||||
super().__init__(region_name, account_id)
|
||||
self.recognizers: Dict[str, EntityRecognizer] = dict()
|
||||
@ -130,5 +201,35 @@ class ComprehendBackend(BaseBackend):
|
||||
# Remove the given tag keys from the resource by delegating to
# self.tagger.untag_resource_using_names; nothing is returned.
def untag_resource(self, resource_arn: str, tag_keys: List[str]) -> None:
|
||||
self.tagger.untag_resource_using_names(resource_arn, tag_keys)
|
||||
|
||||
def detect_pii_entities(self, text: str, language: str) -> List[Dict[str, Any]]:
    """Validate the request and return the canned list of PII entities.

    :param text: text to analyse; only its size is inspected.
    :param language: language code; must be listed in
        ``detect_pii_entities_languages``.
    :raises DetectPIIValidationException: if the language is not supported.
    :raises TextSizeLimitExceededException: if the text exceeds 100000 bytes.
    :return: ``CANNED_DETECT_RESPONSE``, whatever the text contains.
    """
    if language not in self.detect_pii_entities_languages:
        raise DetectPIIValidationException(
            language, self.detect_pii_entities_languages
        )
    # The limit and the error message are expressed in bytes, so measure the
    # UTF-8 encoded size; len(text) undercounts multi-byte characters.
    # errors="replace" keeps this size check from failing on lone surrogates.
    text_size = len(text.encode("utf-8", errors="replace"))
    if text_size > 100000:
        raise TextSizeLimitExceededException(text_size)
    return CANNED_DETECT_RESPONSE
|
||||
|
||||
def detect_key_phrases(self, text: str, language: str) -> List[Dict[str, Any]]:
    """Validate the request and return the canned list of key phrases.

    :param text: text to analyse; only its size is inspected.
    :param language: language code; must be listed in
        ``detect_key_phrases_languages``.
    :raises DetectPIIValidationException: if the language is not supported
        (the PII validation error type is reused for this operation).
    :raises TextSizeLimitExceededException: if the text exceeds 100000 bytes.
    :return: ``CANNED_PHRASES_RESPONSE``, whatever the text contains.
    """
    if language not in self.detect_key_phrases_languages:
        raise DetectPIIValidationException(
            language, self.detect_key_phrases_languages
        )
    # The limit and the error message are expressed in bytes, so measure the
    # UTF-8 encoded size; len(text) undercounts multi-byte characters.
    # errors="replace" keeps this size check from failing on lone surrogates.
    text_size = len(text.encode("utf-8", errors="replace"))
    if text_size > 100000:
        raise TextSizeLimitExceededException(text_size)
    return CANNED_PHRASES_RESPONSE
|
||||
|
||||
def detect_sentiment(self, text: str, language: str) -> Dict[str, Any]:
    """Validate the request and return the canned sentiment result.

    :param text: text to analyse; only its size is inspected.
    :param language: language code; validated against
        ``detect_key_phrases_languages`` (the same list as key phrases).
    :raises DetectPIIValidationException: if the language is not supported
        (the PII validation error type is reused for this operation).
    :raises TextSizeLimitExceededException: if the text exceeds 5000 bytes.
    :return: ``CANNED_SENTIMENT_RESPONSE``, whatever the text contains.
    """
    if language not in self.detect_key_phrases_languages:
        raise DetectPIIValidationException(
            language, self.detect_key_phrases_languages
        )
    # The limit is expressed in bytes, so measure the UTF-8 encoded size;
    # len(text) undercounts multi-byte characters.
    # errors="replace" keeps this size check from failing on lone surrogates.
    text_size = len(text.encode("utf-8", errors="replace"))
    if text_size > 5000:
        # NOTE(review): only the size is passed to the exception; confirm the
        # message it builds reflects this operation's 5000-byte limit.
        raise TextSizeLimitExceededException(text_size)
    return CANNED_SENTIMENT_RESPONSE
|
||||
|
||||
|
||||
# BackendDict built from ComprehendBackend for the "comprehend" service name.
# NOTE(review): presumably the lookup used by the response layer to reach a
# backend instance — confirm against moto.core.BackendDict.
comprehend_backends = BackendDict(ComprehendBackend, "comprehend")
|
||||
|
@ -95,3 +95,24 @@ class ComprehendResponse(BaseResponse):
|
||||
tag_keys = params.get("TagKeys")
|
||||
self.comprehend_backend.untag_resource(resource_arn, tag_keys)
|
||||
return "{}"
|
||||
|
||||
def detect_pii_entities(self) -> str:
    """Serve DetectPiiEntities.

    Reads ``Text`` and ``LanguageCode`` from the JSON request body, passes
    them to the backend and returns its result as JSON under ``Entities``.
    """
    request = json.loads(self.body)
    entities = self.comprehend_backend.detect_pii_entities(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps({"Entities": entities})
|
||||
|
||||
def detect_key_phrases(self) -> str:
    """Serve DetectKeyPhrases.

    Reads ``Text`` and ``LanguageCode`` from the JSON request body, passes
    them to the backend and returns its result as JSON under ``KeyPhrases``.
    """
    request = json.loads(self.body)
    phrases = self.comprehend_backend.detect_key_phrases(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps({"KeyPhrases": phrases})
|
||||
|
||||
def detect_sentiment(self) -> str:
    """Serve DetectSentiment.

    Reads ``Text`` and ``LanguageCode`` from the JSON request body, passes
    them to the backend and returns the backend's result serialised as JSON.
    """
    request = json.loads(self.body)
    sentiment = self.comprehend_backend.detect_sentiment(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps(sentiment)
|
||||
|
@ -4,6 +4,11 @@ import pytest
|
||||
import sure # noqa # pylint: disable=unused-import
|
||||
from botocore.exceptions import ClientError
|
||||
from moto import mock_comprehend
|
||||
from moto.comprehend.models import (
|
||||
CANNED_DETECT_RESPONSE,
|
||||
CANNED_PHRASES_RESPONSE,
|
||||
CANNED_SENTIMENT_RESPONSE,
|
||||
)
|
||||
|
||||
# See our Development Tips on writing tests for hints on how to write good tests:
|
||||
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
|
||||
@ -198,3 +203,174 @@ def test_delete_entity_recognizer():
|
||||
err["Message"].should.equal(
|
||||
"RESOURCE_NOT_FOUND: Could not find specified resource."
|
||||
)
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_pii_entities():
    """detect_pii_entities returns the canned entity list for English text."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"

    response = comprehend.detect_pii_entities(Text=text, LanguageCode="en")

    assert "Entities" in response
    assert response["Entities"] == CANNED_DETECT_RESPONSE
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_pii_entities_invalid_languages():
    """A language other than "en" is rejected with a ValidationException."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"
    bad_language = "es"
    expected_message = (
        f"Value '{bad_language}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [en]"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_pii_entities(Text=text, LanguageCode=bad_language)

    error = raised.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_pii_entities_text_too_large():
    """Text one character over the 100000 limit is rejected."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    oversized = 100001
    text = "x" * oversized
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes "
        f"while in this request the text size is {oversized} bytes"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_pii_entities(Text=text, LanguageCode="en")

    error = raised.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_key_phrases():
    """detect_key_phrases returns the canned key-phrase list for English text."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"

    response = comprehend.detect_key_phrases(Text=text, LanguageCode="en")

    assert "KeyPhrases" in response
    assert response["KeyPhrases"] == CANNED_PHRASES_RESPONSE
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_key_phrases_invalid_languages():
    """An unknown language code is rejected with a ValidationException."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"
    bad_language = "blah"
    expected_message = (
        f"Value '{bad_language}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [ar, hi, ko, zh-TW, ja, zh, de, pt, en, it, fr, es]"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_key_phrases(Text=text, LanguageCode=bad_language)

    error = raised.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_detect_key_phrases_text_too_large():
    """Text over the 100000 limit is rejected by detect_key_phrases."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    oversized = 100002
    text = "x" * oversized
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes "
        f"while in this request the text size is {oversized} bytes"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_key_phrases(Text=text, LanguageCode="en")

    error = raised.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_sentiment():
    """detect_sentiment returns the canned sentiment payload for English text."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"

    response = comprehend.detect_sentiment(Text=text, LanguageCode="en")

    # Strip the metadata so the rest can be compared to the canned payload as
    # a whole; a missing key still fails the test with KeyError.
    response.pop("ResponseMetadata")
    assert response == CANNED_SENTIMENT_RESPONSE
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_sentiment_invalid_languages():
    """An unknown language code is rejected with a ValidationException."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    text = "Doesn't matter what we send, we will get a canned response"
    bad_language = "blah"
    expected_message = (
        f"Value '{bad_language}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [ar, hi, ko, zh-TW, ja, zh, de, pt, en, it, fr, es]"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_sentiment(Text=text, LanguageCode=bad_language)

    error = raised.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
|
||||
|
||||
|
||||
@mock_comprehend
def test_detect_sentiment_text_too_large():
    """Text of 5001 characters is rejected by detect_sentiment."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    oversized = 5001
    text = "x" * oversized
    # NOTE(review): the expected message cites a 100000-byte cap although the
    # request is rejected at 5001 characters; this is the text asserted here.
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes while "
        f"in this request the text size is {oversized} bytes"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_sentiment(Text=text, LanguageCode="en")

    error = raised.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message
|
||||
|
Loading…
Reference in New Issue
Block a user