Comprehend: implement canned responses for detect_* functions (#6319)
This commit is contained in:
parent
25a0fee17b
commit
7379b46460
@ -1197,7 +1197,7 @@
|
|||||||
|
|
||||||
## comprehend
|
## comprehend
|
||||||
<details>
|
<details>
|
||||||
<summary>9% implemented</summary>
|
<summary>13% implemented</summary>
|
||||||
|
|
||||||
- [ ] batch_detect_dominant_language
|
- [ ] batch_detect_dominant_language
|
||||||
- [ ] batch_detect_entities
|
- [ ] batch_detect_entities
|
||||||
@ -1235,9 +1235,9 @@
|
|||||||
- [ ] describe_topics_detection_job
|
- [ ] describe_topics_detection_job
|
||||||
- [ ] detect_dominant_language
|
- [ ] detect_dominant_language
|
||||||
- [ ] detect_entities
|
- [ ] detect_entities
|
||||||
- [ ] detect_key_phrases
|
- [X] detect_key_phrases
|
||||||
- [ ] detect_pii_entities
|
- [X] detect_pii_entities
|
||||||
- [ ] detect_sentiment
|
- [X] detect_sentiment
|
||||||
- [ ] detect_syntax
|
- [ ] detect_syntax
|
||||||
- [ ] detect_targeted_sentiment
|
- [ ] detect_targeted_sentiment
|
||||||
- [ ] import_model
|
- [ ] import_model
|
||||||
|
@ -67,9 +67,9 @@ comprehend
|
|||||||
- [ ] describe_topics_detection_job
|
- [ ] describe_topics_detection_job
|
||||||
- [ ] detect_dominant_language
|
- [ ] detect_dominant_language
|
||||||
- [ ] detect_entities
|
- [ ] detect_entities
|
||||||
- [ ] detect_key_phrases
|
- [X] detect_key_phrases
|
||||||
- [ ] detect_pii_entities
|
- [X] detect_pii_entities
|
||||||
- [ ] detect_sentiment
|
- [X] detect_sentiment
|
||||||
- [ ] detect_syntax
|
- [ ] detect_syntax
|
||||||
- [ ] detect_targeted_sentiment
|
- [ ] detect_targeted_sentiment
|
||||||
- [ ] import_model
|
- [ ] import_model
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
"""Exceptions raised by the comprehend service."""
|
"""Exceptions raised by the comprehend service."""
|
||||||
from moto.core.exceptions import JsonRESTError
|
from moto.core.exceptions import JsonRESTError
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
class ResourceNotFound(JsonRESTError):
|
class ResourceNotFound(JsonRESTError):
|
||||||
@ -8,3 +9,22 @@ class ResourceNotFound(JsonRESTError):
|
|||||||
"ResourceNotFoundException",
|
"ResourceNotFoundException",
|
||||||
"RESOURCE_NOT_FOUND: Could not find specified resource.",
|
"RESOURCE_NOT_FOUND: Could not find specified resource.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DetectPIIValidationException(JsonRESTError):
    """ValidationException raised when a language code is not in the supported set."""

    def __init__(self, language: str, all_languages: List[str]) -> None:
        """Build the validation error message.

        :param language: the language code that was rejected.
        :param all_languages: the language codes that would have been accepted;
            rendered in the message as ``[a, b, c]``.
        """
        # Join the values explicitly instead of post-processing the list's
        # repr(): stripping quotes from str(list) also removes apostrophes that
        # are part of a value and depends on repr()'s escaping rules.
        all_languages_str = "[" + ", ".join(map(str, all_languages)) + "]"
        super().__init__(
            "ValidationException",
            # The missing space after 'languageCode' is intentional here: the
            # tests in this change assert the message in exactly this form.
            f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
            f"Member must satisfy enum value set: {all_languages_str}",
        )
|
||||||
|
|
||||||
|
|
||||||
|
class TextSizeLimitExceededException(JsonRESTError):
    """Error raised when the request text is larger than the operation allows."""

    def __init__(self, size: int, limit: int = 100000) -> None:
        """Build the size-limit error message.

        :param size: size of the text that was submitted, reported in the message.
        :param limit: maximum size reported in the message. Defaults to 100000,
            so existing callers that pass only ``size`` get the same message
            as before.
        """
        super().__init__(
            "TextSizeLimitExceededException",
            f"Input text size exceeds limit. Max length of request text allowed is {limit} bytes while in "
            f"this request the text size is {size} bytes",
        )
|
||||||
|
@ -2,9 +2,62 @@
|
|||||||
|
|
||||||
from moto.core import BaseBackend, BackendDict, BaseModel
|
from moto.core import BaseBackend, BackendDict, BaseModel
|
||||||
from moto.utilities.tagging_service import TaggingService
|
from moto.utilities.tagging_service import TaggingService
|
||||||
from .exceptions import ResourceNotFound
|
from .exceptions import (
|
||||||
|
ResourceNotFound,
|
||||||
|
DetectPIIValidationException,
|
||||||
|
TextSizeLimitExceededException,
|
||||||
|
)
|
||||||
from typing import Any, Dict, List, Iterable
|
from typing import Any, Dict, List, Iterable
|
||||||
|
|
||||||
|
# Fixed entity list handed back by detect_pii_entities, whatever text is sent.
# Each row is (score, entity type, begin offset, end offset).
CANNED_DETECT_RESPONSE = [
    {"Score": score, "Type": entity_type, "BeginOffset": begin, "EndOffset": end}
    for score, entity_type, begin, end in (
        (0.9999890923500061, "NAME", 50, 58),
        (0.9999966621398926, "EMAIL", 230, 259),
        (0.9999954700469971, "BANK_ACCOUNT_NUMBER", 334, 349),
    )
]
|
||||||
|
|
||||||
|
# Fixed key-phrase list handed back by detect_key_phrases, whatever text is sent.
# Each row is (score, begin offset, end offset).
CANNED_PHRASES_RESPONSE = [
    {"Score": score, "BeginOffset": begin, "EndOffset": end}
    for score, begin, end in (
        (0.9999890923500061, 50, 58),
        (0.9999966621398926, 230, 259),
        (0.9999954700469971, 334, 349),
    )
]
|
||||||
|
|
||||||
|
# Fixed payload handed back by detect_sentiment, whatever text is sent.
CANNED_SENTIMENT_RESPONSE = dict(
    Sentiment="NEUTRAL",
    SentimentScore=dict(
        Positive=0.008101312443614006,
        Negative=0.0002824589901138097,
        Neutral=0.9916020035743713,
        Mixed=1.4156351426208857e-05,
    ),
)
|
||||||
|
|
||||||
|
|
||||||
class EntityRecognizer(BaseModel):
|
class EntityRecognizer(BaseModel):
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -53,6 +106,24 @@ class EntityRecognizer(BaseModel):
|
|||||||
class ComprehendBackend(BaseBackend):
|
class ComprehendBackend(BaseBackend):
|
||||||
"""Implementation of Comprehend APIs."""
|
"""Implementation of Comprehend APIs."""
|
||||||
|
|
||||||
|
# Language codes accepted by detect_key_phrases; the order is significant
# because the list is echoed verbatim in validation error messages.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend/client/detect_key_phrases.html
detect_key_phrases_languages = [
    "ar", "hi", "ko", "zh-TW",
    "ja", "zh", "de", "pt",
    "en", "it", "fr", "es",
]
# Language codes accepted by detect_pii_entities.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend/client/detect_pii_entities.html
detect_pii_entities_languages = ["en"]
|
||||||
|
|
||||||
def __init__(self, region_name: str, account_id: str):
|
def __init__(self, region_name: str, account_id: str):
|
||||||
super().__init__(region_name, account_id)
|
super().__init__(region_name, account_id)
|
||||||
self.recognizers: Dict[str, EntityRecognizer] = dict()
|
self.recognizers: Dict[str, EntityRecognizer] = dict()
|
||||||
@ -130,5 +201,35 @@ class ComprehendBackend(BaseBackend):
|
|||||||
def untag_resource(self, resource_arn: str, tag_keys: List[str]) -> None:
|
def untag_resource(self, resource_arn: str, tag_keys: List[str]) -> None:
|
||||||
self.tagger.untag_resource_using_names(resource_arn, tag_keys)
|
self.tagger.untag_resource_using_names(resource_arn, tag_keys)
|
||||||
|
|
||||||
|
def detect_pii_entities(self, text: str, language: str) -> List[Dict[str, Any]]:
    """Validate the request and return the canned PII entity list.

    :param text: the text to analyse; only its size is inspected.
    :param language: language code; must be in ``detect_pii_entities_languages``.
    :raises DetectPIIValidationException: if ``language`` is not supported.
    :raises TextSizeLimitExceededException: if ``text`` exceeds 100000 bytes.
    :return: ``CANNED_DETECT_RESPONSE``, regardless of the content of ``text``.
    """
    if language not in self.detect_pii_entities_languages:
        raise DetectPIIValidationException(
            language, self.detect_pii_entities_languages
        )
    # The limit and the error message are expressed in bytes, so measure the
    # UTF-8 encoded payload rather than the number of characters.
    text_size = len(text.encode("utf-8"))
    if text_size > 100000:
        raise TextSizeLimitExceededException(text_size)
    return CANNED_DETECT_RESPONSE
|
||||||
|
|
||||||
|
def detect_key_phrases(self, text: str, language: str) -> List[Dict[str, Any]]:
    """Validate the request and return the canned key-phrase list.

    :param text: the text to analyse; only its size is inspected.
    :param language: language code; must be in ``detect_key_phrases_languages``.
    :raises DetectPIIValidationException: if ``language`` is not supported.
    :raises TextSizeLimitExceededException: if ``text`` exceeds 100000 bytes.
    :return: ``CANNED_PHRASES_RESPONSE``, regardless of the content of ``text``.
    """
    if language not in self.detect_key_phrases_languages:
        raise DetectPIIValidationException(
            language, self.detect_key_phrases_languages
        )
    # The limit and the error message are expressed in bytes, so measure the
    # UTF-8 encoded payload rather than the number of characters.
    text_size = len(text.encode("utf-8"))
    if text_size > 100000:
        raise TextSizeLimitExceededException(text_size)
    return CANNED_PHRASES_RESPONSE
|
||||||
|
|
||||||
|
def detect_sentiment(self, text: str, language: str) -> Dict[str, Any]:
    """Validate the request and return the canned sentiment payload.

    :param text: the text to analyse; only its size is inspected.
    :param language: language code; checked against
        ``detect_key_phrases_languages`` (this method has no list of its own).
    :raises DetectPIIValidationException: if ``language`` is not supported.
    :raises TextSizeLimitExceededException: if ``text`` exceeds 5000 bytes.
    :return: ``CANNED_SENTIMENT_RESPONSE``, regardless of the content of ``text``.
    """
    if language not in self.detect_key_phrases_languages:
        raise DetectPIIValidationException(
            language, self.detect_key_phrases_languages
        )
    # The limit and the error message are expressed in bytes, so measure the
    # UTF-8 encoded payload rather than the number of characters.
    text_size = len(text.encode("utf-8"))
    # NOTE(review): the message produced for this exception mentions a
    # 100000-byte limit even though the threshold enforced here is 5000 —
    # confirm whether that is intended.
    if text_size > 5000:
        raise TextSizeLimitExceededException(text_size)
    return CANNED_SENTIMENT_RESPONSE
|
||||||
|
|
||||||
|
|
||||||
comprehend_backends = BackendDict(ComprehendBackend, "comprehend")
|
comprehend_backends = BackendDict(ComprehendBackend, "comprehend")
|
||||||
|
@ -95,3 +95,24 @@ class ComprehendResponse(BaseResponse):
|
|||||||
tag_keys = params.get("TagKeys")
|
tag_keys = params.get("TagKeys")
|
||||||
self.comprehend_backend.untag_resource(resource_arn, tag_keys)
|
self.comprehend_backend.untag_resource(resource_arn, tag_keys)
|
||||||
return "{}"
|
return "{}"
|
||||||
|
|
||||||
|
def detect_pii_entities(self) -> str:
    """Handle DetectPiiEntities: forward Text/LanguageCode to the backend.

    Returns the backend result serialised as JSON under the ``Entities`` key.
    """
    request = json.loads(self.body)
    entities = self.comprehend_backend.detect_pii_entities(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps({"Entities": entities})
|
||||||
|
|
||||||
|
def detect_key_phrases(self) -> str:
    """Handle DetectKeyPhrases: forward Text/LanguageCode to the backend.

    Returns the backend result serialised as JSON under the ``KeyPhrases`` key.
    """
    request = json.loads(self.body)
    phrases = self.comprehend_backend.detect_key_phrases(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps({"KeyPhrases": phrases})
|
||||||
|
|
||||||
|
def detect_sentiment(self) -> str:
    """Handle DetectSentiment: forward Text/LanguageCode to the backend.

    Returns the backend result serialised as JSON, without a wrapping key.
    """
    request = json.loads(self.body)
    sentiment = self.comprehend_backend.detect_sentiment(
        request.get("Text"), request.get("LanguageCode")
    )
    return json.dumps(sentiment)
|
||||||
|
@ -4,6 +4,11 @@ import pytest
|
|||||||
import sure # noqa # pylint: disable=unused-import
|
import sure # noqa # pylint: disable=unused-import
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
from moto import mock_comprehend
|
from moto import mock_comprehend
|
||||||
|
from moto.comprehend.models import (
|
||||||
|
CANNED_DETECT_RESPONSE,
|
||||||
|
CANNED_PHRASES_RESPONSE,
|
||||||
|
CANNED_SENTIMENT_RESPONSE,
|
||||||
|
)
|
||||||
|
|
||||||
# See our Development Tips on writing tests for hints on how to write good tests:
|
# See our Development Tips on writing tests for hints on how to write good tests:
|
||||||
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
|
# http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
|
||||||
@ -198,3 +203,174 @@ def test_delete_entity_recognizer():
|
|||||||
err["Message"].should.equal(
|
err["Message"].should.equal(
|
||||||
"RESOURCE_NOT_FOUND: Could not find specified resource."
|
"RESOURCE_NOT_FOUND: Could not find specified resource."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_pii_entities():
    """A supported language yields the canned PII entity list."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")

    response = comprehend.detect_pii_entities(
        Text="Doesn't matter what we send, we will get a canned response",
        LanguageCode="en",
    )

    assert "Entities" in response
    assert response["Entities"] == CANNED_DETECT_RESPONSE
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_pii_entities_invalid_languages():
    """An unsupported language code is rejected with a ValidationException."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    unsupported = "es"
    expected_message = (
        f"Value '{unsupported}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [en]"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_pii_entities(
            Text="Doesn't matter what we send, we will get a canned response",
            LanguageCode=unsupported,
        )

    error = raised.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_pii_entities_text_too_large():
    """Text one byte over the 100000 limit is rejected."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    oversize = 100001
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes "
        f"while in this request the text size is {oversize} bytes"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_pii_entities(Text="x" * oversize, LanguageCode="en")

    error = raised.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_key_phrases():
    """A supported language yields the canned key-phrase list."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")

    response = comprehend.detect_key_phrases(
        Text="Doesn't matter what we send, we will get a canned response",
        LanguageCode="en",
    )

    assert "KeyPhrases" in response
    assert response["KeyPhrases"] == CANNED_PHRASES_RESPONSE
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_key_phrases_invalid_languages():
    """An unknown language code is rejected with a ValidationException."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    unsupported = "blah"
    expected_message = (
        f"Value '{unsupported}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [ar, hi, ko, zh-TW, ja, zh, de, pt, en, it, fr, es]"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_key_phrases(
            Text="Doesn't matter what we send, we will get a canned response",
            LanguageCode=unsupported,
        )

    error = raised.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_detect_key_phrases_text_too_large():
    """Text over the 100000 limit is rejected by detect_key_phrases."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    oversize = 100002
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes "
        f"while in this request the text size is {oversize} bytes"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_key_phrases(Text="x" * oversize, LanguageCode="en")

    error = raised.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_sentiment():
    """A supported language yields the canned sentiment payload."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")

    response = comprehend.detect_sentiment(
        Text="Doesn't matter what we send, we will get a canned response",
        LanguageCode="en",
    )

    # Drop the metadata so the remainder can be compared with the canned
    # payload as a whole.
    response.pop("ResponseMetadata")
    assert response == CANNED_SENTIMENT_RESPONSE
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_sentiment_invalid_languages():
    """An unknown language code is rejected with a ValidationException."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    unsupported = "blah"
    expected_message = (
        f"Value '{unsupported}' at 'languageCode'failed to satisfy constraint: "
        "Member must satisfy enum value set: [ar, hi, ko, zh-TW, ja, zh, de, pt, en, it, fr, es]"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_sentiment(
            Text="Doesn't matter what we send, we will get a canned response",
            LanguageCode=unsupported,
        )

    error = raised.value.response["Error"]
    assert error["Code"] == "ValidationException"
    assert error["Message"] == expected_message
|
||||||
|
|
||||||
|
|
||||||
|
@mock_comprehend
def test_detect_sentiment_text_too_large():
    """A 5001-character text is rejected by detect_sentiment."""
    comprehend = boto3.client("comprehend", region_name="eu-west-1")
    oversize = 5001
    # NOTE(review): the expected message mentions a 100000-byte limit although
    # a 5001-character request is what triggers the error — confirm intended.
    expected_message = (
        "Input text size exceeds limit. Max length of request text allowed is 100000 bytes while "
        f"in this request the text size is {oversize} bytes"
    )

    with pytest.raises(ClientError) as raised:
        comprehend.detect_sentiment(Text="x" * oversize, LanguageCode="en")

    error = raised.value.response["Error"]
    assert error["Code"] == "TextSizeLimitExceededException"
    assert error["Message"] == expected_message
|
||||||
|
Loading…
Reference in New Issue
Block a user