From 7379b46460bf056406913736c4f48abda2d94929 Mon Sep 17 00:00:00 2001
From: rafcio19
Date: Thu, 25 May 2023 11:46:40 +0100
Subject: [PATCH] Comprehend: implement canned responses for detect_* functions (#6319)

---
Review notes (text between the "---" marker and the first "diff --git" line
is not part of the commit message):

- TextSizeLimitExceededException takes the applicable limit as an optional
  second argument (default 100000), so detect_sentiment, which enforces a
  5000 byte limit, no longer reports "100000 bytes" in its error message.
  test_detect_sentiment_text_too_large expects the 5000 byte wording.
- The detect_* backends measure the UTF-8 encoded size of the text, because
  both the limit and the error message are expressed in bytes.
- test_detect_detect_key_phrases_text_too_large is renamed to
  test_detect_key_phrases_text_too_large; two f-strings without placeholders
  in the tests are now plain strings.
- Every change replaces an added line with another added line, so hunk
  headers and the diffstat are unchanged. The blob ids on the "index" lines
  still describe the revision as originally submitted.

 IMPLEMENTATION_COVERAGE.md               |   8 +-
 docs/docs/services/comprehend.rst        |   6 +-
 moto/comprehend/exceptions.py            |  20 +++
 moto/comprehend/models.py                | 103 ++++++++++++-
 moto/comprehend/responses.py             |  21 +++
 tests/test_comprehend/test_comprehend.py | 176 +++++++++++++++++++++++
 6 files changed, 326 insertions(+), 8 deletions(-)

diff --git a/IMPLEMENTATION_COVERAGE.md b/IMPLEMENTATION_COVERAGE.md
index 6243655d0..a741212a0 100644
--- a/IMPLEMENTATION_COVERAGE.md
+++ b/IMPLEMENTATION_COVERAGE.md
@@ -1197,7 +1197,7 @@
 
 ## comprehend
 <details>
-<summary>9% implemented</summary>
+<summary>13% implemented</summary>
 
 - [ ] batch_detect_dominant_language
 - [ ] batch_detect_entities
@@ -1235,9 +1235,9 @@
 - [ ] describe_topics_detection_job
 - [ ] detect_dominant_language
 - [ ] detect_entities
-- [ ] detect_key_phrases
-- [ ] detect_pii_entities
-- [ ] detect_sentiment
+- [X] detect_key_phrases
+- [X] detect_pii_entities
+- [X] detect_sentiment
 - [ ] detect_syntax
 - [ ] detect_targeted_sentiment
 - [ ] import_model
diff --git a/docs/docs/services/comprehend.rst b/docs/docs/services/comprehend.rst
index 47963412b..4b477a9c7 100644
--- a/docs/docs/services/comprehend.rst
+++ b/docs/docs/services/comprehend.rst
@@ -67,9 +67,9 @@ comprehend
 - [ ] describe_topics_detection_job
 - [ ] detect_dominant_language
 - [ ] detect_entities
-- [ ] detect_key_phrases
-- [ ] detect_pii_entities
-- [ ] detect_sentiment
+- [X] detect_key_phrases
+- [X] detect_pii_entities
+- [X] detect_sentiment
 - [ ] detect_syntax
 - [ ] detect_targeted_sentiment
 - [ ] import_model
diff --git a/moto/comprehend/exceptions.py b/moto/comprehend/exceptions.py
index c4031b203..0530214af 100644
--- a/moto/comprehend/exceptions.py
+++ b/moto/comprehend/exceptions.py
@@ -1,5 +1,6 @@
 """Exceptions raised by the comprehend service."""
 from moto.core.exceptions import JsonRESTError
+from typing import List
 
 
 class ResourceNotFound(JsonRESTError):
@@ -8,3 +9,22 @@ class ResourceNotFound(JsonRESTError):
             "ResourceNotFoundException",
             "RESOURCE_NOT_FOUND: Could not find specified resource.",
         )
+
+
+class DetectPIIValidationException(JsonRESTError):
+    def __init__(self, language: str, all_languages: List[str]) -> None:
+        all_languages_str = str(all_languages).replace("'", "")
+        super().__init__(
+            "ValidationException",
+            f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
+            f"Member must satisfy enum value set: {all_languages_str}",
+        )
+
+
+class TextSizeLimitExceededException(JsonRESTError):
+    def __init__(self, size: int, limit: int = 100000) -> None:
+        super().__init__(
+            "TextSizeLimitExceededException",
+            f"Input text size exceeds limit. Max length of request text allowed is {limit} bytes while in "
+            f"this request the text size is {size} bytes",
+        )
diff --git a/moto/comprehend/models.py b/moto/comprehend/models.py
index c33b72b4c..c06dac041 100644
--- a/moto/comprehend/models.py
+++ b/moto/comprehend/models.py
@@ -2,9 +2,62 @@
 
 from moto.core import BaseBackend, BackendDict, BaseModel
 from moto.utilities.tagging_service import TaggingService
-from .exceptions import ResourceNotFound
+from .exceptions import (
+    ResourceNotFound,
+    DetectPIIValidationException,
+    TextSizeLimitExceededException,
+)
 from typing import Any, Dict, List, Iterable
 
+CANNED_DETECT_RESPONSE = [
+    {
+        "Score": 0.9999890923500061,
+        "Type": "NAME",
+        "BeginOffset": 50,
+        "EndOffset": 58,
+    },
+    {
+        "Score": 0.9999966621398926,
+        "Type": "EMAIL",
+        "BeginOffset": 230,
+        "EndOffset": 259,
+    },
+    {
+        "Score": 0.9999954700469971,
+        "Type": "BANK_ACCOUNT_NUMBER",
+        "BeginOffset": 334,
+        "EndOffset": 349,
+    },
+]
+
+CANNED_PHRASES_RESPONSE = [
+    {
+        "Score": 0.9999890923500061,
+        "BeginOffset": 50,
+        "EndOffset": 58,
+    },
+    {
+        "Score": 0.9999966621398926,
+        "BeginOffset": 230,
+        "EndOffset": 259,
+    },
+    {
+        "Score": 0.9999954700469971,
+        "BeginOffset": 334,
+        "EndOffset": 349,
+    },
+]
+
+CANNED_SENTIMENT_RESPONSE = {
+    "Sentiment": "NEUTRAL",
+    "SentimentScore": {
+        "Positive": 0.008101312443614006,
+        "Negative": 0.0002824589901138097,
+        "Neutral": 0.9916020035743713,
+        "Mixed": 1.4156351426208857e-05,
+    },
+}
+
 
 class EntityRecognizer(BaseModel):
     def __init__(
@@ -53,6 +106,24 @@ class EntityRecognizer(BaseModel):
 class ComprehendBackend(BaseBackend):
     """Implementation of Comprehend APIs."""
 
+    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend/client/detect_key_phrases.html
+    detect_key_phrases_languages = [
+        "ar",
+        "hi",
+        "ko",
+        "zh-TW",
+        "ja",
+        "zh",
+        "de",
+        "pt",
+        "en",
+        "it",
+        "fr",
+        "es",
+    ]
+    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend/client/detect_pii_entities.html
+    detect_pii_entities_languages = ["en"]
+
     def __init__(self, region_name: str, account_id: str):
         super().__init__(region_name, account_id)
         self.recognizers: Dict[str, EntityRecognizer] = dict()
@@ -130,5 +201,35 @@ class ComprehendBackend(BaseBackend):
     def untag_resource(self, resource_arn: str, tag_keys: List[str]) -> None:
         self.tagger.untag_resource_using_names(resource_arn, tag_keys)
 
+    def detect_pii_entities(self, text: str, language: str) -> List[Dict[str, Any]]:
+        if language not in self.detect_pii_entities_languages:
+            raise DetectPIIValidationException(
+                language, self.detect_pii_entities_languages
+            )
+        text_size = len(text.encode("utf-8"))
+        if text_size > 100000:
+            raise TextSizeLimitExceededException(text_size)
+        return CANNED_DETECT_RESPONSE
+
+    def detect_key_phrases(self, text: str, language: str) -> List[Dict[str, Any]]:
+        if language not in self.detect_key_phrases_languages:
+            raise DetectPIIValidationException(
+                language, self.detect_key_phrases_languages
+            )
+        text_size = len(text.encode("utf-8"))
+        if text_size > 100000:
+            raise TextSizeLimitExceededException(text_size)
+        return CANNED_PHRASES_RESPONSE
+
+    def detect_sentiment(self, text: str, language: str) -> Dict[str, Any]:
+        if language not in self.detect_key_phrases_languages:
+            raise DetectPIIValidationException(
+                language, self.detect_key_phrases_languages
+            )
+        text_size = len(text.encode("utf-8"))
+        if text_size > 5000:
+            raise TextSizeLimitExceededException(text_size, limit=5000)
+        return CANNED_SENTIMENT_RESPONSE
+
 
 comprehend_backends = BackendDict(ComprehendBackend, "comprehend")
diff --git a/moto/comprehend/responses.py b/moto/comprehend/responses.py
index 6e1d7b0b2..d439d853d 100644
--- a/moto/comprehend/responses.py
+++ b/moto/comprehend/responses.py
@@ -95,3 +95,24 @@ class ComprehendResponse(BaseResponse):
         tag_keys = params.get("TagKeys")
         self.comprehend_backend.untag_resource(resource_arn, tag_keys)
         return "{}"
+
+    def detect_pii_entities(self) -> str:
+        params = json.loads(self.body)
+        text = params.get("Text")
+        language = params.get("LanguageCode")
+        resp = self.comprehend_backend.detect_pii_entities(text, language)
+        return json.dumps(dict(Entities=resp))
+
+    def detect_key_phrases(self) -> str:
+        params = json.loads(self.body)
+        text = params.get("Text")
+        language = params.get("LanguageCode")
+        resp = self.comprehend_backend.detect_key_phrases(text, language)
+        return json.dumps(dict(KeyPhrases=resp))
+
+    def detect_sentiment(self) -> str:
+        params = json.loads(self.body)
+        text = params.get("Text")
+        language = params.get("LanguageCode")
+        resp = self.comprehend_backend.detect_sentiment(text, language)
+        return json.dumps(resp)
diff --git a/tests/test_comprehend/test_comprehend.py b/tests/test_comprehend/test_comprehend.py
index d5f4aab8b..321a6eff1 100644
--- a/tests/test_comprehend/test_comprehend.py
+++ b/tests/test_comprehend/test_comprehend.py
@@ -4,6 +4,11 @@ import pytest
 import sure  # noqa # pylint: disable=unused-import
 from botocore.exceptions import ClientError
 from moto import mock_comprehend
+from moto.comprehend.models import (
+    CANNED_DETECT_RESPONSE,
+    CANNED_PHRASES_RESPONSE,
+    CANNED_SENTIMENT_RESPONSE,
+)
 
 # See our Development Tips on writing tests for hints on how to write good tests:
 # http://docs.getmoto.org/en/latest/docs/contributing/development_tips/tests.html
@@ -198,3 +203,174 @@ def test_delete_entity_recognizer():
     err["Message"].should.equal(
         "RESOURCE_NOT_FOUND: Could not find specified resource."
     )
+
+
+@mock_comprehend
+def test_detect_pii_entities():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    sample_text = "Doesn't matter what we send, we will get a canned response"
+
+    # Execute
+    result = client.detect_pii_entities(Text=sample_text, LanguageCode="en")
+
+    # Verify
+    assert "Entities" in result
+    assert result["Entities"] == CANNED_DETECT_RESPONSE
+
+
+@mock_comprehend
+def test_detect_pii_entities_invalid_languages():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    sample_text = "Doesn't matter what we send, we will get a canned response"
+    language = "es"
+
+    # Execute
+    with pytest.raises(ClientError) as exc:
+        client.detect_pii_entities(Text=sample_text, LanguageCode=language)
+
+    # Verify
+    err = exc.value.response["Error"]
+    assert err["Code"] == "ValidationException"
+    assert (
+        err["Message"]
+        == f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
+        "Member must satisfy enum value set: [en]"
+    )
+
+
+@mock_comprehend
+def test_detect_pii_entities_text_too_large():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    size = 100001
+    sample_text = "x" * size
+    language = "en"
+
+    # Execute
+    with pytest.raises(ClientError) as exc:
+        client.detect_pii_entities(Text=sample_text, LanguageCode=language)
+
+    # Verify
+    err = exc.value.response["Error"]
+    assert err["Code"] == "TextSizeLimitExceededException"
+    assert (
+        err["Message"]
+        == "Input text size exceeds limit. Max length of request text allowed is 100000 bytes "
+        f"while in this request the text size is {size} bytes"
+    )
+
+
+@mock_comprehend
+def test_detect_key_phrases():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    sample_text = "Doesn't matter what we send, we will get a canned response"
+
+    # Execute
+    result = client.detect_key_phrases(Text=sample_text, LanguageCode="en")
+
+    # Verify
+    assert "KeyPhrases" in result
+    assert result["KeyPhrases"] == CANNED_PHRASES_RESPONSE
+
+
+@mock_comprehend
+def test_detect_key_phrases_invalid_languages():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    sample_text = "Doesn't matter what we send, we will get a canned response"
+    language = "blah"
+
+    # Execute
+    with pytest.raises(ClientError) as exc:
+        client.detect_key_phrases(Text=sample_text, LanguageCode=language)
+
+    # Verify
+    err = exc.value.response["Error"]
+    assert err["Code"] == "ValidationException"
+    assert (
+        err["Message"]
+        == f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
+        "Member must satisfy enum value set: [ar, hi, ko, zh-TW, ja, zh, de, pt, en, it, fr, es]"
+    )
+
+
+@mock_comprehend
+def test_detect_key_phrases_text_too_large():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    size = 100002
+    sample_text = "x" * size
+    language = "en"
+
+    # Execute
+    with pytest.raises(ClientError) as exc:
+        client.detect_key_phrases(Text=sample_text, LanguageCode=language)
+
+    # Verify
+    err = exc.value.response["Error"]
+    assert err["Code"] == "TextSizeLimitExceededException"
+    assert (
+        err["Message"]
+        == "Input text size exceeds limit. Max length of request text allowed is 100000 bytes "
+        f"while in this request the text size is {size} bytes"
+    )
+
+
+@mock_comprehend
+def test_detect_sentiment():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    sample_text = "Doesn't matter what we send, we will get a canned response"
+
+    # Execute
+    result = client.detect_sentiment(Text=sample_text, LanguageCode="en")
+
+    # Verify
+    del result["ResponseMetadata"]
+    assert result == CANNED_SENTIMENT_RESPONSE
+
+
+@mock_comprehend
+def test_detect_sentiment_invalid_languages():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    sample_text = "Doesn't matter what we send, we will get a canned response"
+    language = "blah"
+
+    # Execute
+    with pytest.raises(ClientError) as exc:
+        client.detect_sentiment(Text=sample_text, LanguageCode=language)
+
+    # Verify
+    err = exc.value.response["Error"]
+    assert err["Code"] == "ValidationException"
+    assert (
+        err["Message"]
+        == f"Value '{language}' at 'languageCode'failed to satisfy constraint: "
+        "Member must satisfy enum value set: [ar, hi, ko, zh-TW, ja, zh, de, pt, en, it, fr, es]"
+    )
+
+
+@mock_comprehend
+def test_detect_sentiment_text_too_large():
+    # Setup
+    client = boto3.client("comprehend", region_name="eu-west-1")
+    size = 5001
+    sample_text = "x" * size
+    language = "en"
+
+    # Execute
+    with pytest.raises(ClientError) as exc:
+        client.detect_sentiment(Text=sample_text, LanguageCode=language)
+
+    # Verify
+    err = exc.value.response["Error"]
+    assert err["Code"] == "TextSizeLimitExceededException"
+    assert (
+        err["Message"]
+        == "Input text size exceeds limit. Max length of request text allowed is 5000 bytes while "
+        f"in this request the text size is {size} bytes"
+    )