diff --git a/IMPLEMENTATION_COVERAGE.md b/IMPLEMENTATION_COVERAGE.md index e90d004b2..05fb13dde 100644 --- a/IMPLEMENTATION_COVERAGE.md +++ b/IMPLEMENTATION_COVERAGE.md @@ -11150,23 +11150,23 @@ - [ ] create_call_analytics_category - [ ] create_language_model - [X] create_medical_vocabulary -- [ ] create_vocabulary +- [x] create_vocabulary - [ ] create_vocabulary_filter - [ ] delete_call_analytics_category - [ ] delete_call_analytics_job - [ ] delete_language_model - [X] delete_medical_transcription_job - [X] delete_medical_vocabulary -- [ ] delete_transcription_job -- [ ] delete_vocabulary +- [x] delete_transcription_job +- [x] delete_vocabulary - [ ] delete_vocabulary_filter - [ ] describe_language_model - [ ] get_call_analytics_category - [ ] get_call_analytics_job - [X] get_medical_transcription_job - [X] get_medical_vocabulary -- [ ] get_transcription_job -- [ ] get_vocabulary +- [x] get_transcription_job +- [x] get_vocabulary - [ ] get_vocabulary_filter - [ ] list_call_analytics_categories - [ ] list_call_analytics_jobs @@ -11174,12 +11174,12 @@ - [X] list_medical_transcription_jobs - [X] list_medical_vocabularies - [ ] list_tags_for_resource -- [ ] list_transcription_jobs +- [x] list_transcription_jobs - [ ] list_vocabularies - [ ] list_vocabulary_filters - [ ] start_call_analytics_job - [X] start_medical_transcription_job -- [ ] start_transcription_job +- [x] start_transcription_job - [ ] tag_resource - [ ] untag_resource - [ ] update_call_analytics_category diff --git a/moto/transcribe/models.py b/moto/transcribe/models.py index bf8e602e6..3cff073c8 100644 --- a/moto/transcribe/models.py +++ b/moto/transcribe/models.py @@ -1,6 +1,5 @@ import uuid from datetime import datetime, timedelta - from moto.core import BaseBackend, BaseModel from moto.ec2 import ec2_backends from moto.sts.models import ACCOUNT_ID @@ -28,6 +27,233 @@ class BaseObject(BaseModel): return self.gen_response_object() +class FakeTranscriptionJob(BaseObject): + def __init__( + self, + 
class FakeTranscriptionJob(BaseObject):
    """Fake Amazon Transcribe job whose status moves one step per poll.

    The status starts unset and is advanced by ``advance_job_status``:
    unset -> ``QUEUED`` -> ``IN_PROGRESS`` -> ``COMPLETED``.

    NOTE(review): ``response_object`` relies on ``gen_response_object()``
    from ``BaseObject`` to turn attributes into CamelCase keys; presumably
    underscore-prefixed attributes are skipped - confirm in ``BaseObject``.
    """

    def __init__(
        self,
        region_name,
        transcription_job_name,
        language_code,
        media_sample_rate_hertz,
        media_format,
        media,
        output_bucket_name,
        output_key,
        output_encryption_kms_key_id,
        settings,
        model_settings,
        job_execution_settings,
        content_redaction,
        identify_language,
        language_options,
    ):
        """Store the request parameters; falsy option dicts get defaults."""
        self._region_name = region_name
        self.transcription_job_name = transcription_job_name
        self.transcription_job_status = None
        self.language_code = language_code
        self.media_sample_rate_hertz = media_sample_rate_hertz
        self.media_format = media_format
        self.media = media
        self.transcript = None
        self.start_time = self.completion_time = None
        self.creation_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.failure_reason = None
        self.settings = settings or {
            "ChannelIdentification": False,
            "ShowAlternatives": False,
            "ShowSpeakerLabels": False,
        }
        self.model_settings = model_settings or {"LanguageModelName": None}
        self.job_execution_settings = job_execution_settings or {
            "AllowDeferredExecution": False,
            "DataAccessRoleArn": None,
        }
        self.content_redaction = content_redaction or {
            "RedactionType": None,
            "RedactionOutput": None,
        }
        self.identify_language = identify_language
        self.language_options = language_options
        # Fixed: this was the one-element tuple ``(None,)``, which is neither
        # ``None`` nor ``[None]`` and so slipped through the filter in
        # ``response_object`` before a score had been assigned.
        self.identified_language_score = None
        self._output_bucket_name = output_bucket_name
        self.output_key = output_key
        self._output_encryption_kms_key_id = output_encryption_kms_key_id
        self.output_location_type = (
            "CUSTOMER_BUCKET" if self._output_bucket_name else "SERVICE_BUCKET"
        )

    def response_object(self, response_type):
        """Build the response payload for one operation.

        ``response_type`` must be ``"CREATE"``, ``"GET"`` or ``"LIST"``; any
        other value raises ``KeyError``. CREATE and GET payloads are wrapped
        as ``{"TranscriptionJob": {...}}``; LIST payloads are returned bare.
        Fields whose value is ``None`` or ``[None]`` are left out.
        """
        response_field_dict = {
            "CREATE": [
                "TranscriptionJobName",
                "TranscriptionJobStatus",
                "LanguageCode",
                "MediaFormat",
                "Media",
                "Settings",
                "StartTime",
                "CreationTime",
                "IdentifyLanguage",
                "LanguageOptions",
                "JobExecutionSettings",
            ],
            "GET": [
                "TranscriptionJobName",
                "TranscriptionJobStatus",
                "LanguageCode",
                "MediaSampleRateHertz",
                "MediaFormat",
                "Media",
                "Settings",
                "Transcript",
                "StartTime",
                "CreationTime",
                "CompletionTime",
                "IdentifyLanguage",
                "LanguageOptions",
                "IdentifiedLanguageScore",
            ],
            "LIST": [
                "TranscriptionJobName",
                "CreationTime",
                "StartTime",
                "CompletionTime",
                "LanguageCode",
                "TranscriptionJobStatus",
                "FailureReason",
                "IdentifyLanguage",
                "IdentifiedLanguageScore",
                "OutputLocationType",
            ],
        }
        response_fields = response_field_dict[response_type]
        # One filter shared by every response type (it used to be duplicated).
        filtered = {
            k: v
            for k, v in self.gen_response_object().items()
            if k in response_fields and v is not None and v != [None]
        }
        if response_type == "LIST":
            return filtered
        return {"TranscriptionJob": filtered}

    def advance_job_status(self):
        """Advance the fake job by one status step.

        Does nothing once the job is ``COMPLETED``. Entering ``IN_PROGRESS``
        fills in defaults for the sample rate, media format and (when
        language identification was requested) the language code. Entering
        ``COMPLETED`` sets the transcript location.
        """
        if not self.transcription_job_status:
            self.transcription_job_status = "QUEUED"
        elif self.transcription_job_status == "QUEUED":
            self.transcription_job_status = "IN_PROGRESS"
            self.start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            if not self.media_sample_rate_hertz:
                self.media_sample_rate_hertz = 44100
            if not self.media_format:
                # Guess the format from the media URI's extension.
                file_ext = self.media["MediaFileUri"].split(".")[-1].lower()
                self.media_format = (
                    file_ext if file_ext in ["mp3", "mp4", "wav", "flac"] else "mp3"
                )
            if self.identify_language:
                self.identified_language_score = 0.999645948
                # Simply "identify" the first language passed in
                # language_options; if none is set, default to "en-US".
                if self.language_options:
                    self.language_code = self.language_options[0]
                else:
                    self.language_code = "en-US"
        elif self.transcription_job_status == "IN_PROGRESS":
            self.transcription_job_status = "COMPLETED"
            # NOTE(review): the completion time is stamped 10 seconds in the
            # future, unchanged from the original - presumably to simulate
            # processing time; confirm this is intended.
            self.completion_time = (datetime.now() + timedelta(seconds=10)).strftime(
                "%Y-%m-%d %H:%M:%S"
            )
            if self._output_bucket_name:
                # Customer bucket: <bucket>/[<output_key>/]<job name>.json
                key_prefix = (
                    "" if self.output_key is None else "{0}/".format(self.output_key)
                )
                transcript_file_uri = "https://s3.{0}.amazonaws.com/{1}/{2}{3}.json".format(
                    self._region_name,
                    self._output_bucket_name,
                    key_prefix,
                    self.transcription_job_name,
                )
                self.output_location_type = "CUSTOMER_BUCKET"
            else:
                transcript_file_uri = "https://s3.{0}.amazonaws.com/aws-transcribe-{0}-prod/{1}/{2}/{3}/asrOutput.json".format(  # noqa: E501
                    self._region_name,
                    ACCOUNT_ID,
                    self.transcription_job_name,
                    uuid.uuid4(),
                )
                self.output_location_type = "SERVICE_BUCKET"
            self.transcript = {"TranscriptFileUri": transcript_file_uri}
class FakeVocabulary(BaseObject):
    """Fake custom vocabulary whose state moves one step per poll.

    The state starts unset and is advanced by ``advance_job_status``:
    unset -> ``PENDING`` -> ``READY``.
    """

    def __init__(
        self, region_name, vocabulary_name, language_code, phrases, vocabulary_file_uri,
    ):
        """Store the request parameters and build a fake download URI."""
        self._region_name = region_name
        self.vocabulary_name = vocabulary_name
        self.language_code = language_code
        self.phrases = phrases
        self.vocabulary_file_uri = vocabulary_file_uri
        self.vocabulary_state = None
        self.last_modified_time = None
        self.failure_reason = None
        # Fixed: the last path segment was formatted from the ``uuid`` module
        # object itself (yielding "<module 'uuid' ...>") instead of a freshly
        # generated UUID.
        self.download_uri = "https://s3.{0}.amazonaws.com/aws-transcribe-dictionary-model-{0}-prod/{1}/{2}/{3}/input.txt".format(  # noqa: E501
            region_name, ACCOUNT_ID, vocabulary_name, uuid.uuid4(),
        )

    def response_object(self, response_type):
        """Build the response payload for one operation.

        ``response_type`` must be ``"CREATE"``, ``"GET"`` or ``"LIST"``; any
        other value raises ``KeyError``. Fields whose value is ``None`` or
        ``[None]`` are left out.
        """
        response_field_dict = {
            "CREATE": [
                "VocabularyName",
                "LanguageCode",
                "VocabularyState",
                "LastModifiedTime",
                "FailureReason",
            ],
            "GET": [
                "VocabularyName",
                "LanguageCode",
                "VocabularyState",
                "LastModifiedTime",
                "FailureReason",
                "DownloadUri",
            ],
            "LIST": [
                "VocabularyName",
                "LanguageCode",
                "LastModifiedTime",
                "VocabularyState",
            ],
        }
        response_fields = response_field_dict[response_type]
        response_object = self.gen_response_object()
        return {
            k: v
            for k, v in response_object.items()
            if k in response_fields and v is not None and v != [None]
        }

    def advance_job_status(self):
        """Advance the vocabulary state by one step and stamp the change.

        Does nothing once the vocabulary is ``READY``.
        """
        if not self.vocabulary_state:
            self.vocabulary_state = "PENDING"
            self.last_modified_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        elif self.vocabulary_state == "PENDING":
            self.vocabulary_state = "READY"
            self.last_modified_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
def start_transcription_job(self, **kwargs):
    """Register a new fake transcription job and return its CREATE payload.

    Method of ``TranscribeBackend``. Raises ``ConflictException`` when the
    job name is already taken and ``BadRequestException`` when the settings
    name a vocabulary that has not been created.
    """
    job_name = kwargs.get("transcription_job_name")
    if job_name in self.transcriptions:
        raise ConflictException(
            message="The requested job name already exists. Use a different job name."
        )

    job_settings = kwargs.get("settings")
    wanted_vocabulary = job_settings.get("VocabularyName") if job_settings else None
    if wanted_vocabulary and wanted_vocabulary not in self.vocabularies:
        raise BadRequestException(
            message="The requested vocabulary couldn't be found. "
            "Check the vocabulary name and try your request again."
        )

    # Everything else is forwarded to the job untouched (missing -> None).
    forwarded = (
        "language_code",
        "media_sample_rate_hertz",
        "media_format",
        "media",
        "output_bucket_name",
        "output_key",
        "output_encryption_kms_key_id",
        "model_settings",
        "job_execution_settings",
        "content_redaction",
        "identify_language",
        "language_options",
    )
    job = FakeTranscriptionJob(
        region_name=self.region_name,
        transcription_job_name=job_name,
        settings=job_settings,
        **{field: kwargs.get(field) for field in forwarded}
    )
    self.transcriptions[job_name] = job
    return job.response_object("CREATE")
def get_transcription_job(self, transcription_job_name):
    """Return the GET payload for a job, advancing its fake status first.

    Method of ``TranscribeBackend``. Raises ``BadRequestException`` when no
    job with that name is stored.
    """
    # Only the lookup is guarded: a KeyError raised while advancing the job
    # (for example a media dict without "MediaFileUri") must not be reported
    # as "job couldn't be found".
    try:
        job = self.transcriptions[transcription_job_name]
    except KeyError:
        raise BadRequestException(
            message="The requested job couldn't be found. "
            "Check the job name and try your request again."
        )
    job.advance_job_status()  # Fakes advancement through statuses.
    return job.response_object("GET")
def delete_transcription_job(self, transcription_job_name):
    """Forget a stored job.

    Method of ``TranscribeBackend``. Raises ``BadRequestException`` when no
    job with that name is stored.
    """
    try:
        self.transcriptions.pop(transcription_job_name)
    except KeyError:
        raise BadRequestException(
            message="The requested job couldn't be found. "
            "Check the job name and try your request again.",
        )


def list_transcription_jobs(
    self, state_equals, job_name_contains, next_token, max_results
):
    """Return one page of LIST summaries for the stored jobs.

    Method of ``TranscribeBackend``. ``state_equals`` and
    ``job_name_contains`` are optional filters; ``next_token`` is the string
    offset handed out by a previous page; ``max_results`` falls back to 100
    when falsy.
    """
    matching = [
        job
        for job in self.transcriptions.values()
        if (not state_equals or job.transcription_job_status == state_equals)
        and (not job_name_contains or job_name_contains in job.transcription_job_name)
    ]

    first = int(next_token) if next_token else 0
    last = first + (max_results or 100)  # 100 was arbitrarily selected...

    page = {
        "TranscriptionJobSummaries": [
            job.response_object("LIST") for job in matching[first:last]
        ]
    }
    if last < len(matching):
        page["NextToken"] = str(last)
    if state_equals:
        page["Status"] = state_equals
    return page
def create_vocabulary(self, **kwargs):
    """Register a new fake vocabulary and return its CREATE payload.

    Method of ``TranscribeBackend``. Exactly one of ``phrases`` and
    ``vocabulary_file_uri`` must be given, and ``phrases`` must not be
    empty; otherwise ``BadRequestException`` is raised. A name that is
    already in use raises ``ConflictException``.
    """
    name = kwargs.get("vocabulary_name")
    phrases = kwargs.get("phrases")
    file_uri = kwargs.get("vocabulary_file_uri")

    # Both given or both missing: the two "is None" answers agree.
    if (phrases is None) == (file_uri is None):
        raise BadRequestException(
            message="Either Phrases or VocabularyFileUri field should be provided.",
        )
    if phrases is not None and len(phrases) < 1:
        raise BadRequestException(
            message="1 validation error detected: Value '[]' at 'phrases' failed to "
            "satisfy constraint: Member must have length greater than or "
            "equal to 1",
        )
    if name in self.vocabularies:
        raise ConflictException(
            message="The requested vocabulary name already exists. "
            "Use a different vocabulary name."
        )

    vocabulary = FakeVocabulary(
        region_name=self.region_name,
        vocabulary_name=name,
        language_code=kwargs.get("language_code"),
        phrases=phrases,
        vocabulary_file_uri=file_uri,
    )
    self.vocabularies[name] = vocabulary
    return vocabulary.response_object("CREATE")
def get_vocabulary(self, vocabulary_name):
    """Return the GET payload for a vocabulary, advancing its fake state first.

    Method of ``TranscribeBackend``. Raises ``BadRequestException`` when no
    vocabulary with that name is stored.
    """
    # Only the lookup is guarded, so a KeyError raised while advancing or
    # serialising the vocabulary is not misreported as "not found".
    try:
        vocabulary = self.vocabularies[vocabulary_name]
    except KeyError:
        raise BadRequestException(
            message="The requested vocabulary couldn't be found. "
            "Check the vocabulary name and try your request again."
        )
    vocabulary.advance_job_status()  # Fakes advancement through statuses.
    return vocabulary.response_object("GET")


def delete_vocabulary(self, vocabulary_name):
    """Forget a stored vocabulary.

    Method of ``TranscribeBackend``. Raises ``BadRequestException`` when no
    vocabulary with that name is stored.
    """
    try:
        del self.vocabularies[vocabulary_name]
    except KeyError:
        raise BadRequestException(
            message="The requested vocabulary couldn't be found. "
            "Check the vocabulary name and try your request again."
        )
def list_vocabularies(self, state_equals, name_contains, next_token, max_results):
    """Return one page of LIST summaries for the stored vocabularies.

    Method of ``TranscribeBackend``. ``state_equals`` and ``name_contains``
    are optional filters; ``next_token`` is the string offset handed out by
    a previous page; ``max_results`` falls back to 100 when falsy.
    """
    matching = [
        vocabulary
        for vocabulary in self.vocabularies.values()
        if (not state_equals or vocabulary.vocabulary_state == state_equals)
        and (not name_contains or name_contains in vocabulary.vocabulary_name)
    ]

    first = int(next_token) if next_token else 0
    last = first + (max_results or 100)  # 100 was arbitrarily selected...

    page = {
        "Vocabularies": [
            vocabulary.response_object("LIST") for vocabulary in matching[first:last]
        ]
    }
    if last < len(matching):
        page["NextToken"] = str(last)
    if state_equals:
        page["Status"] = state_equals
    return page
# Request handlers of ``TranscribeResponse``: each one reads its parameters
# from the request, delegates to the backend and returns the result as JSON.


@amzn_request_id
def start_transcription_job(self):
    """Handle StartTranscriptionJob."""
    # backend keyword -> request parameter, read in this order
    request_params = (
        ("transcription_job_name", "TranscriptionJobName"),
        ("language_code", "LanguageCode"),
        ("media_sample_rate_hertz", "MediaSampleRateHertz"),
        ("media_format", "MediaFormat"),
        ("media", "Media"),
        ("output_bucket_name", "OutputBucketName"),
        ("output_key", "OutputKey"),
        ("output_encryption_kms_key_id", "OutputEncryptionKMSKeyId"),
        ("settings", "Settings"),
        ("model_settings", "ModelSettings"),
        ("job_execution_settings", "JobExecutionSettings"),
        ("content_redaction", "ContentRedaction"),
        ("identify_language", "IdentifyLanguage"),
        ("language_options", "LanguageOptions"),
    )
    backend_kwargs = {}
    for keyword, param in request_params:
        backend_kwargs[keyword] = self._get_param(param)
    result = self.transcribe_backend.start_transcription_job(**backend_kwargs)
    return json.dumps(result)


@amzn_request_id
def list_transcription_jobs(self):
    """Handle ListTranscriptionJobs."""
    result = self.transcribe_backend.list_transcription_jobs(
        state_equals=self._get_param("Status"),
        job_name_contains=self._get_param("JobNameContains"),
        next_token=self._get_param("NextToken"),
        max_results=self._get_param("MaxResults"),
    )
    return json.dumps(result)


@amzn_request_id
def get_transcription_job(self):
    """Handle GetTranscriptionJob."""
    job_name = self._get_param("TranscriptionJobName")
    result = self.transcribe_backend.get_transcription_job(
        transcription_job_name=job_name
    )
    return json.dumps(result)


@amzn_request_id
def delete_transcription_job(self):
    """Handle DeleteTranscriptionJob."""
    job_name = self._get_param("TranscriptionJobName")
    result = self.transcribe_backend.delete_transcription_job(
        transcription_job_name=job_name
    )
    return json.dumps(result)


@amzn_request_id
def create_vocabulary(self):
    """Handle CreateVocabulary."""
    result = self.transcribe_backend.create_vocabulary(
        vocabulary_name=self._get_param("VocabularyName"),
        language_code=self._get_param("LanguageCode"),
        phrases=self._get_param("Phrases"),
        vocabulary_file_uri=self._get_param("VocabularyFileUri"),
    )
    return json.dumps(result)


@amzn_request_id
def get_vocabulary(self):
    """Handle GetVocabulary."""
    name = self._get_param("VocabularyName")
    result = self.transcribe_backend.get_vocabulary(vocabulary_name=name)
    return json.dumps(result)


@amzn_request_id
def list_vocabularies(self):
    """Handle ListVocabularies."""
    result = self.transcribe_backend.list_vocabularies(
        state_equals=self._get_param("StateEquals"),
        name_contains=self._get_param("NameContains"),
        next_token=self._get_param("NextToken"),
        max_results=self._get_param("MaxResults"),
    )
    return json.dumps(result)


@amzn_request_id
def delete_vocabulary(self):
    """Handle DeleteVocabulary."""
    name = self._get_param("VocabularyName")
    result = self.transcribe_backend.delete_vocabulary(vocabulary_name=name)
    return json.dumps(result)
@mock_transcribe
def test_run_transcription_job_all_params():
    """Run a job with (almost) every parameter set through all statuses."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    def poll_job(name):
        # Every poll advances the fake job by one status.
        reply = client.get_transcription_job(TranscriptionJobName=name)
        reply["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
        return reply["TranscriptionJob"]

    vocabulary_name = "MyVocabulary"
    created = client.create_vocabulary(
        VocabularyName=vocabulary_name,
        LanguageCode="en-US",
        VocabularyFileUri="https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt",
    )
    created["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)

    job_name = "MyJob2"
    args = {
        "TranscriptionJobName": job_name,
        "LanguageCode": "en-US",
        "MediaSampleRateHertz": 48000,
        "MediaFormat": "flac",
        "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.dat"},
        "OutputBucketName": "my-output-bucket",
        "OutputEncryptionKMSKeyId": "arn:aws:kms:us-east-1:012345678901:key/37111b5e-8eff-4706-ae3a-d4f9d1d559fc",
        "Settings": {
            "ShowSpeakerLabels": True,
            "MaxSpeakerLabels": 5,
            "ChannelIdentification": False,
            "ShowAlternatives": True,
            "MaxAlternatives": 6,
            "VocabularyName": vocabulary_name,
        },
        # Missing `ContentRedaction`, `JobExecutionSettings`,
        # `VocabularyFilterName`, `LanguageModel`
    }
    started = client.start_transcription_job(**args)
    started["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)

    # First poll: QUEUED
    transcription_job = poll_job(job_name)
    transcription_job["TranscriptionJobName"].should.equal(args["TranscriptionJobName"])
    transcription_job["TranscriptionJobStatus"].should.equal("QUEUED")
    transcription_job["LanguageCode"].should.equal(args["LanguageCode"])
    transcription_job["Media"].should.equal(args["Media"])
    transcription_job.should.contain("CreationTime")
    transcription_job.doesnt.contain("StartTime")
    transcription_job.doesnt.contain("CompletionTime")
    transcription_job.doesnt.contain("Transcript")
    for setting in (
        "ShowSpeakerLabels",
        "MaxSpeakerLabels",
        "ChannelIdentification",
        "ShowAlternatives",
        "MaxAlternatives",
        "VocabularyName",
    ):
        transcription_job["Settings"][setting].should.equal(args["Settings"][setting])

    # Second poll: IN_PROGRESS
    transcription_job = poll_job(job_name)
    transcription_job["TranscriptionJobStatus"].should.equal("IN_PROGRESS")
    transcription_job["MediaFormat"].should.equal("flac")
    transcription_job.should.contain("StartTime")
    transcription_job.doesnt.contain("CompletionTime")
    transcription_job.doesnt.contain("Transcript")
    transcription_job["MediaSampleRateHertz"].should.equal(48000)

    # Third poll: COMPLETED
    transcription_job = poll_job(job_name)
    transcription_job["TranscriptionJobStatus"].should.equal("COMPLETED")
    transcription_job.should.contain("CompletionTime")
    expected_uri = "https://s3.{}.amazonaws.com/{}/{}.json".format(
        region_name, args["OutputBucketName"], args["TranscriptionJobName"],
    )
    transcription_job["Transcript"].should.equal({"TranscriptFileUri": expected_uri})
@mock_transcribe
def test_run_transcription_job_minimal_params():
    """Run a job with only the required parameters, then delete it."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    def poll_job(name):
        # Every poll advances the fake job by one status.
        reply = client.get_transcription_job(TranscriptionJobName=name)
        reply["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
        return reply["TranscriptionJob"]

    job_name = "MyJob"
    args = {
        "TranscriptionJobName": job_name,
        "LanguageCode": "en-US",
        "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"},
    }
    started = client.start_transcription_job(**args)
    started["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
    transcription_job = started["TranscriptionJob"]
    transcription_job.should.contain("Settings")
    transcription_job["Settings"]["ChannelIdentification"].should.equal(False)
    transcription_job["Settings"]["ShowAlternatives"].should.equal(False)

    # First poll: QUEUED
    transcription_job = poll_job(job_name)
    transcription_job["TranscriptionJobName"].should.equal(args["TranscriptionJobName"])
    transcription_job["TranscriptionJobStatus"].should.equal("QUEUED")
    transcription_job["LanguageCode"].should.equal(args["LanguageCode"])
    transcription_job["Media"].should.equal(args["Media"])
    transcription_job.should.contain("Settings")
    transcription_job["Settings"]["ChannelIdentification"].should.equal(False)
    transcription_job["Settings"]["ShowAlternatives"].should.equal(False)
    transcription_job.should.contain("CreationTime")
    for absent in ("StartTime", "CompletionTime", "Transcript"):
        transcription_job.doesnt.contain(absent)

    # Second poll: IN_PROGRESS
    transcription_job = poll_job(job_name)
    transcription_job["TranscriptionJobStatus"].should.equal("IN_PROGRESS")
    transcription_job.should.contain("CreationTime")
    transcription_job.should.contain("StartTime")
    transcription_job.doesnt.contain("CompletionTime")
    transcription_job.doesnt.contain("Transcript")

    # Third poll: COMPLETED
    transcription_job = poll_job(job_name)
    transcription_job["TranscriptionJobStatus"].should.equal("COMPLETED")
    for present in ("CreationTime", "StartTime", "CompletionTime", "Transcript"):
        transcription_job.should.contain(present)
    # No output bucket was given, so the transcript lands in the AWS-hosted one.
    service_bucket_prefix = "https://s3.{0}.amazonaws.com/aws-transcribe-{0}-prod/".format(
        region_name
    )
    transcription_job["Transcript"]["TranscriptFileUri"].should.contain(
        service_bucket_prefix
    )

    # Delete
    client.delete_transcription_job(TranscriptionJobName=job_name)
    client.get_transcription_job.when.called_with(
        TranscriptionJobName=job_name
    ).should.throw(client.exceptions.BadRequestException)
@mock_transcribe
def test_run_transcription_job_s3output_params():
    """Check the transcript URI for a customer bucket, with and without OutputKey."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    job_name = "MyJob"
    args = {
        "TranscriptionJobName": job_name,
        "LanguageCode": "en-US",
        "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"},
        "OutputBucketName": "my-output-bucket",
        "OutputKey": "bucket-key",
    }
    resp = client.start_transcription_job(**args)
    resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)

    # First poll: QUEUED
    resp = client.get_transcription_job(TranscriptionJobName=job_name)
    resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
    transcription_job = resp["TranscriptionJob"]
    transcription_job["TranscriptionJobName"].should.equal(args["TranscriptionJobName"])
    transcription_job["TranscriptionJobStatus"].should.equal("QUEUED")
    # ... already tested in test_run_transcription_job_minimal_params

    # Second poll: IN_PROGRESS
    resp = client.get_transcription_job(TranscriptionJobName=job_name)
    resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
    transcription_job = resp["TranscriptionJob"]
    transcription_job["TranscriptionJobStatus"].should.equal("IN_PROGRESS")
    # ... already tested in test_run_transcription_job_minimal_params

    # Third poll: COMPLETED
    resp = client.get_transcription_job(TranscriptionJobName=job_name)
    resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
    transcription_job = resp["TranscriptionJob"]
    transcription_job["TranscriptionJobStatus"].should.equal("COMPLETED")
    transcription_job.should.contain("CreationTime")
    transcription_job.should.contain("StartTime")
    transcription_job.should.contain("CompletionTime")
    transcription_job.should.contain("Transcript")
    # Customer bucket with an output key: <bucket>/<key>/<job name>.json.
    # The expected URI is built from the request arguments; the previous
    # literal had no placeholder, so its ``.format(region_name)`` was a no-op.
    transcription_job["Transcript"]["TranscriptFileUri"].should.equal(
        "https://s3.{0}.amazonaws.com/{1}/{2}/{3}.json".format(
            region_name, args["OutputBucketName"], args["OutputKey"], job_name
        )
    )

    # A new job without an "OutputKey"
    job_name = "MyJob2"
    args = {
        "TranscriptionJobName": job_name,
        "LanguageCode": "en-US",
        "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"},
        "OutputBucketName": "my-output-bucket",
    }
    client.start_transcription_job(**args)
    # Fast forward: two polls reach IN_PROGRESS, the third completes the job.
    client.get_transcription_job(TranscriptionJobName=job_name)
    client.get_transcription_job(TranscriptionJobName=job_name)
    resp = client.get_transcription_job(TranscriptionJobName=job_name)
    resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200)
    transcription_job = resp["TranscriptionJob"]
    transcription_job["TranscriptionJobStatus"].should.equal("COMPLETED")
    transcription_job.should.contain("CreationTime")
    transcription_job.should.contain("StartTime")
    transcription_job.should.contain("CompletionTime")
    transcription_job.should.contain("Transcript")
    # Customer bucket without an output key: <bucket>/<job name>.json
    transcription_job["Transcript"]["TranscriptFileUri"].should.equal(
        "https://s3.us-east-1.amazonaws.com/my-output-bucket/MyJob2.json"
    )
+ client.get_transcription_job(TranscriptionJobName=job_name) + client.get_transcription_job(TranscriptionJobName=job_name) + resp = client.get_transcription_job(TranscriptionJobName=job_name) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + transcription_job = resp["TranscriptionJob"] + transcription_job["TranscriptionJobStatus"].should.equal("COMPLETED") + transcription_job.should.contain("CreationTime") + transcription_job.should.contain("StartTime") + transcription_job.should.contain("CompletionTime") + transcription_job.should.contain("Transcript") + # Check aws hosted bucket + transcription_job["Transcript"]["TranscriptFileUri"].should.equal( + "https://s3.us-east-1.amazonaws.com/my-output-bucket/MyJob2.json" + ) + + +@mock_transcribe +def test_run_transcription_job_identify_language_params(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + job_name = "MyJob" + args = { + "TranscriptionJobName": job_name, + "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav",}, + "IdentifyLanguage": True, + "LanguageOptions": ["en-US", "en-GB", "es-ES", "de-DE"], + } + resp = client.start_transcription_job(**args) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + transcription_job = resp["TranscriptionJob"] + transcription_job.should.contain("IdentifyLanguage") + transcription_job.should.contain("LanguageOptions") + client.get_transcription_job(TranscriptionJobName=job_name) + resp = client.get_transcription_job(TranscriptionJobName=job_name) + transcription_job = resp["TranscriptionJob"] + transcription_job.should.contain("LanguageCode") + transcription_job.should.contain("IdentifiedLanguageScore") + transcription_job["LanguageCode"].should.equal("en-US") + transcription_job["IdentifiedLanguageScore"].should.equal(0.999645948) + + @mock_transcribe def test_get_nonexistent_medical_transcription_job(): region_name = "us-east-1" @@ -189,6 +447,16 @@ def 
test_get_nonexistent_medical_transcription_job(): ).should.throw(client.exceptions.BadRequestException) +@mock_transcribe +def test_get_nonexistent_transcription_job(): + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + client.get_transcription_job.when.called_with( + TranscriptionJobName="NonexistentJobName" + ).should.throw(client.exceptions.BadRequestException) + + @mock_transcribe def test_run_medical_transcription_job_with_existing_job_name(): @@ -212,6 +480,26 @@ def test_run_medical_transcription_job_with_existing_job_name(): ) +@mock_transcribe +def test_run_transcription_job_with_existing_job_name(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + job_name = "MyJob" + args = { + "TranscriptionJobName": job_name, + "LanguageCode": "en-US", + "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav",}, + } + resp = client.start_transcription_job(**args) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + + client.start_transcription_job.when.called_with(**args).should.throw( + client.exceptions.ConflictException + ) + + @mock_transcribe def test_run_medical_transcription_job_nonexistent_vocabulary(): @@ -233,6 +521,25 @@ def test_run_medical_transcription_job_nonexistent_vocabulary(): ) +@mock_transcribe +def test_run_transcription_job_nonexistent_vocabulary(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + job_name = "MyJob3" + args = { + "TranscriptionJobName": job_name, + "LanguageCode": "en-US", + "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.dat",}, + "OutputBucketName": "my-output-bucket", + "Settings": {"VocabularyName": "NonexistentVocabulary"}, + } + client.start_transcription_job.when.called_with(**args).should.throw( + client.exceptions.BadRequestException + ) + + @mock_transcribe def test_list_medical_transcription_jobs(): @@ -325,6 +632,90 @@ def 
test_list_medical_transcription_jobs(): response.shouldnt.contain("NextToken") +@mock_transcribe +def test_list_transcription_jobs(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + def run_job(index, target_status): + job_name = "Job_{}".format(index) + args = { + "TranscriptionJobName": job_name, + "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav",}, + "OutputBucketName": "my-output-bucket", + "IdentifyLanguage": True, + } + resp = client.start_transcription_job(**args) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + + # IMPLICITLY PROMOTE JOB STATUS TO QUEUED + resp = client.get_transcription_job(TranscriptionJobName=job_name) + + # IN_PROGRESS + if target_status in ["IN_PROGRESS", "COMPLETED"]: + resp = client.get_transcription_job(TranscriptionJobName=job_name) + + # COMPLETED + if target_status == "COMPLETED": + resp = client.get_transcription_job(TranscriptionJobName=job_name) + + # Run 5 pending jobs + for i in range(5): + run_job(i, "PENDING") + + # Run 10 job to IN_PROGRESS + for i in range(5, 15): + run_job(i, "IN_PROGRESS") + + # Run 15 job to COMPLETED + for i in range(15, 30): + run_job(i, "COMPLETED") + + # List all + response = client.list_transcription_jobs() + response.should.contain("TranscriptionJobSummaries") + len(response["TranscriptionJobSummaries"]).should.equal(30) + response.shouldnt.contain("NextToken") + response.shouldnt.contain("Status") + + # List IN_PROGRESS + response = client.list_transcription_jobs(Status="IN_PROGRESS") + response.should.contain("TranscriptionJobSummaries") + len(response["TranscriptionJobSummaries"]).should.equal(10) + response.shouldnt.contain("NextToken") + response.should.contain("Status") + response["Status"].should.equal("IN_PROGRESS") + + # List JobName contains "8" + response = client.list_transcription_jobs(JobNameContains="8") + response.should.contain("TranscriptionJobSummaries") + 
len(response["TranscriptionJobSummaries"]).should.equal(3) + response.shouldnt.contain("NextToken") + response.shouldnt.contain("Status") + + # Pagination by 11 + response = client.list_transcription_jobs(MaxResults=11) + response.should.contain("TranscriptionJobSummaries") + len(response["TranscriptionJobSummaries"]).should.equal(11) + response.should.contain("NextToken") + response.shouldnt.contain("Status") + + response = client.list_transcription_jobs( + NextToken=response["NextToken"], MaxResults=11 + ) + response.should.contain("TranscriptionJobSummaries") + len(response["TranscriptionJobSummaries"]).should.equal(11) + response.should.contain("NextToken") + + response = client.list_transcription_jobs( + NextToken=response["NextToken"], MaxResults=11 + ) + response.should.contain("TranscriptionJobSummaries") + len(response["TranscriptionJobSummaries"]).should.equal(8) + response.shouldnt.contain("NextToken") + + @mock_transcribe def test_create_medical_vocabulary(): @@ -361,6 +752,233 @@ def test_create_medical_vocabulary(): ).should.throw(client.exceptions.BadRequestException) +@mock_transcribe +def test_create_vocabulary(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + vocabulary_name = "MyVocabulary" + resp = client.create_vocabulary( + VocabularyName=vocabulary_name, + LanguageCode="en-US", + VocabularyFileUri="https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt", + ) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + + # PENDING + resp = client.get_vocabulary(VocabularyName=vocabulary_name) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + resp["VocabularyName"].should.equal(vocabulary_name) + resp["LanguageCode"].should.equal("en-US") + resp["VocabularyState"].should.equal("PENDING") + resp.should.contain("LastModifiedTime") + resp.shouldnt.contain("FailureReason") + resp["DownloadUri"].should.contain(vocabulary_name) + + # IN_PROGRESS + resp = 
client.get_vocabulary(VocabularyName=vocabulary_name) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + resp["VocabularyState"].should.equal("READY") + + # Delete + client.delete_vocabulary(VocabularyName=vocabulary_name) + client.get_vocabulary.when.called_with(VocabularyName=vocabulary_name).should.throw( + client.exceptions.BadRequestException + ) + + # Create another vocabulary with Phrases + client.create_vocabulary( + VocabularyName=vocabulary_name, + LanguageCode="en-US", + Phrases=["moto", "is", "awesome"], + ) + resp = client.get_vocabulary(VocabularyName=vocabulary_name) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + resp["VocabularyName"].should.equal(vocabulary_name) + resp["LanguageCode"].should.equal("en-US") + resp["VocabularyState"].should.equal("PENDING") + resp["DownloadUri"].should.contain(vocabulary_name) + resp["DownloadUri"].should.contain( + "https://s3.{0}.amazonaws.com/aws-transcribe-dictionary-model-{0}-prod".format( + region_name + ) + ) + # IN_PROGRESS + resp = client.get_vocabulary(VocabularyName=vocabulary_name) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + resp["VocabularyState"].should.equal("READY") + + +@mock_transcribe +def test_list_vocabularies(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + def create_vocab(index, target_status): + vocabulary_name = "Vocab_{}".format(index) + args = { + "VocabularyName": vocabulary_name, + "LanguageCode": "en-US", + "Phrases": ["moto", "is", "awesome"], + } + resp = client.create_vocabulary(**args) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + + # Forward to "PENDING" + resp = client.get_vocabulary(VocabularyName=vocabulary_name) + + # READY + if target_status == "READY": + resp = client.get_vocabulary(VocabularyName=vocabulary_name) + + # Run 5 pending jobs + for i in range(5): + create_vocab(i, "PENDING") + + # Run 10 job to IN_PROGRESS + for i in range(5, 15): + 
create_vocab(i, "READY") + + # List all + response = client.list_vocabularies() + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(15) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + # List PENDING + response = client.list_vocabularies(StateEquals="PENDING") + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(5) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + # List READY + response = client.list_vocabularies(StateEquals="READY") + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(10) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + # List VocabularyName contains "8" + response = client.list_vocabularies(NameContains="8") + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(1) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + # Pagination by 3 + response = client.list_vocabularies(MaxResults=3) + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(3) + response.should.contain("NextToken") + response.should.contain("ResponseMetadata") + + response = client.list_vocabularies(NextToken=response["NextToken"], MaxResults=3) + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(3) + response.should.contain("NextToken") + response.should.contain("ResponseMetadata") + + response = client.list_vocabularies(NextToken=response["NextToken"], MaxResults=30) + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(9) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + client.delete_vocabulary(VocabularyName="Vocab_5") + response = client.list_vocabularies() + len(response["Vocabularies"]).should.equal(14) + + +@mock_transcribe +def 
test_list_medical_vocabularies(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + def create_vocab(index, target_status): + vocabulary_name = "Vocab_{}".format(index) + resp = client.create_medical_vocabulary( + VocabularyName=vocabulary_name, + LanguageCode="en-US", + VocabularyFileUri="https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt", + ) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + + # Forward to "PENDING" + resp = client.get_medical_vocabulary(VocabularyName=vocabulary_name) + + # READY + if target_status == "READY": + resp = client.get_medical_vocabulary(VocabularyName=vocabulary_name) + + # Run 5 pending jobs + for i in range(5): + create_vocab(i, "PENDING") + + # Run 10 job to IN_PROGRESS + for i in range(5, 15): + create_vocab(i, "READY") + + # List all + response = client.list_medical_vocabularies() + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(15) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + # List PENDING + response = client.list_medical_vocabularies(StateEquals="PENDING") + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(5) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + # List READY + response = client.list_medical_vocabularies(StateEquals="READY") + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(10) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + # List VocabularyName contains "8" + response = client.list_medical_vocabularies(NameContains="8") + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(1) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + # Pagination by 3 + response = client.list_medical_vocabularies(MaxResults=3) + 
response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(3) + response.should.contain("NextToken") + response.should.contain("ResponseMetadata") + + response = client.list_medical_vocabularies( + NextToken=response["NextToken"], MaxResults=3 + ) + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(3) + response.should.contain("NextToken") + response.should.contain("ResponseMetadata") + + response = client.list_medical_vocabularies( + NextToken=response["NextToken"], MaxResults=30 + ) + response.should.contain("Vocabularies") + len(response["Vocabularies"]).should.equal(9) + response.shouldnt.contain("NextToken") + response.should.contain("ResponseMetadata") + + client.delete_medical_vocabulary(VocabularyName="Vocab_5") + response = client.list_medical_vocabularies() + len(response["Vocabularies"]).should.equal(14) + + @mock_transcribe def test_get_nonexistent_medical_vocabulary(): region_name = "us-east-1" @@ -371,6 +989,16 @@ def test_get_nonexistent_medical_vocabulary(): ).should.throw(client.exceptions.BadRequestException) +@mock_transcribe +def test_get_nonexistent_vocabulary(): + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + client.get_vocabulary.when.called_with( + VocabularyName="NonexistentVocabularyName" + ).should.throw(client.exceptions.BadRequestException) + + @mock_transcribe def test_create_medical_vocabulary_with_existing_vocabulary_name(): @@ -389,3 +1017,47 @@ def test_create_medical_vocabulary_with_existing_vocabulary_name(): client.create_medical_vocabulary.when.called_with(**args).should.throw( client.exceptions.ConflictException ) + + +@mock_transcribe +def test_create_vocabulary_with_existing_vocabulary_name(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + vocabulary_name = "MyVocabulary" + args = { + "VocabularyName": vocabulary_name, + "LanguageCode": "en-US", + "VocabularyFileUri": 
"https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt", + } + resp = client.create_vocabulary(**args) + resp["ResponseMetadata"]["HTTPStatusCode"].should.equal(200) + + client.create_vocabulary.when.called_with(**args).should.throw( + client.exceptions.ConflictException + ) + + +@mock_transcribe +def test_create_vocabulary_with_bad_request(): + + region_name = "us-east-1" + client = boto3.client("transcribe", region_name=region_name) + + vocabulary_name = "MyVocabulary" + args = { + "VocabularyName": vocabulary_name, + "LanguageCode": "en-US", + } + client.create_vocabulary.when.called_with(**args).should.throw( + client.exceptions.BadRequestException + ) + args = { + "VocabularyName": vocabulary_name, + "Phrases": [], + "LanguageCode": "en-US", + } + client.create_vocabulary.when.called_with(**args).should.throw( + client.exceptions.BadRequestException + )