# -*- coding: utf-8 -*- import boto3 import pytest from moto import mock_transcribe @mock_transcribe def test_run_medical_transcription_job_minimal_params(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) job_name = "MyJob" args = { "MedicalTranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, "OutputBucketName": "my-output-bucket", "Specialty": "PRIMARYCARE", "Type": "CONVERSATION", } resp = client.start_medical_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 # CREATED resp = client.get_medical_transcription_job(MedicalTranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["MedicalTranscriptionJob"] assert transcription_job["MedicalTranscriptionJobName"] == ( args["MedicalTranscriptionJobName"] ) assert transcription_job["TranscriptionJobStatus"] == "QUEUED" assert transcription_job["LanguageCode"] == args["LanguageCode"] assert transcription_job["Media"] == args["Media"] assert "CreationTime" in transcription_job assert "StartTime" not in transcription_job assert "CompletionTime" not in transcription_job assert "Transcript" not in transcription_job assert transcription_job["Settings"]["ChannelIdentification"] is False assert transcription_job["Settings"]["ShowAlternatives"] is False assert transcription_job["Specialty"] == args["Specialty"] assert transcription_job["Type"] == args["Type"] # IN_PROGRESS resp = client.get_medical_transcription_job(MedicalTranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["MedicalTranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "IN_PROGRESS" assert transcription_job["MediaFormat"] == "wav" assert "StartTime" in transcription_job assert "CompletionTime" not in transcription_job assert "Transcript" not in transcription_job assert 
transcription_job["MediaSampleRateHertz"] == 44100 # COMPLETED resp = client.get_medical_transcription_job(MedicalTranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["MedicalTranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "COMPLETED" assert "CompletionTime" in transcription_job assert transcription_job["Transcript"] == { "TranscriptFileUri": ( f"https://s3.{region_name}.amazonaws.com" f"/{args['OutputBucketName']}/medical" f"/{args['MedicalTranscriptionJobName']}.json" ), } # Delete client.delete_medical_transcription_job(MedicalTranscriptionJobName=job_name) with pytest.raises(client.exceptions.BadRequestException): client.get_medical_transcription_job(MedicalTranscriptionJobName=job_name) @mock_transcribe def test_run_medical_transcription_job_all_params(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) vocabulary_name = "MyMedicalVocabulary" resp = client.create_medical_vocabulary( VocabularyName=vocabulary_name, LanguageCode="en-US", VocabularyFileUri="https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt", ) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 job_name = "MyJob2" args = { "MedicalTranscriptionJobName": job_name, "LanguageCode": "en-US", "MediaSampleRateHertz": 48000, "MediaFormat": "flac", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.dat"}, "OutputBucketName": "my-output-bucket", "OutputEncryptionKMSKeyId": ( "arn:aws:kms:us-east-1:012345678901:key" "/37111b5e-8eff-4706-ae3a-d4f9d1d559fc" ), "Settings": { "ShowSpeakerLabels": True, "MaxSpeakerLabels": 5, "ChannelIdentification": True, "ShowAlternatives": True, "MaxAlternatives": 6, "VocabularyName": vocabulary_name, }, "Specialty": "PRIMARYCARE", "Type": "CONVERSATION", } resp = client.start_medical_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 # CREATED resp = 
client.get_medical_transcription_job(MedicalTranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["MedicalTranscriptionJob"] assert transcription_job["MedicalTranscriptionJobName"] == ( args["MedicalTranscriptionJobName"] ) assert transcription_job["TranscriptionJobStatus"] == "QUEUED" assert transcription_job["LanguageCode"] == args["LanguageCode"] assert transcription_job["Media"] == args["Media"] assert "CreationTime" in transcription_job assert "StartTime" not in transcription_job assert "CompletionTime" not in transcription_job assert "Transcript" not in transcription_job assert transcription_job["Settings"]["ShowSpeakerLabels"] == ( args["Settings"]["ShowSpeakerLabels"] ) assert transcription_job["Settings"]["MaxSpeakerLabels"] == ( args["Settings"]["MaxSpeakerLabels"] ) assert transcription_job["Settings"]["ChannelIdentification"] == ( args["Settings"]["ChannelIdentification"] ) assert transcription_job["Settings"]["ShowAlternatives"] == ( args["Settings"]["ShowAlternatives"] ) assert transcription_job["Settings"]["MaxAlternatives"] == ( args["Settings"]["MaxAlternatives"] ) assert transcription_job["Settings"]["VocabularyName"] == ( args["Settings"]["VocabularyName"] ) assert transcription_job["Specialty"] == args["Specialty"] assert transcription_job["Type"] == args["Type"] # IN_PROGRESS resp = client.get_medical_transcription_job(MedicalTranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["MedicalTranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "IN_PROGRESS" assert transcription_job["MediaFormat"] == "flac" assert "StartTime" in transcription_job assert "CompletionTime" not in transcription_job assert "Transcript" not in transcription_job assert transcription_job["MediaSampleRateHertz"] == 48000 # COMPLETED resp = client.get_medical_transcription_job(MedicalTranscriptionJobName=job_name) assert 
resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["MedicalTranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "COMPLETED" assert "CompletionTime" in transcription_job assert transcription_job["Transcript"] == { "TranscriptFileUri": ( f"https://s3.{region_name}.amazonaws.com" f"/{args['OutputBucketName']}/medical" f"/{args['MedicalTranscriptionJobName']}.json" ), } @mock_transcribe def test_run_transcription_job_all_params(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) vocabulary_name = "MyVocabulary" resp = client.create_vocabulary( VocabularyName=vocabulary_name, LanguageCode="en-US", VocabularyFileUri="https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt", ) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 job_name = "MyJob2" args = { "TranscriptionJobName": job_name, "LanguageCode": "en-US", "MediaSampleRateHertz": 48000, "MediaFormat": "flac", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.dat"}, "OutputBucketName": "my-output-bucket", "OutputEncryptionKMSKeyId": ( "arn:aws:kms:us-east-1:012345678901:key" "/37111b5e-8eff-4706-ae3a-d4f9d1d559fc" ), "Settings": { "ShowSpeakerLabels": True, "MaxSpeakerLabels": 5, "ChannelIdentification": False, "ShowAlternatives": True, "MaxAlternatives": 6, "VocabularyName": vocabulary_name, }, "Subtitles": { "Formats": ["srt", "vtt"], "OutputStartIndex": 1, }, # Missing `ContentRedaction`, `JobExecutionSettings`, # `VocabularyFilterName`, `LanguageModel` } resp = client.start_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 # CREATED resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobName"] == args["TranscriptionJobName"] assert transcription_job["TranscriptionJobStatus"] == "QUEUED" assert 
transcription_job["LanguageCode"] == args["LanguageCode"] assert transcription_job["Media"] == args["Media"] assert "CreationTime" in transcription_job assert "StartTime" not in transcription_job assert "CompletionTime" not in transcription_job assert "Transcript" not in transcription_job assert transcription_job["Settings"]["ShowSpeakerLabels"] == ( args["Settings"]["ShowSpeakerLabels"] ) assert transcription_job["Settings"]["MaxSpeakerLabels"] == ( args["Settings"]["MaxSpeakerLabels"] ) assert transcription_job["Settings"]["ChannelIdentification"] == ( args["Settings"]["ChannelIdentification"] ) assert transcription_job["Settings"]["ShowAlternatives"] == ( args["Settings"]["ShowAlternatives"] ) assert transcription_job["Settings"]["MaxAlternatives"] == ( args["Settings"]["MaxAlternatives"] ) assert transcription_job["Settings"]["VocabularyName"] == ( args["Settings"]["VocabularyName"] ) # IN_PROGRESS resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "IN_PROGRESS" assert transcription_job["MediaFormat"] == "flac" assert "StartTime" in transcription_job assert "CompletionTime" not in transcription_job assert "Transcript" not in transcription_job assert transcription_job["MediaSampleRateHertz"] == 48000 # COMPLETED resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "COMPLETED" assert "CompletionTime" in transcription_job assert transcription_job["Transcript"] == { "TranscriptFileUri": ( f"https://s3.{region_name}.amazonaws.com" f"/{args['OutputBucketName']}" f"/{args['TranscriptionJobName']}.json" ), } assert transcription_job["Subtitles"] == { "Formats": args["Subtitles"]["Formats"], "OutputStartIndex": 1, "SubtitleFileUris": 
[ ( f"https://s3.{region_name}.amazonaws.com" f"/{args['OutputBucketName']}" f"/{args['TranscriptionJobName']}.{format}" ) for format in args["Subtitles"]["Formats"] ], } @mock_transcribe def test_run_transcription_job_minimal_params(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) job_name = "MyJob" args = { "TranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, } resp = client.start_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert "Settings" in transcription_job assert transcription_job["Settings"]["ChannelIdentification"] is False assert transcription_job["Settings"]["ShowAlternatives"] is False # CREATED resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobName"] == args["TranscriptionJobName"] assert transcription_job["TranscriptionJobStatus"] == "QUEUED" assert transcription_job["LanguageCode"] == args["LanguageCode"] assert transcription_job["Media"] == args["Media"] assert "Settings" in transcription_job assert transcription_job["Settings"]["ChannelIdentification"] is False assert transcription_job["Settings"]["ShowAlternatives"] is False assert "CreationTime" in transcription_job assert "StartTime" not in transcription_job assert "CompletionTime" not in transcription_job assert "Transcript" not in transcription_job # QUEUED resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "IN_PROGRESS" assert "CreationTime" in transcription_job assert "StartTime" in transcription_job assert "CompletionTime" not in transcription_job assert "Transcript" not 
in transcription_job # IN_PROGESS resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "COMPLETED" assert "CreationTime" in transcription_job assert "StartTime" in transcription_job assert "CompletionTime" in transcription_job assert "Transcript" in transcription_job # Check aws hosted bucket assert ( f"https://s3.{region_name}.amazonaws.com/aws-transcribe-{region_name}-prod/" ) in transcription_job["Transcript"]["TranscriptFileUri"] assert transcription_job["Subtitles"] == { "Formats": [], "OutputStartIndex": 0, "SubtitleFileUris": [], } # Delete client.delete_transcription_job(TranscriptionJobName=job_name) with pytest.raises(client.exceptions.BadRequestException): client.get_transcription_job(TranscriptionJobName=job_name) @mock_transcribe def test_run_transcription_job_s3output_params(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) job_name = "MyJob" args = { "TranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, "OutputBucketName": "my-output-bucket", "OutputKey": "bucket.json.key.json", "Subtitles": {"Formats": ["vtt", "srt"]}, } resp = client.start_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 # CREATED resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobName"] == args["TranscriptionJobName"] assert transcription_job["TranscriptionJobStatus"] == "QUEUED" # ... 
already tested in test_run_transcription_job_minimal_awsoutput_params # QUEUED resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "IN_PROGRESS" # ... already tested in test_run_transcription_job_minimal_awsoutput_params # IN_PROGESS resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "COMPLETED" assert "CreationTime" in transcription_job assert "StartTime" in transcription_job assert "CompletionTime" in transcription_job assert "Transcript" in transcription_job # Check aws hosted bucket assert ( "https://s3.us-east-1.amazonaws.com/my-output-bucket/bucket.json.key.json" ) in transcription_job["Transcript"]["TranscriptFileUri"] assert transcription_job["Subtitles"] == { "Formats": args["Subtitles"]["Formats"], "SubtitleFileUris": [ f"https://s3.us-east-1.amazonaws.com/my-output-bucket/bucket.json.key.{format}" for format in args["Subtitles"]["Formats"] ], } # A new job without an "OutputKey" job_name = "MyJob2" args = { "TranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, "OutputBucketName": "my-output-bucket", } client.start_transcription_job(**args) # Fast forward ... 
client.get_transcription_job(TranscriptionJobName=job_name) client.get_transcription_job(TranscriptionJobName=job_name) resp = client.get_transcription_job(TranscriptionJobName=job_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 transcription_job = resp["TranscriptionJob"] assert transcription_job["TranscriptionJobStatus"] == "COMPLETED" assert "CreationTime" in transcription_job assert "StartTime" in transcription_job assert "CompletionTime" in transcription_job assert "Transcript" in transcription_job # Check aws hosted bucket assert transcription_job["Transcript"]["TranscriptFileUri"] == ( "https://s3.us-east-1.amazonaws.com/my-output-bucket/MyJob2.json" ) @mock_transcribe def test_run_transcription_job_identify_languages_params(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) # IdentifyLanguage job_name = "MyJob" args = { "TranscriptionJobName": job_name, "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, "IdentifyLanguage": True, "LanguageOptions": ["en-US", "en-GB", "es-ES", "de-DE"], } resp_data = [ client.start_transcription_job(**args), # CREATED client.get_transcription_job(TranscriptionJobName=job_name), # QUEUED client.get_transcription_job(TranscriptionJobName=job_name), # IN_PROGRESS client.list_transcription_jobs(), # IN_PROGRESS ] for resp in resp_data: assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 if "TranscriptionJob" in resp: transcription_job = resp["TranscriptionJob"] elif "TranscriptionJobSummaries" in resp: transcription_job = resp["TranscriptionJobSummaries"][0] assert "IdentifyLanguage" in transcription_job assert "LanguageCodes" not in transcription_job assert "IdentifyMultipleLanguages" not in transcription_job if "TranscriptionJobStatus" in transcription_job and ( transcription_job["TranscriptionJobStatus"] == "IN_PROGRESS" or transcription_job["TranscriptionJobStatus"] == "COMPLETED" ): assert transcription_job["LanguageCode"] == "en-US" assert 
transcription_job["IdentifiedLanguageScore"] == 0.999645948 # IdentifyMultipleLanguages job_name = "MyJob2" args = { "TranscriptionJobName": job_name, "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, "IdentifyMultipleLanguages": True, "LanguageOptions": ["en-US", "en-GB", "es-ES", "de-DE"], } resp_data = [ client.start_transcription_job(**args), # CREATED client.get_transcription_job(TranscriptionJobName=job_name), # QUEUED client.get_transcription_job(TranscriptionJobName=job_name), # IN_PROGRESS client.list_transcription_jobs(), # IN_PROGRESS ] for resp in resp_data: assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 if "TranscriptionJob" in resp: transcription_job = resp["TranscriptionJob"] elif "TranscriptionJobSummaries" in resp: transcription_job = resp["TranscriptionJobSummaries"][1] assert "IdentifyMultipleLanguages" in transcription_job assert "LanguageCode" not in transcription_job assert "IdentifyLanguage" not in transcription_job if "TranscriptionJobStatus" in transcription_job and ( transcription_job["TranscriptionJobStatus"] == "IN_PROGRESS" or transcription_job["TranscriptionJobStatus"] == "COMPLETED" ): assert transcription_job["LanguageCodes"][0]["LanguageCode"] == "en-US" assert transcription_job["LanguageCodes"][0]["DurationInSeconds"] == 123.0 assert transcription_job["LanguageCodes"][1]["LanguageCode"] == "en-GB" assert transcription_job["LanguageCodes"][1]["DurationInSeconds"] == 321.0 assert transcription_job["IdentifiedLanguageScore"] == 0.999645948 @mock_transcribe def test_get_nonexistent_medical_transcription_job(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) with pytest.raises(client.exceptions.BadRequestException): client.get_medical_transcription_job( MedicalTranscriptionJobName="NonexistentJobName" ) @mock_transcribe def test_get_nonexistent_transcription_job(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) with 
pytest.raises(client.exceptions.BadRequestException): client.get_transcription_job(TranscriptionJobName="NonexistentJobName") @mock_transcribe def test_run_medical_transcription_job_with_existing_job_name(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) job_name = "MyJob" args = { "MedicalTranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, "OutputBucketName": "my-output-bucket", "Specialty": "PRIMARYCARE", "Type": "CONVERSATION", } resp = client.start_medical_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 with pytest.raises(client.exceptions.ConflictException): client.start_medical_transcription_job(**args) @mock_transcribe def test_run_transcription_job_with_existing_job_name(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) job_name = "MyJob" args = { "TranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, } resp = client.start_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 with pytest.raises(client.exceptions.ConflictException): client.start_transcription_job(**args) @mock_transcribe def test_run_medical_transcription_job_nonexistent_vocabulary(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) job_name = "MyJob3" args = { "MedicalTranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.dat"}, "OutputBucketName": "my-output-bucket", "Settings": {"VocabularyName": "NonexistentVocabulary"}, "Specialty": "PRIMARYCARE", "Type": "CONVERSATION", } with pytest.raises(client.exceptions.BadRequestException): client.start_medical_transcription_job(**args) @mock_transcribe def test_run_transcription_job_nonexistent_vocabulary(): region_name = "us-east-1" client = boto3.client("transcribe", 
region_name=region_name) job_name = "MyJob3" args = { "TranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.dat"}, "OutputBucketName": "my-output-bucket", "Settings": {"VocabularyName": "NonexistentVocabulary"}, } with pytest.raises(client.exceptions.BadRequestException): client.start_transcription_job(**args) @mock_transcribe def test_list_medical_transcription_jobs(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) def run_job(index, target_status): job_name = f"Job_{index}" args = { "MedicalTranscriptionJobName": job_name, "LanguageCode": "en-US", "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, "OutputBucketName": "my-output-bucket", "Specialty": "PRIMARYCARE", "Type": "CONVERSATION", } resp = client.start_medical_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 # IMPLICITLY PROMOTE JOB STATUS TO QUEUED resp = client.get_medical_transcription_job( MedicalTranscriptionJobName=job_name ) # IN_PROGRESS if target_status in ["IN_PROGRESS", "COMPLETED"]: resp = client.get_medical_transcription_job( MedicalTranscriptionJobName=job_name ) # COMPLETED if target_status == "COMPLETED": resp = client.get_medical_transcription_job( MedicalTranscriptionJobName=job_name ) # Run 5 pending jobs for i in range(5): run_job(i, "PENDING") # Run 10 job to IN_PROGRESS for i in range(5, 15): run_job(i, "IN_PROGRESS") # Run 15 job to COMPLETED for i in range(15, 30): run_job(i, "COMPLETED") # List all response = client.list_medical_transcription_jobs() assert "MedicalTranscriptionJobSummaries" in response assert len(response["MedicalTranscriptionJobSummaries"]) == 30 assert "NextToken" not in response assert "Status" not in response # List IN_PROGRESS response = client.list_medical_transcription_jobs(Status="IN_PROGRESS") assert "MedicalTranscriptionJobSummaries" in response assert len(response["MedicalTranscriptionJobSummaries"]) == 10 
assert "NextToken" not in response assert "Status" in response assert response["Status"] == "IN_PROGRESS" # List JobName contains "8" response = client.list_medical_transcription_jobs(JobNameContains="8") assert "MedicalTranscriptionJobSummaries" in response assert len(response["MedicalTranscriptionJobSummaries"]) == 3 assert "NextToken" not in response assert "Status" not in response # Pagination by 11 response = client.list_medical_transcription_jobs(MaxResults=11) assert "MedicalTranscriptionJobSummaries" in response assert len(response["MedicalTranscriptionJobSummaries"]) == 11 assert "NextToken" in response assert "Status" not in response response = client.list_medical_transcription_jobs( NextToken=response["NextToken"], MaxResults=11 ) assert "MedicalTranscriptionJobSummaries" in response assert len(response["MedicalTranscriptionJobSummaries"]) == 11 assert "NextToken" in response response = client.list_medical_transcription_jobs( NextToken=response["NextToken"], MaxResults=11 ) assert "MedicalTranscriptionJobSummaries" in response assert len(response["MedicalTranscriptionJobSummaries"]) == 8 assert "NextToken" not in response @mock_transcribe def test_list_transcription_jobs(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) def run_job(index, target_status): job_name = f"Job_{index}" args = { "TranscriptionJobName": job_name, "Media": {"MediaFileUri": "s3://my-bucket/my-media-file.wav"}, "OutputBucketName": "my-output-bucket", "IdentifyLanguage": True, } resp = client.start_transcription_job(**args) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 # IMPLICITLY PROMOTE JOB STATUS TO QUEUED resp = client.get_transcription_job(TranscriptionJobName=job_name) # IN_PROGRESS if target_status in ["IN_PROGRESS", "COMPLETED"]: resp = client.get_transcription_job(TranscriptionJobName=job_name) # COMPLETED if target_status == "COMPLETED": resp = client.get_transcription_job(TranscriptionJobName=job_name) # Run 5 pending 
jobs for i in range(5): run_job(i, "PENDING") # Run 10 job to IN_PROGRESS for i in range(5, 15): run_job(i, "IN_PROGRESS") # Run 15 job to COMPLETED for i in range(15, 30): run_job(i, "COMPLETED") # List all response = client.list_transcription_jobs() assert "TranscriptionJobSummaries" in response assert len(response["TranscriptionJobSummaries"]) == 30 assert "NextToken" not in response assert "Status" not in response # List IN_PROGRESS response = client.list_transcription_jobs(Status="IN_PROGRESS") assert "TranscriptionJobSummaries" in response assert len(response["TranscriptionJobSummaries"]) == 10 assert "NextToken" not in response assert "Status" in response assert response["Status"] == "IN_PROGRESS" # List JobName contains "8" response = client.list_transcription_jobs(JobNameContains="8") assert "TranscriptionJobSummaries" in response assert len(response["TranscriptionJobSummaries"]) == 3 assert "NextToken" not in response assert "Status" not in response # Pagination by 11 response = client.list_transcription_jobs(MaxResults=11) assert "TranscriptionJobSummaries" in response assert len(response["TranscriptionJobSummaries"]) == 11 assert "NextToken" in response assert "Status" not in response response = client.list_transcription_jobs( NextToken=response["NextToken"], MaxResults=11 ) assert "TranscriptionJobSummaries" in response assert len(response["TranscriptionJobSummaries"]) == 11 assert "NextToken" in response response = client.list_transcription_jobs( NextToken=response["NextToken"], MaxResults=11 ) assert "TranscriptionJobSummaries" in response assert len(response["TranscriptionJobSummaries"]) == 8 assert "NextToken" not in response @mock_transcribe def test_create_medical_vocabulary(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) vocabulary_name = "MyVocabulary" resp = client.create_medical_vocabulary( VocabularyName=vocabulary_name, LanguageCode="en-US", 
VocabularyFileUri="https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt", ) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 # PENDING resp = client.get_medical_vocabulary(VocabularyName=vocabulary_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 assert resp["VocabularyName"] == vocabulary_name assert resp["LanguageCode"] == "en-US" assert resp["VocabularyState"] == "PENDING" assert "LastModifiedTime" in resp assert "FailureReason" not in resp assert vocabulary_name in resp["DownloadUri"] # IN_PROGRESS resp = client.get_medical_vocabulary(VocabularyName=vocabulary_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 assert resp["VocabularyState"] == "READY" # Delete client.delete_medical_vocabulary(VocabularyName=vocabulary_name) with pytest.raises(client.exceptions.BadRequestException): client.get_medical_vocabulary(VocabularyName=vocabulary_name) @mock_transcribe def test_create_vocabulary(): region_name = "us-east-1" client = boto3.client("transcribe", region_name=region_name) vocabulary_name = "MyVocabulary" resp = client.create_vocabulary( VocabularyName=vocabulary_name, LanguageCode="en-US", VocabularyFileUri="https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt", ) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 # PENDING resp = client.get_vocabulary(VocabularyName=vocabulary_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 assert resp["VocabularyName"] == vocabulary_name assert resp["LanguageCode"] == "en-US" assert resp["VocabularyState"] == "PENDING" assert "LastModifiedTime" in resp assert "FailureReason" not in resp assert vocabulary_name in resp["DownloadUri"] # IN_PROGRESS resp = client.get_vocabulary(VocabularyName=vocabulary_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 assert resp["VocabularyState"] == "READY" # Delete client.delete_vocabulary(VocabularyName=vocabulary_name) with pytest.raises(client.exceptions.BadRequestException): 
client.get_vocabulary(VocabularyName=vocabulary_name) # Create another vocabulary with Phrases client.create_vocabulary( VocabularyName=vocabulary_name, LanguageCode="en-US", Phrases=["moto", "is", "awesome"], ) resp = client.get_vocabulary(VocabularyName=vocabulary_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 assert resp["VocabularyName"] == vocabulary_name assert resp["LanguageCode"] == "en-US" assert resp["VocabularyState"] == "PENDING" assert vocabulary_name in resp["DownloadUri"] assert ( f"https://s3.{region_name}.amazonaws.com/aws-transcribe-dictionary-model-{region_name}-prod" ) in resp["DownloadUri"] # IN_PROGRESS resp = client.get_vocabulary(VocabularyName=vocabulary_name) assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200 assert resp["VocabularyState"] == "READY"


def _assert_vocabulary_page(response, expected_count, has_next_token):
    """Assert the shape of one vocabulary-listing response page.

    Args:
        response: Response dict returned by a boto3 list call.
        expected_count: Number of entries expected under ``"Vocabularies"``.
        has_next_token: ``True`` if ``"NextToken"`` must be present in the
            response, ``False`` if it must be absent.
    """
    assert "Vocabularies" in response
    assert len(response["Vocabularies"]) == expected_count
    if has_next_token:
        assert "NextToken" in response
    else:
        assert "NextToken" not in response
    assert "ResponseMetadata" in response


@mock_transcribe
def test_list_vocabularies():
    """List vocabularies unfiltered, filtered, paginated, and after a delete."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    def create_vocab(index, target_status):
        """Create ``Vocab_<index>`` and poll it towards *target_status*.

        Only ``"READY"`` triggers the second poll; any other value leaves
        the vocabulary after a single ``get_vocabulary`` call.
        """
        vocabulary_name = f"Vocab_{index}"
        args = {
            "VocabularyName": vocabulary_name,
            "LanguageCode": "en-US",
            "Phrases": ["moto", "is", "awesome"],
        }
        resp = client.create_vocabulary(**args)
        assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200

        # The StateEquals counts asserted below rely on this polling: one
        # get call leaves the vocabulary PENDING, a second one makes it READY.
        client.get_vocabulary(VocabularyName=vocabulary_name)
        if target_status == "READY":
            client.get_vocabulary(VocabularyName=vocabulary_name)

    # 5 vocabularies left in PENDING (Vocab_0 .. Vocab_4)
    for i in range(5):
        create_vocab(i, "PENDING")

    # 10 vocabularies moved on to READY (Vocab_5 .. Vocab_14)
    for i in range(5, 15):
        create_vocab(i, "READY")

    # List all
    response = client.list_vocabularies()
    _assert_vocabulary_page(response, 15, has_next_token=False)

    # List PENDING
    response = client.list_vocabularies(StateEquals="PENDING")
    _assert_vocabulary_page(response, 5, has_next_token=False)

    # List READY
    response = client.list_vocabularies(StateEquals="READY")
    _assert_vocabulary_page(response, 10, has_next_token=False)

    # List VocabularyName contains "8" (only Vocab_8 among Vocab_0..Vocab_14)
    response = client.list_vocabularies(NameContains="8")
    _assert_vocabulary_page(response, 1, has_next_token=False)

    # Pagination by 3: two pages of 3, then the remaining 9 in one large page
    response = client.list_vocabularies(MaxResults=3)
    _assert_vocabulary_page(response, 3, has_next_token=True)

    response = client.list_vocabularies(NextToken=response["NextToken"], MaxResults=3)
    _assert_vocabulary_page(response, 3, has_next_token=True)

    response = client.list_vocabularies(NextToken=response["NextToken"], MaxResults=30)
    _assert_vocabulary_page(response, 9, has_next_token=False)

    # A deleted vocabulary no longer shows up in the listing
    client.delete_vocabulary(VocabularyName="Vocab_5")
    response = client.list_vocabularies()
    assert len(response["Vocabularies"]) == 14


@mock_transcribe
def test_list_medical_vocabularies():
    """List medical vocabularies unfiltered, filtered, paginated, and after a delete."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    def create_vocab(index, target_status):
        """Create medical ``Vocab_<index>`` and poll it towards *target_status*.

        Only ``"READY"`` triggers the second poll; any other value leaves
        the vocabulary after a single ``get_medical_vocabulary`` call.
        """
        vocabulary_name = f"Vocab_{index}"
        resp = client.create_medical_vocabulary(
            VocabularyName=vocabulary_name,
            LanguageCode="en-US",
            VocabularyFileUri="https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt",
        )
        assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200

        # The StateEquals counts asserted below rely on this polling: one
        # get call leaves the vocabulary PENDING, a second one makes it READY.
        client.get_medical_vocabulary(VocabularyName=vocabulary_name)
        if target_status == "READY":
            client.get_medical_vocabulary(VocabularyName=vocabulary_name)

    # 5 vocabularies left in PENDING (Vocab_0 .. Vocab_4)
    for i in range(5):
        create_vocab(i, "PENDING")

    # 10 vocabularies moved on to READY (Vocab_5 .. Vocab_14)
    for i in range(5, 15):
        create_vocab(i, "READY")

    # List all
    response = client.list_medical_vocabularies()
    _assert_vocabulary_page(response, 15, has_next_token=False)

    # List PENDING
    response = client.list_medical_vocabularies(StateEquals="PENDING")
    _assert_vocabulary_page(response, 5, has_next_token=False)

    # List READY
    response = client.list_medical_vocabularies(StateEquals="READY")
    _assert_vocabulary_page(response, 10, has_next_token=False)

    # List VocabularyName contains "8" (only Vocab_8 among Vocab_0..Vocab_14)
    response = client.list_medical_vocabularies(NameContains="8")
    _assert_vocabulary_page(response, 1, has_next_token=False)

    # Pagination by 3: two pages of 3, then the remaining 9 in one large page
    response = client.list_medical_vocabularies(MaxResults=3)
    _assert_vocabulary_page(response, 3, has_next_token=True)

    response = client.list_medical_vocabularies(
        NextToken=response["NextToken"], MaxResults=3
    )
    _assert_vocabulary_page(response, 3, has_next_token=True)

    response = client.list_medical_vocabularies(
        NextToken=response["NextToken"], MaxResults=30
    )
    _assert_vocabulary_page(response, 9, has_next_token=False)

    # A deleted vocabulary no longer shows up in the listing
    client.delete_medical_vocabulary(VocabularyName="Vocab_5")
    response = client.list_medical_vocabularies()
    assert len(response["Vocabularies"]) == 14


@mock_transcribe
def test_get_nonexistent_medical_vocabulary():
    """Getting an unknown medical vocabulary raises ``BadRequestException``."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    with pytest.raises(client.exceptions.BadRequestException):
        client.get_medical_vocabulary(VocabularyName="NonexistentVocabularyName")


@mock_transcribe
def test_get_nonexistent_vocabulary():
    """Getting an unknown vocabulary raises ``BadRequestException``."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    with pytest.raises(client.exceptions.BadRequestException):
        client.get_vocabulary(VocabularyName="NonexistentVocabularyName")


@mock_transcribe
def test_create_medical_vocabulary_with_existing_vocabulary_name():
    """Creating a medical vocabulary twice under one name raises ``ConflictException``."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    vocabulary_name = "MyVocabulary"
    args = {
        "VocabularyName": vocabulary_name,
        "LanguageCode": "en-US",
        "VocabularyFileUri": "https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt",
    }

    # First creation succeeds ...
    resp = client.create_medical_vocabulary(**args)
    assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200

    # ... the identical second request must be rejected.
    with pytest.raises(client.exceptions.ConflictException):
        client.create_medical_vocabulary(**args)


@mock_transcribe
def test_create_vocabulary_with_existing_vocabulary_name():
    """Creating a vocabulary twice under one name raises ``ConflictException``."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    vocabulary_name = "MyVocabulary"
    args = {
        "VocabularyName": vocabulary_name,
        "LanguageCode": "en-US",
        "VocabularyFileUri": "https://s3.us-east-1.amazonaws.com/AWSDOC-EXAMPLE-BUCKET/vocab.txt",
    }

    # First creation succeeds ...
    resp = client.create_vocabulary(**args)
    assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200

    # ... the identical second request must be rejected.
    with pytest.raises(client.exceptions.ConflictException):
        client.create_vocabulary(**args)


@mock_transcribe
def test_create_vocabulary_with_bad_request():
    """``create_vocabulary`` without usable vocabulary content raises ``BadRequestException``."""
    region_name = "us-east-1"
    client = boto3.client("transcribe", region_name=region_name)

    vocabulary_name = "MyVocabulary"

    # Case 1: neither Phrases nor VocabularyFileUri is supplied.
    args = {
        "VocabularyName": vocabulary_name,
        "LanguageCode": "en-US",
    }
    with pytest.raises(client.exceptions.BadRequestException):
        client.create_vocabulary(**args)

    # Case 2: Phrases is supplied but empty.
    args = {
        "VocabularyName": vocabulary_name,
        "Phrases": [],
        "LanguageCode": "en-US",
    }
    with pytest.raises(client.exceptions.BadRequestException):
        client.create_vocabulary(**args)