Nearly finished Polly implementation

This commit is contained in:
Terry Cain 2017-09-26 16:46:18 +01:00
parent b17136e36c
commit fcacecbef0
12 changed files with 520 additions and 3 deletions

View File

@ -100,6 +100,8 @@ It gets even better! Moto isn't just for Python code and it isn't just for S3. L
|------------------------------------------------------------------------------|
| KMS | @mock_kms | basic endpoints done |
|------------------------------------------------------------------------------|
| Polly | @mock_polly | all endpoints done |
|------------------------------------------------------------------------------|
| RDS | @mock_rds | core endpoints done |
|------------------------------------------------------------------------------|
| RDS2 | @mock_rds2 | core endpoints done |

View File

@ -22,10 +22,11 @@ from .elbv2 import mock_elbv2 # flake8: noqa
from .emr import mock_emr, mock_emr_deprecated # flake8: noqa
from .events import mock_events # flake8: noqa
from .glacier import mock_glacier, mock_glacier_deprecated # flake8: noqa
from .opsworks import mock_opsworks, mock_opsworks_deprecated # flake8: noqa
from .iam import mock_iam, mock_iam_deprecated # flake8: noqa
from .kinesis import mock_kinesis, mock_kinesis_deprecated # flake8: noqa
from .kms import mock_kms, mock_kms_deprecated # flake8: noqa
from .opsworks import mock_opsworks, mock_opsworks_deprecated # flake8: noqa
from .polly import mock_polly # flake8: noqa
from .rds import mock_rds, mock_rds_deprecated # flake8: noqa
from .rds2 import mock_rds2, mock_rds2_deprecated # flake8: noqa
from .redshift import mock_redshift, mock_redshift_deprecated # flake8: noqa

View File

@ -23,6 +23,7 @@ from moto.instance_metadata import instance_metadata_backends
from moto.kinesis import kinesis_backends
from moto.kms import kms_backends
from moto.opsworks import opsworks_backends
from moto.polly import polly_backends
from moto.rds2 import rds2_backends
from moto.redshift import redshift_backends
from moto.route53 import route53_backends
@ -54,9 +55,10 @@ BACKENDS = {
'iam': iam_backends,
'moto_api': moto_api_backends,
'instance_metadata': instance_metadata_backends,
'opsworks': opsworks_backends,
'kinesis': kinesis_backends,
'kms': kms_backends,
'opsworks': opsworks_backends,
'polly': polly_backends,
'redshift': redshift_backends,
'rds': rds2_backends,
's3': s3_backends,

6
moto/polly/__init__.py Normal file
View File

@ -0,0 +1,6 @@
from __future__ import unicode_literals
from .models import polly_backends
from ..core.models import base_decorator
# Convenience handle on the backend registered under the 'us-east-1' key.
polly_backend = polly_backends['us-east-1']
# Decorator exposed to users as ``@mock_polly``; built by ``base_decorator``
# from the per-region backend mapping.
mock_polly = base_decorator(polly_backends)

114
moto/polly/models.py Normal file
View File

@ -0,0 +1,114 @@
from __future__ import unicode_literals
from xml.etree import ElementTree as ET
import datetime
import boto3
from moto.core import BaseBackend, BaseModel
from .resources import VOICE_DATA
from .utils import make_arn_for_lexicon
DEFAULT_ACCOUNT_ID = 123456789012
class Lexicon(BaseModel):
    """In-memory representation of a Polly pronunciation lexicon.

    The raw XML document is kept in ``content``; every other attribute
    (size, alphabet, language code, lexeme count, modification time) is
    derived from it by :meth:`update`.
    """

    def __init__(self, name, content, region_name):
        """Create a lexicon and derive its attributes from ``content``.

        :param name: lexicon name, also used as the last ARN component.
        :param content: lexicon XML document as a string.
        :param region_name: region placed in the lexicon ARN.
        :raises ValueError: if ``content`` cannot be parsed as XML.
        """
        self.name = name
        self.content = content
        self.size = 0
        self.alphabet = None
        self.last_modified = None
        self.language_code = None
        self.lexemes_count = 0
        self.arn = make_arn_for_lexicon(DEFAULT_ACCOUNT_ID, name, region_name)

        self.update()

    def update(self, content=None):
        """Re-derive all attributes, optionally replacing the XML first.

        :param content: new XML document; when ``None`` the stored content
            is re-parsed.
        :raises ValueError: if the document cannot be parsed. In that case
            the lexicon is left exactly as it was before the call.
        """
        new_content = self.content if content is None else content

        # Parse before touching any state so a bad document cannot leave the
        # lexicon half-updated. Any parser failure is reported as ValueError,
        # which is the error callers of this model handle.
        try:
            root = ET.fromstring(new_content)
        except Exception as err:
            raise ValueError('Failure parsing XML: {0}'.format(err))

        # Probably a very naive approach, but it'll do for now.
        # Attribute keys may carry a '{namespace}' prefix (e.g. xml:lang),
        # hence the suffix match.
        alphabet = None
        language_code = None
        for key, value in root.attrib.items():
            if key.endswith('alphabet'):
                alphabet = value
            elif key.endswith('lang'):
                language_code = value

        # Count the <lexeme> children of the root, ignoring any namespace
        # prefix on the tag name.
        lexemes_count = sum(
            1 for child in root
            if child.tag.rsplit('}', 1)[-1] == 'lexeme'
        )

        self.content = new_content
        self.size = len(new_content)
        # Seconds since the Unix epoch; both operands are naive UTC values.
        self.last_modified = int((datetime.datetime.utcnow() -
                                  datetime.datetime(1970, 1, 1)).total_seconds())
        self.lexemes_count = lexemes_count
        self.alphabet = alphabet
        self.language_code = language_code

    def to_dict(self):
        """Return the lexicon attributes wrapped in an ``Attributes`` key."""
        return {
            'Attributes': {
                'Alphabet': self.alphabet,
                'LanguageCode': self.language_code,
                'LastModified': self.last_modified,
                'LexemesCount': self.lexemes_count,
                'LexiconArn': self.arn,
                'Size': self.size
            }
        }

    def __repr__(self):
        return '<Lexicon {0}>'.format(self.name)
class PollyBackend(BaseBackend):
    """Per-region in-memory store backing the mocked Polly API."""

    def __init__(self, region_name=None):
        """Create an empty backend for ``region_name``."""
        super(PollyBackend, self).__init__()
        self.region_name = region_name
        # Lexicon name -> Lexicon instance.
        self._lexicons = {}

    def reset(self):
        """Drop all state and re-initialise, keeping the region name."""
        region_name = self.region_name
        self.__dict__ = {}
        self.__init__(region_name)

    def describe_voices(self, language_code, next_token):
        """Return the voice descriptions, optionally filtered by language.

        :param language_code: language code to filter on, or ``None`` for
            every voice.
        :param next_token: accepted for API compatibility; currently unused.
        :returns: a new list of voice dicts.
        """
        if language_code is None:
            # Copy so callers cannot mutate the shared module-level list.
            return list(VOICE_DATA)

        return [item for item in VOICE_DATA if item['LanguageCode'] == language_code]

    def delete_lexicon(self, name):
        """Remove the lexicon called ``name``.

        :raises KeyError: if no such lexicon exists.
        """
        del self._lexicons[name]

    def get_lexicon(self, name):
        """Return the lexicon called ``name``.

        :raises KeyError: if no such lexicon exists.
        """
        return self._lexicons[name]

    def list_lexicons(self, next_token):
        """Return one dict per stored lexicon (attributes plus ``Name``).

        :param next_token: accepted for API compatibility; currently unused.
        """
        result = []

        for name, lexicon in self._lexicons.items():
            lexicon_dict = lexicon.to_dict()
            lexicon_dict['Name'] = name

            result.append(lexicon_dict)

        return result

    def put_lexicon(self, name, content):
        """Create the lexicon ``name`` or replace the content of an existing one.

        :param name: lexicon name.
        :param content: lexicon XML document.
        :raises ValueError: if ``content`` is not parseable XML.
        """
        if name in self._lexicons:
            # Regenerate all the stats from the new XML on the stored
            # lexicon itself, which keeps its ARN.
            self._lexicons[name].update(content)
        else:
            lexicon = Lexicon(name, content, region_name=self.region_name)
            self._lexicons[name] = lexicon
# Region names boto3 reports for the "polly" service.
available_regions = boto3.session.Session().get_available_regions("polly")
# One independent PollyBackend per region, keyed by region name.
polly_backends = {region: PollyBackend(region_name=region) for region in available_regions}

61
moto/polly/resources.py Normal file
View File

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# Static table of voice descriptions. Each entry is a dict with the keys
# 'Id', 'LanguageCode', 'LanguageName', 'Gender' and 'Name'.
# Several 'Name' values contain non-ASCII characters.
VOICE_DATA = [
{'Id': 'Joanna', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Gender': 'Female', 'Name': 'Joanna'},
{'Id': 'Mizuki', 'LanguageCode': 'ja-JP', 'LanguageName': 'Japanese', 'Gender': 'Female', 'Name': 'Mizuki'},
{'Id': 'Filiz', 'LanguageCode': 'tr-TR', 'LanguageName': 'Turkish', 'Gender': 'Female', 'Name': 'Filiz'},
{'Id': 'Astrid', 'LanguageCode': 'sv-SE', 'LanguageName': 'Swedish', 'Gender': 'Female', 'Name': 'Astrid'},
{'Id': 'Tatyana', 'LanguageCode': 'ru-RU', 'LanguageName': 'Russian', 'Gender': 'Female', 'Name': 'Tatyana'},
{'Id': 'Maxim', 'LanguageCode': 'ru-RU', 'LanguageName': 'Russian', 'Gender': 'Male', 'Name': 'Maxim'},
{'Id': 'Carmen', 'LanguageCode': 'ro-RO', 'LanguageName': 'Romanian', 'Gender': 'Female', 'Name': 'Carmen'},
{'Id': 'Ines', 'LanguageCode': 'pt-PT', 'LanguageName': 'Portuguese', 'Gender': 'Female', 'Name': 'Inês'},
{'Id': 'Cristiano', 'LanguageCode': 'pt-PT', 'LanguageName': 'Portuguese', 'Gender': 'Male', 'Name': 'Cristiano'},
{'Id': 'Vitoria', 'LanguageCode': 'pt-BR', 'LanguageName': 'Brazilian Portuguese', 'Gender': 'Female', 'Name': 'Vitória'},
{'Id': 'Ricardo', 'LanguageCode': 'pt-BR', 'LanguageName': 'Brazilian Portuguese', 'Gender': 'Male', 'Name': 'Ricardo'},
{'Id': 'Maja', 'LanguageCode': 'pl-PL', 'LanguageName': 'Polish', 'Gender': 'Female', 'Name': 'Maja'},
{'Id': 'Jan', 'LanguageCode': 'pl-PL', 'LanguageName': 'Polish', 'Gender': 'Male', 'Name': 'Jan'},
{'Id': 'Ewa', 'LanguageCode': 'pl-PL', 'LanguageName': 'Polish', 'Gender': 'Female', 'Name': 'Ewa'},
{'Id': 'Ruben', 'LanguageCode': 'nl-NL', 'LanguageName': 'Dutch', 'Gender': 'Male', 'Name': 'Ruben'},
{'Id': 'Lotte', 'LanguageCode': 'nl-NL', 'LanguageName': 'Dutch', 'Gender': 'Female', 'Name': 'Lotte'},
{'Id': 'Liv', 'LanguageCode': 'nb-NO', 'LanguageName': 'Norwegian', 'Gender': 'Female', 'Name': 'Liv'},
{'Id': 'Giorgio', 'LanguageCode': 'it-IT', 'LanguageName': 'Italian', 'Gender': 'Male', 'Name': 'Giorgio'},
{'Id': 'Carla', 'LanguageCode': 'it-IT', 'LanguageName': 'Italian', 'Gender': 'Female', 'Name': 'Carla'},
{'Id': 'Karl', 'LanguageCode': 'is-IS', 'LanguageName': 'Icelandic', 'Gender': 'Male', 'Name': 'Karl'},
{'Id': 'Dora', 'LanguageCode': 'is-IS', 'LanguageName': 'Icelandic', 'Gender': 'Female', 'Name': 'Dóra'},
{'Id': 'Mathieu', 'LanguageCode': 'fr-FR', 'LanguageName': 'French', 'Gender': 'Male', 'Name': 'Mathieu'},
{'Id': 'Celine', 'LanguageCode': 'fr-FR', 'LanguageName': 'French', 'Gender': 'Female', 'Name': 'Céline'},
{'Id': 'Chantal', 'LanguageCode': 'fr-CA', 'LanguageName': 'Canadian French', 'Gender': 'Female', 'Name': 'Chantal'},
{'Id': 'Penelope', 'LanguageCode': 'es-US', 'LanguageName': 'US Spanish', 'Gender': 'Female', 'Name': 'Penélope'},
{'Id': 'Miguel', 'LanguageCode': 'es-US', 'LanguageName': 'US Spanish', 'Gender': 'Male', 'Name': 'Miguel'},
{'Id': 'Enrique', 'LanguageCode': 'es-ES', 'LanguageName': 'Castilian Spanish', 'Gender': 'Male', 'Name': 'Enrique'},
{'Id': 'Conchita', 'LanguageCode': 'es-ES', 'LanguageName': 'Castilian Spanish', 'Gender': 'Female', 'Name': 'Conchita'},
{'Id': 'Geraint', 'LanguageCode': 'en-GB-WLS', 'LanguageName': 'Welsh English', 'Gender': 'Male', 'Name': 'Geraint'},
{'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Gender': 'Female', 'Name': 'Salli'},
{'Id': 'Kimberly', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Gender': 'Female', 'Name': 'Kimberly'},
{'Id': 'Kendra', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Gender': 'Female', 'Name': 'Kendra'},
{'Id': 'Justin', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Gender': 'Male', 'Name': 'Justin'},
{'Id': 'Joey', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Gender': 'Male', 'Name': 'Joey'},
{'Id': 'Ivy', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Gender': 'Female', 'Name': 'Ivy'},
{'Id': 'Raveena', 'LanguageCode': 'en-IN', 'LanguageName': 'Indian English', 'Gender': 'Female', 'Name': 'Raveena'},
{'Id': 'Emma', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Gender': 'Female', 'Name': 'Emma'},
{'Id': 'Brian', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Gender': 'Male', 'Name': 'Brian'},
{'Id': 'Amy', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Gender': 'Female', 'Name': 'Amy'},
{'Id': 'Russell', 'LanguageCode': 'en-AU', 'LanguageName': 'Australian English', 'Gender': 'Male', 'Name': 'Russell'},
{'Id': 'Nicole', 'LanguageCode': 'en-AU', 'LanguageName': 'Australian English', 'Gender': 'Female', 'Name': 'Nicole'},
{'Id': 'Vicki', 'LanguageCode': 'de-DE', 'LanguageName': 'German', 'Gender': 'Female', 'Name': 'Vicki'},
{'Id': 'Marlene', 'LanguageCode': 'de-DE', 'LanguageName': 'German', 'Gender': 'Female', 'Name': 'Marlene'},
{'Id': 'Hans', 'LanguageCode': 'de-DE', 'LanguageName': 'German', 'Gender': 'Male', 'Name': 'Hans'},
{'Id': 'Naja', 'LanguageCode': 'da-DK', 'LanguageName': 'Danish', 'Gender': 'Female', 'Name': 'Naja'},
{'Id': 'Mads', 'LanguageCode': 'da-DK', 'LanguageName': 'Danish', 'Gender': 'Male', 'Name': 'Mads'},
{'Id': 'Gwyneth', 'LanguageCode': 'cy-GB', 'LanguageName': 'Welsh', 'Gender': 'Female', 'Name': 'Gwyneth'},
{'Id': 'Jacek', 'LanguageCode': 'pl-PL', 'LanguageName': 'Polish', 'Gender': 'Male', 'Name': 'Jacek'}
]
# Both constants below are sets (``{...}`` with bare values is set-literal
# syntax), so membership tests are cheap but iteration order is unspecified.

# Language codes accepted as a 'LanguageCode' value.
LANGUAGE_CODES = {'cy-GB', 'da-DK', 'de-DE', 'en-AU', 'en-GB', 'en-GB-WLS', 'en-IN', 'en-US', 'es-ES', 'es-US',
'fr-CA', 'fr-FR', 'is-IS', 'it-IT', 'ja-JP', 'nb-NO', 'nl-NL', 'pl-PL', 'pt-BR', 'pt-PT', 'ro-RO',
'ru-RU', 'sv-SE', 'tr-TR'}
# Voice identifiers accepted as a 'VoiceId' value.
VOICE_IDS = {'Geraint', 'Gwyneth', 'Mads', 'Naja', 'Hans', 'Marlene', 'Nicole', 'Russell', 'Amy', 'Brian', 'Emma',
'Raveena', 'Ivy', 'Joanna', 'Joey', 'Justin', 'Kendra', 'Kimberly', 'Salli', 'Conchita', 'Enrique',
'Miguel', 'Penelope', 'Chantal', 'Celine', 'Mathieu', 'Dora', 'Karl', 'Carla', 'Giorgio', 'Mizuki',
'Liv', 'Lotte', 'Ruben', 'Ewa', 'Jacek', 'Jan', 'Maja', 'Ricardo', 'Vitoria', 'Cristiano', 'Ines',
'Carmen', 'Maxim', 'Tatyana', 'Astrid', 'Filiz'}

188
moto/polly/responses.py Normal file
View File

@ -0,0 +1,188 @@
from __future__ import unicode_literals
import json
import re
from six.moves.urllib.parse import urlsplit
from moto.core.responses import BaseResponse
from .models import polly_backends
from .resources import LANGUAGE_CODES, VOICE_IDS
LEXICON_NAME_REGEX = re.compile(r'^[0-9A-Za-z]{1,20}$')
class PollyResponse(BaseResponse):
    """HTTP handlers for the mocked Polly REST API.

    The handler is chosen from the second path segment (``/v1/<action>``),
    so the public methods ``voices``, ``lexicons`` and ``speech`` mirror the
    URL layout. Errors are returned as a JSON body plus a 400 status.
    """

    @property
    def polly_backend(self):
        """Backend for the region this request was addressed to."""
        return polly_backends[self.region]

    @property
    def json(self):
        """Request body decoded from JSON, parsed once and cached."""
        if not hasattr(self, '_json'):
            self._json = json.loads(self.body)
        return self._json

    def _error(self, code, message):
        """Build a ``(body, response-kwargs)`` pair for a 400 error."""
        return json.dumps({'__type': code, 'message': message}), dict(status=400)

    def _get_action(self):
        """Return the action name taken from the URL path."""
        # Amazon is now naming things /v1/api_name
        url_parts = urlsplit(self.uri).path.lstrip('/').split('/')
        # [0] = 'v1'
        return url_parts[1]

    # DescribeVoices
    def voices(self):
        """List voices, optionally filtered by the ``LanguageCode`` parameter."""
        language_code = self._get_param('LanguageCode')
        next_token = self._get_param('NextToken')

        if language_code is not None and language_code not in LANGUAGE_CODES:
            # LANGUAGE_CODES is a set; sort it so the message is deterministic.
            msg = "1 validation error detected: Value '{0}' at 'languageCode' failed to satisfy constraint: " \
                  "Member must satisfy enum value set: [{1}]".format(language_code,
                                                                     ', '.join(sorted(LANGUAGE_CODES)))
            return msg, dict(status=400)

        voices = self.polly_backend.describe_voices(language_code, next_token)

        return json.dumps({'Voices': voices})

    def lexicons(self):
        """Route ``/v1/lexicons[/<name>]`` requests by HTTP method."""
        # anything after the /v1/lexicons/
        args = urlsplit(self.uri).path.lstrip('/').split('/')[2:]

        if self.method == 'GET' and len(args) == 0:
            return self._get_lexicons_list()

        handlers = {
            'GET': self._get_lexicon,
            'PUT': self._put_lexicons,
            'DELETE': self._delete_lexicon,
        }
        handler = handlers.get(self.method)

        # Every remaining operation takes exactly one path argument: the
        # lexicon name. Anything else is a bad route rather than a crash.
        if handler is None or len(args) != 1:
            return self._error('InvalidAction', 'Bad route')

        return handler(args[0])

    # PutLexicon
    def _put_lexicons(self, lexicon_name):
        """Store or replace a lexicon from the JSON ``Content`` field."""
        if LEXICON_NAME_REGEX.match(lexicon_name) is None:
            return self._error('InvalidParameterValue', 'Lexicon name must match [0-9A-Za-z]{1,20}')

        if 'Content' not in self.json:
            return self._error('MissingParameter', 'Content is missing from the body')

        try:
            self.polly_backend.put_lexicon(lexicon_name, self.json['Content'])
        except ValueError as err:
            # The backend signals unparseable lexicon XML with ValueError.
            return self._error('InvalidLexiconException', '{0}'.format(err))

        return ''

    # ListLexicons
    def _get_lexicons_list(self):
        """Return every stored lexicon's name and attributes."""
        next_token = self._get_param('NextToken')

        result = {
            'Lexicons': self.polly_backend.list_lexicons(next_token)
        }

        return json.dumps(result)

    # GetLexicon
    def _get_lexicon(self, lexicon_name):
        """Return one lexicon's content and attributes, or a not-found error."""
        try:
            lexicon = self.polly_backend.get_lexicon(lexicon_name)
        except KeyError:
            return self._error('LexiconNotFoundException', 'Lexicon not found')

        result = {
            'Lexicon': {
                'Name': lexicon_name,
                'Content': lexicon.content
            },
            'LexiconAttributes': lexicon.to_dict()['Attributes']
        }

        return json.dumps(result)

    # DeleteLexicon
    def _delete_lexicon(self, lexicon_name):
        """Delete one lexicon, or return a not-found error."""
        try:
            self.polly_backend.delete_lexicon(lexicon_name)
        except KeyError:
            return self._error('LexiconNotFoundException', 'Lexicon not found')

        return ''

    # SynthesizeSpeech
    def speech(self):
        """Validate a SynthesizeSpeech request and return placeholder audio.

        No audio is synthesised: after validation the response is eight NUL
        characters with a Content-Type matching the requested output format.
        """
        # Sanity check params
        args = {
            'lexicon_names': None,
            'sample_rate': 22050,
            'speech_marks': None,
            'text': None,
            'text_type': 'text'
        }

        if 'LexiconNames' in self.json:
            for lex in self.json['LexiconNames']:
                try:
                    self.polly_backend.get_lexicon(lex)
                except KeyError:
                    return self._error('LexiconNotFoundException', 'Lexicon not found')

            args['lexicon_names'] = self.json['LexiconNames']

        if 'OutputFormat' not in self.json:
            return self._error('MissingParameter', 'Missing parameter OutputFormat')
        if self.json['OutputFormat'] not in ('json', 'mp3', 'ogg_vorbis', 'pcm'):
            return self._error('InvalidParameterValue', 'Not one of json, mp3, ogg_vorbis, pcm')
        args['output_format'] = self.json['OutputFormat']

        if 'SampleRate' in self.json:
            try:
                sample_rate = int(self.json['SampleRate'])
            except (TypeError, ValueError):
                # Non-numeric values are simply an invalid rate.
                sample_rate = None
            if sample_rate not in (8000, 16000, 22050):
                return self._error('InvalidSampleRateException', 'The specified sample rate is not valid.')
            args['sample_rate'] = sample_rate

        if 'SpeechMarkTypes' in self.json:
            for value in self.json['SpeechMarkTypes']:
                if value not in ('sentence', 'ssml', 'viseme', 'word'):
                    return self._error('InvalidParameterValue', 'Not one of sentence, ssml, viseme, word')
            args['speech_marks'] = self.json['SpeechMarkTypes']

        if 'Text' not in self.json:
            return self._error('MissingParameter', 'Missing parameter Text')
        args['text'] = self.json['Text']

        if 'TextType' in self.json:
            if self.json['TextType'] not in ('ssml', 'text'):
                return self._error('InvalidParameterValue', 'Not one of ssml, text')
            args['text_type'] = self.json['TextType']

        if 'VoiceId' not in self.json:
            return self._error('MissingParameter', 'Missing parameter VoiceId')
        if self.json['VoiceId'] not in VOICE_IDS:
            return self._error('InvalidParameterValue',
                               'Not one of {0}'.format(', '.join(sorted(VOICE_IDS))))
        args['voice_id'] = self.json['VoiceId']

        # More validation
        if len(args['text']) > 3000:
            return self._error('TextLengthExceededException', 'Text too long')
        if args['speech_marks'] is not None and args['output_format'] != 'json':
            return self._error('MarksNotSupportedForFormatException', 'OutputFormat must be json')
        # Only the 'ssml' mark type requires SSML input; the other mark
        # types are valid for plain text.
        if args['speech_marks'] is not None and 'ssml' in args['speech_marks'] \
                and args['text_type'] == 'text':
            return self._error('SsmlMarksNotSupportedForTextTypeException', 'TextType must be ssml')

        content_types = {
            'mp3': 'audio/mpeg',
            'ogg_vorbis': 'audio/ogg',
            'pcm': 'audio/pcm',
        }
        content_type = content_types.get(args['output_format'], 'audio/json')

        headers = {'Content-Type': content_type}

        return '\x00\x00\x00\x00\x00\x00\x00\x00', headers

13
moto/polly/urls.py Normal file
View File

@ -0,0 +1,13 @@
from __future__ import unicode_literals
from .responses import PollyResponse
# Host patterns (regular expressions) served by the Polly mock.
# NOTE(review): the capture group presumably matches the region segment of
# the hostname — confirm against the dispatcher.
url_bases = [
"https?://polly.(.+).amazonaws.com",
]
# Path patterns mapped to the response dispatcher. Every route goes through
# PollyResponse.dispatch.
# NOTE(review): '{0}' is presumably replaced with each url base by the
# framework — confirm.
url_paths = {
'{0}/v1/voices': PollyResponse.dispatch,
'{0}/v1/lexicons/.+': PollyResponse.dispatch,
'{0}/v1/lexicons': PollyResponse.dispatch,
'{0}/v1/speech': PollyResponse.dispatch,
}

5
moto/polly/utils.py Normal file
View File

@ -0,0 +1,5 @@
from __future__ import unicode_literals
def make_arn_for_lexicon(account_id, name, region_name):
    """Build the ARN string identifying a Polly lexicon.

    :param account_id: account id placed in the ARN.
    :param name: lexicon name, used as the resource part after ``lexicon/``.
    :param region_name: region placed in the ARN.
    :returns: the formatted ARN string.
    """
    arn_template = "arn:aws:polly:{0}:{1}:lexicon/{2}"
    # Template order is region, account, name; this differs from the
    # parameter order.
    fields = (region_name, account_id, name)
    return arn_template.format(*fields)

View File

@ -11,5 +11,5 @@ class {{ service_class }}Response(BaseResponse):
# add methods from here
# add teampltes from here
# add templates from here

View File

@ -0,0 +1,109 @@
from __future__ import unicode_literals
from botocore.exceptions import ClientError
import boto3
import sure # noqa
from moto import mock_polly
# Polly only available in a few regions
DEFAULT_REGION = 'eu-west-1'
LEXICON_XML = """<?xml version="1.0" encoding="UTF-8"?>
<lexicon version="1.0"
xmlns="http://www.w3.org/2005/01/pronunciation-lexicon"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2005/01/pronunciation-lexicon
http://www.w3.org/TR/2007/CR-pronunciation-lexicon-20071212/pls.xsd"
alphabet="ipa"
xml:lang="en-US">
<lexeme>
<grapheme>W3C</grapheme>
<alias>World Wide Web Consortium</alias>
</lexeme>
</lexicon>"""
@mock_polly
def test_describe_voices():
    """DescribeVoices lists voices, filters by language, rejects bad codes."""
    polly = boto3.client('polly', region_name=DEFAULT_REGION)

    all_voices = polly.describe_voices()['Voices']
    len(all_voices).should.be.greater_than(1)

    british_voices = polly.describe_voices(LanguageCode='en-GB')['Voices']
    len(british_voices).should.equal(3)

    # An unknown language code must be rejected by the service.
    failure = None
    try:
        polly.describe_voices(LanguageCode='SOME_LANGUAGE')
    except ClientError as err:
        failure = err

    if failure is None:
        raise RuntimeError('Should of raised an exception')
    failure.response['Error']['Code'].should.equal('400')
@mock_polly
def test_put_list_lexicon():
    """A stored lexicon shows up in ListLexicons."""
    polly = boto3.client('polly', region_name=DEFAULT_REGION)

    # PutLexicon returns nothing of interest.
    polly.put_lexicon(Name='test', Content=LEXICON_XML)

    listed = polly.list_lexicons()['Lexicons']
    len(listed).should.equal(1)
@mock_polly
def test_put_get_lexicon():
    """GetLexicon returns both the lexicon and its attributes."""
    polly = boto3.client('polly', region_name=DEFAULT_REGION)

    # PutLexicon returns nothing of interest.
    polly.put_lexicon(Name='test', Content=LEXICON_XML)

    response = polly.get_lexicon(Name='test')
    for expected_key in ('Lexicon', 'LexiconAttributes'):
        response.should.contain(expected_key)
@mock_polly
def test_put_lexicon_bad_name():
    """PutLexicon rejects names outside the allowed character set."""
    polly = boto3.client('polly', region_name=DEFAULT_REGION)

    failure = None
    try:
        # The hyphen makes this name invalid.
        polly.put_lexicon(Name='test-invalid', Content=LEXICON_XML)
    except ClientError as err:
        failure = err

    if failure is None:
        raise RuntimeError('Should of raised an exception')
    failure.response['Error']['Code'].should.equal('InvalidParameterValue')
@mock_polly
def test_synthesize_speech():
    """SynthesizeSpeech succeeds and reports the matching content type."""
    client = boto3.client('polly', region_name=DEFAULT_REGION)

    # Return nothing
    client.put_lexicon(
        Name='test',
        Content=LEXICON_XML
    )

    resp = client.synthesize_speech(
        LexiconNames=['test'],
        OutputFormat='pcm',
        SampleRate='16000',
        Text='test1234',
        TextType='text',
        VoiceId='Astrid'
    )

    # The mock sets the Content-Type header from the requested OutputFormat.
    resp['ContentType'].should.equal('audio/pcm')

    # Todo expand synthesize speech tests for bad config

View File

@ -0,0 +1,16 @@
from __future__ import unicode_literals
import sure # noqa
import moto.server as server
from moto import mock_polly
'''
Test the different server responses
'''
@mock_polly
def test_polly_list():
    """Smoke test: the standalone server app can be created for Polly."""
    app = server.create_backend_app("polly")
    http_client = app.test_client()

    # No requests are issued yet; this only checks that the app and its
    # test client can be constructed.