Better EMR coverage and boto3 request/response handling

This revision includes:

- A handler for requests whose content type is JSON (from boto3).
- A decorator (generate_boto3_response) to convert XML responses to JSON (for boto3). This way, the existing response templates for boto can be shared when generating boto3 responses.
- A utility class and functions that use botocore's service specification data (available under botocore.data) for type casting, from query parameters to Python objects and from XML to JSON.
- Updates to response handlers/models to cover more EMR endpoints and mockable parameters.

This commit is contained in:
parent 4157abe8de
commit 7cd404808b
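Note: the generate_boto3_response decorator itself lives in the EMR responses diff, which is suppressed below for size. As a rough, hypothetical sketch of the idea only (names and details may differ from the committed code), it renders the existing boto XML template and, when the caller speaks JSON, converts the result with the xml_to_json_response helper added in this commit:

import functools
import json

from moto.core.responses import xml_to_json_response


def generate_boto3_response(operation):
    """Sketch: render the XML template, then convert to JSON for boto3."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            rendered = func(self, *args, **kwargs)
            if 'json' in self.headers.get('Content-Type', ''):
                converted = xml_to_json_response(
                    self.aws_service_spec, operation, rendered)
                self.response_headers['Content-Type'] = 'application/x-amz-json-1.1'
                return '' if converted is None else json.dumps(converted)
            return rendered
        return wrapper
    return decorator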
@@ -3,3 +3,8 @@ try:
 except ImportError:
     # python 2.6 or earlier, use backport
     from ordereddict import OrderedDict  # flake8: noqa
+
+try:
+    from urlparse import urlparse  # flake8: noqa
+except ImportError:
+    from urllib.parse import urlparse  # flake8: noqa
@@ -8,7 +8,10 @@ from jinja2 import Environment, DictLoader, TemplateNotFound
 import six
 from six.moves.urllib.parse import parse_qs, urlparse

+import xmltodict
+from pkg_resources import resource_filename
 from werkzeug.exceptions import HTTPException

+from moto.compat import OrderedDict
 from moto.core.utils import camelcase_to_underscores, method_names_from_class

@@ -90,6 +93,7 @@ class BaseResponse(_TemplateEnvironmentMixin):

     default_region = 'us-east-1'
     region_regex = r'\.(.+?)\.amazonaws\.com'
+    aws_service_spec = None

     @classmethod
     def dispatch(cls, *args, **kwargs):
@@ -115,7 +119,20 @@ class BaseResponse(_TemplateEnvironmentMixin):
         if not querystring:
             querystring.update(parse_qs(urlparse(full_url).query, keep_blank_values=True))
         if not querystring:
-            querystring.update(parse_qs(self.body, keep_blank_values=True))
+            if 'json' in request.headers.get('content-type', []) and self.aws_service_spec:
+                if isinstance(self.body, six.binary_type):
+                    decoded = json.loads(self.body.decode('utf-8'))
+                else:
+                    decoded = json.loads(self.body)
+
+                target = request.headers.get('x-amz-target') or request.headers.get('X-Amz-Target')
+                service, method = target.split('.')
+                input_spec = self.aws_service_spec.input_spec(method)
+                flat = flatten_json_request_body('', decoded, input_spec)
+                for key, value in flat.items():
+                    querystring[key] = [value]
+            else:
+                querystring.update(parse_qs(self.body, keep_blank_values=True))
         if not querystring:
             querystring.update(headers)

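For boto3 (JSON) requests, the handler above flattens the decoded body into the dotted query-parameter form that the existing boto handlers already parse. A minimal illustration, with a hand-written spec standing in for what AWSServiceSpec.input_spec (added later in this diff) would return:

from moto.core.responses import flatten_json_request_body

# Stand-in input spec; real specs come from botocore's service data.
spec = {
    'Name': {'type': 'string'},
    'Instances': {'type': 'structure',
                  'Ec2KeyName': {'type': 'string'}},
}
body = {'Name': 'cluster', 'Instances': {'Ec2KeyName': 'mykey'}}

flat = flatten_json_request_body('', body, spec)
# flat == {'Name': 'cluster', 'Instances.Ec2KeyName': 'mykey'}
# The dispatcher then stores each value as a one-element list, matching
# parse_qs output: querystring['Name'] == ['cluster']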
@@ -125,15 +142,19 @@ class BaseResponse(_TemplateEnvironmentMixin):
         self.path = urlparse(full_url).path
         self.querystring = querystring
         self.method = request.method
-        region = re.search(self.region_regex, full_url)
-        if region:
-            self.region = region.group(1)
-        else:
-            self.region = self.default_region
+        self.region = self.get_region_from_url(full_url)

         self.headers = request.headers
         self.response_headers = headers

+    def get_region_from_url(self, full_url):
+        match = re.search(self.region_regex, full_url)
+        if match:
+            region = match.group(1)
+        else:
+            region = self.default_region
+        return region
+
     def _dispatch(self, request, full_url, headers):
         self.setup_class(request, full_url, headers)
         return self.call_action()
@@ -164,21 +185,26 @@ class BaseResponse(_TemplateEnvironmentMixin):
             return status, headers, body
         raise NotImplementedError("The {0} action has not been implemented".format(action))

-    def _get_param(self, param_name):
-        return self.querystring.get(param_name, [None])[0]
+    def _get_param(self, param_name, if_none=None):
+        val = self.querystring.get(param_name)
+        if val is not None:
+            return val[0]
+        return if_none

-    def _get_int_param(self, param_name):
+    def _get_int_param(self, param_name, if_none=None):
         val = self._get_param(param_name)
         if val is not None:
             return int(val)
+        return if_none

-    def _get_bool_param(self, param_name):
+    def _get_bool_param(self, param_name, if_none=None):
         val = self._get_param(param_name)
         if val is not None:
             if val.lower() == 'true':
                 return True
             elif val.lower() == 'false':
                 return False
+        return if_none

     def _get_multi_param(self, param_prefix):
         """
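The new if_none argument gives handlers a default when a parameter is missing; usage is along these lines (parameter names are illustrative):

# Inside an action handler:
# job_flow_role = self._get_param('JobFlowRole', if_none='EMRJobflowDefault')
# instance_count = self._get_int_param('Instances.InstanceCount', if_none=1)
# visible = self._get_bool_param('VisibleToAllUsers', if_none=False)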
@@ -257,6 +283,28 @@ class BaseResponse(_TemplateEnvironmentMixin):
             param_index += 1
         return results

+    def _get_map_prefix(self, param_prefix):
+        results = {}
+        param_index = 1
+        while 1:
+            index_prefix = '{0}.{1}.'.format(param_prefix, param_index)
+
+            k, v = None, None
+            for key, value in self.querystring.items():
+                if key.startswith(index_prefix):
+                    if key.endswith('.key'):
+                        k = value[0]
+                    elif key.endswith('.value'):
+                        v = value[0]
+
+            if not (k and v):
+                break
+
+            results[k] = v
+            param_index += 1
+
+        return results
+
     @property
     def request_json(self):
         return 'JSON' in self.querystring.get('ContentType', [])
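_get_map_prefix folds '<prefix>.N.key' / '<prefix>.N.value' query parameters into a plain dict, stopping at the first index with no complete pair. An illustration with made-up tag parameters:

# Given a parsed querystring such as:
#   {'Tags.entry.1.key': ['tag1'], 'Tags.entry.1.value': ['val1'],
#    'Tags.entry.2.key': ['tag2'], 'Tags.entry.2.value': ['val2']}
# a handler recovers the map in one call:
#   tags = self._get_map_prefix('Tags.entry')
#   # tags == {'tag1': 'val1', 'tag2': 'val2'}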
@@ -299,3 +347,227 @@ def metadata_response(request, full_url, headers):
     else:
         raise NotImplementedError("The {0} metadata path has not been implemented".format(path))
     return 200, headers, result
+
+
+class _RecursiveDictRef(object):
+    """Store a recursive reference to dict."""
+
+    def __init__(self):
+        self.key = None
+        self.dic = {}
+
+    def __repr__(self):
+        return '{!r}'.format(self.dic)
+
+    def __getattr__(self, key):
+        return self.dic.__getattr__(key)
+
+    def set_reference(self, key, dic):
+        """Set the RecursiveDictRef object to keep reference to dict object
+        (dic) at the key.
+
+        """
+        self.key = key
+        self.dic = dic
+
+
+class AWSServiceSpec(object):
+    """Parse data model from botocore. This is used to recover type info
+    for fields in AWS API XML response.
+
+    """
+
+    def __init__(self, path):
+        self.path = resource_filename('botocore', path)
+        with open(self.path) as f:
+            spec = json.load(f)
+        self.metadata = spec['metadata']
+        self.operations = spec['operations']
+        self.shapes = spec['shapes']
+
+    def input_spec(self, operation):
+        try:
+            op = self.operations[operation]
+        except KeyError:
+            raise ValueError('Invalid operation: {}'.format(operation))
+        if 'input' not in op:
+            return {}
+        shape = self.shapes[op['input']['shape']]
+        return self._expand(shape)
+
+    def output_spec(self, operation):
+        """Produce a JSON with a valid API response syntax for operation, but
+        with type information. Each node represented by a key has the
+        value containing field type, e.g.,
+
+          output_spec["SomeBooleanNode"] => {"type": "boolean"}
+
+        """
+        try:
+            op = self.operations[operation]
+        except KeyError:
+            raise ValueError('Invalid operation: {}'.format(operation))
+        if 'output' not in op:
+            return {}
+        shape = self.shapes[op['output']['shape']]
+        return self._expand(shape)
+
+    def _expand(self, shape):
+        def expand(dic, seen=None):
+            seen = seen or {}
+            if dic['type'] == 'structure':
+                nodes = {}
+                for k, v in dic['members'].items():
+                    seen_till_here = dict(seen)
+                    if k in seen_till_here:
+                        nodes[k] = seen_till_here[k]
+                        continue
+                    seen_till_here[k] = _RecursiveDictRef()
+                    nodes[k] = expand(self.shapes[v['shape']], seen_till_here)
+                    seen_till_here[k].set_reference(k, nodes[k])
+                nodes['type'] = 'structure'
+                return nodes
+
+            elif dic['type'] == 'list':
+                seen_till_here = dict(seen)
+                shape = dic['member']['shape']
+                if shape in seen_till_here:
+                    return seen_till_here[shape]
+                seen_till_here[shape] = _RecursiveDictRef()
+                expanded = expand(self.shapes[shape], seen_till_here)
+                seen_till_here[shape].set_reference(shape, expanded)
+                return {'type': 'list', 'member': expanded}
+
+            elif dic['type'] == 'map':
+                seen_till_here = dict(seen)
+                node = {'type': 'map'}
+
+                if 'shape' in dic['key']:
+                    shape = dic['key']['shape']
+                    seen_till_here[shape] = _RecursiveDictRef()
+                    node['key'] = expand(self.shapes[shape], seen_till_here)
+                    seen_till_here[shape].set_reference(shape, node['key'])
+                else:
+                    node['key'] = dic['key']['type']
+
+                if 'shape' in dic['value']:
+                    shape = dic['value']['shape']
+                    seen_till_here[shape] = _RecursiveDictRef()
+                    node['value'] = expand(self.shapes[shape], seen_till_here)
+                    seen_till_here[shape].set_reference(shape, node['value'])
+                else:
+                    node['value'] = dic['value']['type']
+
+                return node
+
+            else:
+                return {'type': dic['type']}
+
+        return expand(shape)
+
+
+def to_str(value, spec):
+    vtype = spec['type']
+    if vtype == 'boolean':
+        return 'true' if value else 'false'
+    elif vtype == 'integer':
+        return str(value)
+    elif vtype == 'string':
+        return str(value)
+    elif value is None:
+        return 'null'
+    else:
+        raise TypeError('Unknown type {}'.format(vtype))
+
+
+def from_str(value, spec):
+    vtype = spec['type']
+    if vtype == 'boolean':
+        return True if value == 'true' else False
+    elif vtype == 'integer':
+        return int(value)
+    elif vtype == 'float':
+        return float(value)
+    elif vtype == 'timestamp':
+        return value
+    elif vtype == 'string':
+        return value
+    raise TypeError('Unknown type {}'.format(vtype))
+
+
+def flatten_json_request_body(prefix, dict_body, spec):
+    """Convert a JSON request body into query params."""
+    if len(spec) == 1 and 'type' in spec:
+        return {prefix: to_str(dict_body, spec)}
+
+    flat = {}
+    for key, value in dict_body.items():
+        node_type = spec[key]['type']
+        if node_type == 'list':
+            for idx, v in enumerate(value, 1):
+                pref = key + '.member.' + str(idx)
+                flat.update(flatten_json_request_body(pref, v, spec[key]['member']))
+        elif node_type == 'map':
+            for idx, (k, v) in enumerate(value.items(), 1):
+                pref = key + '.entry.' + str(idx)
+                flat.update(flatten_json_request_body(pref + '.key', k, spec[key]['key']))
+                flat.update(flatten_json_request_body(pref + '.value', v, spec[key]['value']))
+        else:
+            flat.update(flatten_json_request_body(key, value, spec[key]))

+    if prefix:
+        prefix = prefix + '.'
+    return dict((prefix + k, v) for k, v in flat.items())
+
+
+def xml_to_json_response(service_spec, operation, xml, result_node=None):
+    """Convert rendered XML response to JSON for use with boto3."""
+
+    def transform(value, spec):
+        """Apply transformations to make the output JSON comply with the
+        expected form. This function applies:
+
+        (1) Type cast to nodes with "type" property (e.g., 'true' to
+            True). XML field values are all in text so this step is
+            necessary to convert it to valid JSON objects.
+
+        (2) Squashes "member" nodes to lists.
+
+        """
+        if len(spec) == 1:
+            return from_str(value, spec)
+
+        od = OrderedDict()
+        for k, v in value.items():
+            if k.startswith('@') or v is None:
+                continue
+
+            if spec[k]['type'] == 'list':
+                if len(spec[k]['member']) == 1:
+                    if isinstance(v['member'], list):
+                        od[k] = transform(v['member'], spec[k]['member'])
+                    else:
+                        od[k] = [transform(v['member'], spec[k]['member'])]
+                elif isinstance(v['member'], list):
+                    od[k] = [transform(o, spec[k]['member']) for o in v['member']]
+                elif isinstance(v['member'], OrderedDict):
+                    od[k] = [transform(v['member'], spec[k]['member'])]
+                else:
+                    raise ValueError('Malformatted input')
+            elif spec[k]['type'] == 'map':
+                key = from_str(v['entry']['key'], spec[k]['key'])
+                val = from_str(v['entry']['value'], spec[k]['value'])
+                od[k] = {key: val}
+            else:
+                od[k] = transform(v, spec[k])
+        return od
+
+    dic = xmltodict.parse(xml)
+    output_spec = service_spec.output_spec(operation)
+    try:
+        for k in (result_node or (operation + 'Response', operation + 'Result')):
+            dic = dic[k]
+    except KeyError:
+        return None
+    else:
+        return transform(dic, output_spec)
+    return None
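Putting the new helpers together: a handler renders its existing boto XML template and hands the result to xml_to_json_response for boto3 callers. A small sketch (the XML below is illustrative, not a verbatim moto template):

import json

from moto.core.responses import AWSServiceSpec, xml_to_json_response

service_spec = AWSServiceSpec('data/emr/2009-03-31/service-2.json')

xml = """
<AddJobFlowStepsResponse>
  <AddJobFlowStepsResult>
    <StepIds>
      <member>s-ABC123DEF4567</member>
    </StepIds>
  </AddJobFlowStepsResult>
</AddJobFlowStepsResponse>
"""
result = xml_to_json_response(service_spec, 'AddJobFlowSteps', xml)
print(json.dumps(result))
# e.g. {"StepIds": ["s-ABC123DEF4567"]}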
@@ -1,47 +1,227 @@
 from __future__ import unicode_literals
+from datetime import datetime

 import boto.emr
+import pytz
 from moto.core import BaseBackend

-from .utils import random_instance_group_id, random_job_id
+from .utils import random_instance_group_id, random_cluster_id, random_step_id

-DEFAULT_JOB_FLOW_ROLE = 'EMRJobflowDefault'

+class FakeApplication(object):
+    def __init__(self, name, version, args=None, additional_info=None):
+        self.additional_info = additional_info or {}
+        self.args = args or []
+        self.name = name
+        self.version = version
+
+
+class FakeBootstrapAction(object):
+    def __init__(self, args, name, script_path):
+        self.args = args or []
+        self.name = name
+        self.script_path = script_path
+
+
 class FakeInstanceGroup(object):
-    def __init__(self, id, instance_count, instance_role, instance_type, market, name, bid_price=None):
-        self.id = id
+    def __init__(self, instance_count, instance_role, instance_type, market, name,
+                 id=None, bid_price=None):
+        self.id = id or random_instance_group_id()
+
+        self.bid_price = bid_price
+        self.market = market
+        self.name = name
         self.num_instances = instance_count
         self.role = instance_role
         self.type = instance_type
-        self.market = market
-        self.name = name
-        self.bid_price = bid_price
+
+        self.creation_datetime = datetime.now(pytz.utc)
+        self.start_datetime = datetime.now(pytz.utc)
+        self.ready_datetime = datetime.now(pytz.utc)
+        self.end_datetime = None
+        self.state = 'RUNNING'

     def set_instance_count(self, instance_count):
         self.num_instances = instance_count


-class Cluster(object):
-    def __init__(self, id, name, availability_zone, ec2_key_name, subnet_id,
-                 ec2_iam_profile, log_uri):
-        self.id = id
+class FakeStep(object):
+    def __init__(self,
+                 state,
+                 name='',
+                 jar='',
+                 args=None,
+                 properties=None,
+                 action_on_failure='TERMINATE_CLUSTER'):
+        self.id = random_step_id()
+
+        self.action_on_failure = action_on_failure
+        self.args = args or []
         self.name = name
+        self.jar = jar
+        self.properties = properties or {}
+
+        self.creation_datetime = datetime.now(pytz.utc)
+        self.end_datetime = None
+        self.ready_datetime = None
+        self.start_datetime = None
+        self.state = state
+
+
+class FakeCluster(object):
+    def __init__(self,
+                 emr_backend,
+                 name,
+                 log_uri,
+                 job_flow_role,
+                 service_role,
+                 steps,
+                 instance_attrs,
+                 bootstrap_actions=None,
+                 configurations=None,
+                 cluster_id=None,
+                 visible_to_all_users='false',
+                 release_label=None,
+                 requested_ami_version=None,
+                 running_ami_version=None):
+        self.id = cluster_id or random_cluster_id()
+        emr_backend.clusters[self.id] = self
+        self.emr_backend = emr_backend
+
         self.applications = []
-        self.auto_terminate = "false"
-        self.availability_zone = availability_zone
-        self.subnet_id = subnet_id
-        self.ec2_iam_profile = ec2_iam_profile
-        self.log_uri = log_uri
-        self.master_public_dns_name = ""
-        self.normalized_instance_hours = 0
-        self.requested_ami_version = "2.4.2"
-        self.running_ami_version = "2.4.2"
-        self.service_role = "my-service-role"
-        self.state = "RUNNING"
+
+        self.bootstrap_actions = []
+        for bootstrap_action in (bootstrap_actions or []):
+            self.add_bootstrap_action(bootstrap_action)
+
+        self.configurations = configurations or []
+
         self.tags = {}
-        self.termination_protected = "false"
-        self.visible_to_all_users = "false"
+
+        self.log_uri = log_uri
+        self.name = name
+        self.normalized_instance_hours = 0
+
+        self.steps = []
+        self.add_steps(steps)
+
+        self.set_visibility(visible_to_all_users)
+
+        self.instance_group_ids = []
+        self.master_instance_group_id = None
+        self.core_instance_group_id = None
+        if 'master_instance_type' in instance_attrs and instance_attrs['master_instance_type']:
+            self.emr_backend.add_instance_groups(
+                self.id,
+                [{'instance_count': 1,
+                  'instance_role': 'MASTER',
+                  'instance_type': instance_attrs['master_instance_type'],
+                  'market': 'ON_DEMAND',
+                  'name': 'master'}])
+        if 'slave_instance_type' in instance_attrs and instance_attrs['slave_instance_type']:
+            self.emr_backend.add_instance_groups(
+                self.id,
+                [{'instance_count': instance_attrs['instance_count'] - 1,
+                  'instance_role': 'CORE',
+                  'instance_type': instance_attrs['slave_instance_type'],
+                  'market': 'ON_DEMAND',
+                  'name': 'slave'}])
+        self.additional_master_security_groups = instance_attrs.get('additional_master_security_groups')
+        self.additional_slave_security_groups = instance_attrs.get('additional_slave_security_groups')
+        self.availability_zone = instance_attrs.get('availability_zone')
+        self.ec2_key_name = instance_attrs.get('ec2_key_name')
+        self.ec2_subnet_id = instance_attrs.get('ec2_subnet_id')
+        self.hadoop_version = instance_attrs.get('hadoop_version')
+        self.keep_job_flow_alive_when_no_steps = instance_attrs.get('keep_job_flow_alive_when_no_steps')
+        self.master_security_group = instance_attrs.get('emr_managed_master_security_group')
+        self.service_access_security_group = instance_attrs.get('service_access_security_group')
+        self.slave_security_group = instance_attrs.get('emr_managed_slave_security_group')
+        self.termination_protected = instance_attrs.get('termination_protected')
+
+        self.release_label = release_label
+        self.requested_ami_version = requested_ami_version
+        self.running_ami_version = running_ami_version
+
+        self.role = job_flow_role or 'EMRJobflowDefault'
+        self.service_role = service_role
+
+        self.creation_datetime = datetime.now(pytz.utc)
+        self.start_datetime = None
+        self.ready_datetime = None
+        self.end_datetime = None
+        self.state = None
+
+        self.start_cluster()
+        self.run_bootstrap_actions()
+
+    @property
+    def instance_groups(self):
+        return self.emr_backend.get_instance_groups(self.instance_group_ids)
+
+    @property
+    def master_instance_type(self):
+        return self.emr_backend.instance_groups[self.master_instance_group_id].type
+
+    @property
+    def slave_instance_type(self):
+        return self.emr_backend.instance_groups[self.core_instance_group_id].type
+
+    @property
+    def instance_count(self):
+        return sum(group.num_instances for group in self.instance_groups)
+
+    def start_cluster(self):
+        self.state = 'STARTING'
+        self.start_datetime = datetime.now(pytz.utc)
+
+    def run_bootstrap_actions(self):
+        self.state = 'BOOTSTRAPPING'
+        self.ready_datetime = datetime.now(pytz.utc)
+        self.state = 'WAITING'
+        if not self.steps:
+            if not self.keep_job_flow_alive_when_no_steps:
+                self.terminate()
+
+    def terminate(self):
+        self.state = 'TERMINATING'
+        self.end_datetime = datetime.now(pytz.utc)
+        self.state = 'TERMINATED'
+
+    def add_applications(self, applications):
+        self.applications.extend([
+            FakeApplication(
+                name=app.get('name', ''),
+                version=app.get('version', ''),
+                args=app.get('args', []),
+                additional_info=app.get('additiona_info', {}))
+            for app in applications])
+
+    def add_bootstrap_action(self, bootstrap_action):
+        self.bootstrap_actions.append(FakeBootstrapAction(**bootstrap_action))
+
+    def add_instance_group(self, instance_group):
+        if instance_group.role == 'MASTER':
+            if self.master_instance_group_id:
+                raise Exception('Cannot add another master instance group')
+            self.master_instance_group_id = instance_group.id
+        if instance_group.role == 'CORE':
+            if self.core_instance_group_id:
+                raise Exception('Cannot add another core instance group')
+            self.core_instance_group_id = instance_group.id
+        self.instance_group_ids.append(instance_group.id)
+
+    def add_steps(self, steps):
+        added_steps = []
+        for step in steps:
+            if self.steps:
+                # If we already have other steps, this one is pending
+                fake = FakeStep(state='PENDING', **step)
+            else:
+                fake = FakeStep(state='STARTING', **step)
+            self.steps.append(fake)
+            added_steps.append(fake)
+        self.state = 'RUNNING'
+        return added_steps

     def add_tags(self, tags):
         self.tags.update(tags)
@@ -50,166 +230,61 @@ class Cluster(object):
         for key in tag_keys:
             self.tags.pop(key, None)

-
-class FakeStep(object):
-    def __init__(self, state, **kwargs):
-        # 'Steps.member.1.HadoopJarStep.Jar': ['/home/hadoop/contrib/streaming/hadoop-streaming.jar'],
-        # 'Steps.member.1.HadoopJarStep.Args.member.1': ['-mapper'],
-        # 'Steps.member.1.HadoopJarStep.Args.member.2': ['s3n://elasticmapreduce/samples/wordcount/wordSplitter.py'],
-        # 'Steps.member.1.HadoopJarStep.Args.member.3': ['-reducer'],
-        # 'Steps.member.1.HadoopJarStep.Args.member.4': ['aggregate'],
-        # 'Steps.member.1.HadoopJarStep.Args.member.5': ['-input'],
-        # 'Steps.member.1.HadoopJarStep.Args.member.6': ['s3n://elasticmapreduce/samples/wordcount/input'],
-        # 'Steps.member.1.HadoopJarStep.Args.member.7': ['-output'],
-        # 'Steps.member.1.HadoopJarStep.Args.member.8': ['s3n://<my output bucket>/output/wordcount_output'],
-        # 'Steps.member.1.ActionOnFailure': ['TERMINATE_JOB_FLOW'],
-        # 'Steps.member.1.Name': ['My wordcount example']}
-
-        self.action_on_failure = kwargs['action_on_failure']
-        self.name = kwargs['name']
-        self.jar = kwargs['hadoop_jar_step._jar']
-        self.args = []
-        self.state = state
-
-        arg_index = 1
-        while True:
-            arg = kwargs.get('hadoop_jar_step._args.member.{0}'.format(arg_index))
-            if arg:
-                self.args.append(arg)
-                arg_index += 1
-            else:
-                break
-
-
-class FakeJobFlow(object):
-    def __init__(self, job_id, name, log_uri, job_flow_role, visible_to_all_users, steps, instance_attrs, emr_backend):
-        self.id = job_id
-        self.name = name
-        self.log_uri = log_uri
-        self.role = job_flow_role or DEFAULT_JOB_FLOW_ROLE
-        self.state = "STARTING"
-        self.steps = []
-        self.add_steps(steps)
-
-        self.initial_instance_count = instance_attrs.get('instance_count', 0)
-        self.initial_master_instance_type = instance_attrs.get('master_instance_type')
-        self.initial_slave_instance_type = instance_attrs.get('slave_instance_type')
-
-        self.set_visibility(visible_to_all_users)
-        self.normalized_instance_hours = 0
-        self.ec2_key_name = instance_attrs.get('ec2_key_name')
-        self.availability_zone = instance_attrs.get('placement.availability_zone')
-        self.subnet_id = instance_attrs.get('ec2_subnet_id')
-        self.keep_job_flow_alive_when_no_steps = instance_attrs.get('keep_job_flow_alive_when_no_steps')
-        self.termination_protected = instance_attrs.get('termination_protected')
-
-        self.instance_group_ids = []
-
-        self.emr_backend = emr_backend
-
-    def create_cluster(self):
-        cluster = Cluster(
-            id=self.id,
-            name=self.name,
-            availability_zone=self.availability_zone,
-            ec2_key_name=self.ec2_key_name,
-            subnet_id=self.subnet_id,
-            ec2_iam_profile=self.role,
-            log_uri=self.log_uri,
-        )
-        return cluster
-
-    def terminate(self):
-        self.state = 'TERMINATED'
-
-    def set_visibility(self, visibility):
-        if visibility == 'true':
-            self.visible_to_all_users = True
-        else:
-            self.visible_to_all_users = False
-
     def set_termination_protection(self, value):
         self.termination_protected = value

-    def add_steps(self, steps):
-        for index, step in enumerate(steps):
-            if self.steps:
-                # If we already have other steps, this one is pending
-                self.steps.append(FakeStep(state='PENDING', **step))
-            else:
-                self.steps.append(FakeStep(state='STARTING', **step))
-
-    def add_instance_group(self, instance_group_id):
-        self.instance_group_ids.append(instance_group_id)
-
-    @property
-    def instance_groups(self):
-        return self.emr_backend.get_instance_groups(self.instance_group_ids)
-
-    @property
-    def master_instance_type(self):
-        groups = self.instance_groups
-        if groups:
-            return groups[0].type
-        else:
-            return self.initial_master_instance_type
-
-    @property
-    def slave_instance_type(self):
-        groups = self.instance_groups
-        if groups:
-            return groups[0].type
-        else:
-            return self.initial_slave_instance_type
-
-    @property
-    def instance_count(self):
-        groups = self.instance_groups
-        if not groups:
-            # No groups, return initial instance count
-            return self.initial_instance_count
-        count = 0
-        for group in groups:
-            count += int(group.num_instances)
-        return count
+    def set_visibility(self, visibility):
+        self.visible_to_all_users = visibility


 class ElasticMapReduceBackend(BaseBackend):

-    def __init__(self):
-        self.job_flows = {}
+    def __init__(self, region_name):
+        super(ElasticMapReduceBackend, self).__init__()
+        self.region_name = region_name
         self.clusters = {}
         self.instance_groups = {}

-    def run_job_flow(self, name, log_uri, job_flow_role, visible_to_all_users, steps, instance_attrs):
-        job_id = random_job_id()
-        job_flow = FakeJobFlow(
-            job_id, name, log_uri, job_flow_role, visible_to_all_users, steps, instance_attrs, self)
-        self.job_flows[job_id] = job_flow
-        cluster = job_flow.create_cluster()
-        self.clusters[cluster.id] = cluster
-        return job_flow
+    def reset(self):
+        region_name = self.region_name
+        self.__dict__ = {}
+        self.__init__(region_name)
+
+    def add_applications(self, cluster_id, applications):
+        cluster = self.get_cluster(cluster_id)
+        cluster.add_applications(applications)
+
+    def add_instance_groups(self, cluster_id, instance_groups):
+        cluster = self.clusters[cluster_id]
+        result_groups = []
+        for instance_group in instance_groups:
+            group = FakeInstanceGroup(**instance_group)
+            self.instance_groups[group.id] = group
+            cluster.add_instance_group(group)
+            result_groups.append(group)
+        return result_groups

     def add_job_flow_steps(self, job_flow_id, steps):
-        job_flow = self.job_flows[job_flow_id]
-        job_flow.add_steps(steps)
-        return job_flow
+        cluster = self.clusters[job_flow_id]
+        steps = cluster.add_steps(steps)
+        return steps
+
+    def add_tags(self, cluster_id, tags):
+        cluster = self.get_cluster(cluster_id)
+        cluster.add_tags(tags)

     def describe_job_flows(self, job_flow_ids=None):
-        jobs = self.job_flows.values()
+        clusters = self.clusters.values()
         if job_flow_ids:
-            return [job for job in jobs if job.id in job_flow_ids]
+            return [cluster for cluster in clusters if cluster.id in job_flow_ids]
         else:
-            return jobs
+            return clusters

-    def terminate_job_flows(self, job_ids):
-        flows = [flow for flow in self.describe_job_flows() if flow.id in job_ids]
-        for flow in flows:
-            flow.terminate()
-        return flows
-
-    def list_clusters(self):
-        return self.clusters.values()
+    def describe_step(self, cluster_id, step_id):
+        cluster = self.clusters[cluster_id]
+        for step in cluster.steps:
+            if step.id == step_id:
+                return step

     def get_cluster(self, cluster_id):
         return self.clusters[cluster_id]

@@ -221,43 +296,50 @@ class ElasticMapReduceBackend(BaseBackend):
             if group_id in instance_group_ids
         ]

-    def add_instance_groups(self, job_flow_id, instance_groups):
-        job_flow = self.job_flows[job_flow_id]
-        result_groups = []
-        for instance_group in instance_groups:
-            instance_group_id = random_instance_group_id()
-            group = FakeInstanceGroup(instance_group_id, **instance_group)
-            self.instance_groups[instance_group_id] = group
-            job_flow.add_instance_group(instance_group_id)
-            result_groups.append(group)
-        return result_groups
+    def list_bootstrap_actions(self, cluster_id):
+        return self.clusters[cluster_id].bootstrap_actions
+
+    def list_clusters(self):
+        return self.clusters.values()
+
+    def list_instance_groups(self, cluster_id):
+        return self.clusters[cluster_id].instance_groups
+
+    def list_steps(self, cluster_id, step_states=None):
+        return self.clusters[cluster_id].steps

     def modify_instance_groups(self, instance_groups):
         result_groups = []
         for instance_group in instance_groups:
             group = self.instance_groups[instance_group['instance_group_id']]
-            group.set_instance_count(instance_group['instance_count'])
+            group.set_instance_count(int(instance_group['instance_count']))
         return result_groups

-    def set_visible_to_all_users(self, job_ids, visible_to_all_users):
-        for job_id in job_ids:
-            job = self.job_flows[job_id]
-            job.set_visibility(visible_to_all_users)
-
-    def set_termination_protection(self, job_ids, value):
-        for job_id in job_ids:
-            job = self.job_flows[job_id]
-            job.set_termination_protection(value)
-
-    def add_tags(self, cluster_id, tags):
-        cluster = self.get_cluster(cluster_id)
-        cluster.add_tags(tags)
-
     def remove_tags(self, cluster_id, tag_keys):
         cluster = self.get_cluster(cluster_id)
         cluster.remove_tags(tag_keys)

+    def run_job_flow(self, **kwargs):
+        return FakeCluster(self, **kwargs)
+
+    def set_visible_to_all_users(self, job_flow_ids, visible_to_all_users):
+        for job_flow_id in job_flow_ids:
+            cluster = self.clusters[job_flow_id]
+            cluster.set_visibility(visible_to_all_users)
+
+    def set_termination_protection(self, job_flow_ids, value):
+        for job_flow_id in job_flow_ids:
+            cluster = self.clusters[job_flow_id]
+            cluster.set_termination_protection(value)
+
+    def terminate_job_flows(self, job_flow_ids):
+        clusters = [cluster for cluster in self.describe_job_flows()
+                    if cluster.id in job_flow_ids]
+        for cluster in clusters:
+            cluster.terminate()
+        return clusters
+

 emr_backends = {}
 for region in boto.emr.regions():
-    emr_backends[region.name] = ElasticMapReduceBackend()
+    emr_backends[region.name] = ElasticMapReduceBackend(region.name)
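A usage sketch of the reworked backend (argument values are illustrative):

from moto.emr.models import emr_backends

backend = emr_backends['us-east-1']
cluster = backend.run_job_flow(
    name='My jobflow',
    log_uri='s3://some_bucket/jobflow_logs',
    job_flow_role=None,  # falls back to 'EMRJobflowDefault'
    service_role='EMR_DefaultRole',
    steps=[],
    instance_attrs={
        'master_instance_type': 'c1.medium',
        'slave_instance_type': 'c1.medium',
        'instance_count': 2,
        'keep_job_flow_alive_when_no_steps': True,
    },
)
# FakeCluster registers itself with the backend on construction:
assert backend.get_cluster(cluster.id) is cluster
# One master and one core instance group were built from instance_attrs:
assert len(backend.list_instance_groups(cluster.id)) == 2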
File diff suppressed because it is too large.
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 from .responses import ElasticMapReduceResponse

 url_bases = [
+    "https?://(.+).elasticmapreduce.amazonaws.com",
     "https?://elasticmapreduce.(.+).amazonaws.com",
 ]

@@ -1,19 +1,25 @@
 from __future__ import unicode_literals
 import random
 import string

 import six


-def random_job_id(size=13):
+def random_id(size=13):
     chars = list(range(10)) + list(string.ascii_uppercase)
-    job_tag = ''.join(six.text_type(random.choice(chars)) for x in range(size))
-    return 'j-{0}'.format(job_tag)
+    return ''.join(six.text_type(random.choice(chars)) for x in range(size))
+
+
+def random_cluster_id(size=13):
+    return 'j-{0}'.format(random_id())
+
+
+def random_step_id(size=13):
+    return 's-{0}'.format(random_id())


 def random_instance_group_id(size=13):
-    chars = list(range(10)) + list(string.ascii_uppercase)
-    job_tag = ''.join(six.text_type(random.choice(chars)) for x in range(size))
-    return 'i-{0}'.format(job_tag)
+    return 'i-{0}'.format(random_id())


 def tags_from_query_string(querystring_dict):
@@ -30,3 +36,18 @@ def tags_from_query_string(querystring_dict):
         else:
             response_values[tag_key] = None
     return response_values
+
+
+def steps_from_query_string(querystring_dict):
+    steps = []
+    for step in querystring_dict:
+        step['jar'] = step.pop('hadoop_jar_step._jar')
+        step['properties'] = dict((o['Key'], o['Value']) for o in step.get('properties', []))
+        step['args'] = []
+        idx = 1
+        keyfmt = 'hadoop_jar_step._args.member.{0}'
+        while keyfmt.format(idx) in step:
+            step['args'].append(step.pop(keyfmt.format(idx)))
+            idx += 1
+        steps.append(step)
+    return steps
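An input/output illustration for steps_from_query_string; it expects per-step dicts whose keys have already been run through camelcase_to_underscores (values here are made up):

from moto.emr.utils import steps_from_query_string

step_dicts = [{
    'name': 'My wordcount example',
    'action_on_failure': 'TERMINATE_JOB_FLOW',
    'hadoop_jar_step._jar': '/home/hadoop/hadoop-streaming.jar',
    'hadoop_jar_step._args.member.1': '-mapper',
    'hadoop_jar_step._args.member.2': 'mapper.py',
}]
steps = steps_from_query_string(step_dicts)
# steps == [{'name': 'My wordcount example',
#            'action_on_failure': 'TERMINATE_JOB_FLOW',
#            'jar': '/home/hadoop/hadoop-streaming.jar',
#            'properties': {},
#            'args': ['-mapper', 'mapper.py']}]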
setup.py
@@ -10,6 +10,7 @@ install_requires = [
     "xmltodict",
     "six",
     "werkzeug",
+    "pytz"
 ]

 extras_require = {
tests/test_core/test_responses.py (new file)
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+import sure  # noqa
+
+from moto.core.responses import AWSServiceSpec
+from moto.core.responses import flatten_json_request_body
+
+
+def test_flatten_json_request_body():
+    spec = AWSServiceSpec('data/emr/2009-03-31/service-2.json').input_spec('RunJobFlow')
+
+    body = {
+        'Name': 'cluster',
+        'Instances': {
+            'Ec2KeyName': 'ec2key',
+            'InstanceGroups': [
+                {'InstanceRole': 'MASTER',
+                 'InstanceType': 'm1.small'},
+                {'InstanceRole': 'CORE',
+                 'InstanceType': 'm1.medium'},
+            ],
+            'Placement': {'AvailabilityZone': 'us-east-1'},
+        },
+        'Steps': [
+            {'HadoopJarStep': {
+                'Properties': [
+                    {'Key': 'k1', 'Value': 'v1'},
+                    {'Key': 'k2', 'Value': 'v2'}
+                ],
+                'Args': ['arg1', 'arg2']}},
+        ],
+        'Configurations': [
+            {'Classification': 'class',
+             'Properties': {'propkey1': 'propkey1',
+                            'propkey2': 'propkey2'}},
+            {'Classification': 'anotherclass',
+             'Properties': {'propkey3': 'propkey3'}},
+        ]
+    }
+
+    flat = flatten_json_request_body('', body, spec)
+    flat['Name'].should.equal(body['Name'])
+    flat['Instances.Ec2KeyName'].should.equal(body['Instances']['Ec2KeyName'])
+    for idx in range(2):
+        flat['Instances.InstanceGroups.member.' + str(idx + 1) + '.InstanceRole'].should.equal(body['Instances']['InstanceGroups'][idx]['InstanceRole'])
+        flat['Instances.InstanceGroups.member.' + str(idx + 1) + '.InstanceType'].should.equal(body['Instances']['InstanceGroups'][idx]['InstanceType'])
+    flat['Instances.Placement.AvailabilityZone'].should.equal(body['Instances']['Placement']['AvailabilityZone'])
+
+    for idx in range(1):
+        prefix = 'Steps.member.' + str(idx + 1) + '.HadoopJarStep'
+        step = body['Steps'][idx]['HadoopJarStep']
+        i = 0
+        while prefix + '.Properties.member.' + str(i + 1) + '.Key' in flat:
+            flat[prefix + '.Properties.member.' + str(i + 1) + '.Key'].should.equal(step['Properties'][i]['Key'])
+            flat[prefix + '.Properties.member.' + str(i + 1) + '.Value'].should.equal(step['Properties'][i]['Value'])
+            i += 1
+        i = 0
+        while prefix + '.Args.member.' + str(i + 1) in flat:
+            flat[prefix + '.Args.member.' + str(i + 1)].should.equal(step['Args'][i])
+            i += 1
+
+    for idx in range(2):
+        flat['Configurations.member.' + str(idx + 1) + '.Classification'].should.equal(body['Configurations'][idx]['Classification'])
+
+        props = {}
+        i = 1
+        keyfmt = 'Configurations.member.{0}.Properties.entry.{1}'
+        key = keyfmt.format(idx + 1, i)
+        while key + '.key' in flat:
+            props[flat[key + '.key']] = flat[key + '.value']
+            i += 1
+            key = keyfmt.format(idx + 1, i)
+        props.should.equal(body['Configurations'][idx]['Properties'])
@@ -1,197 +1,111 @@
 from __future__ import unicode_literals

 import boto
+from boto.emr.bootstrap_action import BootstrapAction
 from boto.emr.instance_group import InstanceGroup
 from boto.emr.step import StreamingStep
+import six
 import sure  # noqa

 from moto import mock_emr
 from tests.helpers import requires_boto_gte


-@mock_emr
-def test_create_job_flow_in_multiple_regions():
-    step = StreamingStep(
-        name='My wordcount example',
-        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
-        reducer='aggregate',
-        input='s3n://elasticmapreduce/samples/wordcount/input',
-        output='s3n://output_bucket/output/wordcount_output'
-    )
-
-    west1_conn = boto.emr.connect_to_region('us-east-1')
-    west1_job_id = west1_conn.run_jobflow(
-        name='us-east-1',
-        log_uri='s3://some_bucket/jobflow_logs',
-        master_instance_type='m1.medium',
-        slave_instance_type='m1.small',
-        steps=[step],
-    )
-
-    west2_conn = boto.emr.connect_to_region('eu-west-1')
-    west2_job_id = west2_conn.run_jobflow(
-        name='eu-west-1',
-        log_uri='s3://some_bucket/jobflow_logs',
-        master_instance_type='m1.medium',
-        slave_instance_type='m1.small',
-        steps=[step],
-    )
-
-    west1_job_flow = west1_conn.describe_jobflow(west1_job_id)
-    west1_job_flow.name.should.equal('us-east-1')
-    west2_job_flow = west2_conn.describe_jobflow(west2_job_id)
-    west2_job_flow.name.should.equal('eu-west-1')
-
-
-@mock_emr
-def test_create_job_flow():
-    conn = boto.connect_emr()
-
-    step1 = StreamingStep(
-        name='My wordcount example',
-        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
-        reducer='aggregate',
-        input='s3n://elasticmapreduce/samples/wordcount/input',
-        output='s3n://output_bucket/output/wordcount_output'
-    )
-
-    step2 = StreamingStep(
-        name='My wordcount example2',
-        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
-        reducer='aggregate',
-        input='s3n://elasticmapreduce/samples/wordcount/input2',
-        output='s3n://output_bucket/output/wordcount_output2'
-    )
-
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        master_instance_type='m1.medium',
-        slave_instance_type='m1.small',
-        steps=[step1, step2],
-    )
-
-    job_flow = conn.describe_jobflow(job_id)
-    job_flow.state.should.equal('STARTING')
-    job_flow.jobflowid.should.equal(job_id)
-    job_flow.name.should.equal('My jobflow')
-    job_flow.masterinstancetype.should.equal('m1.medium')
-    job_flow.slaveinstancetype.should.equal('m1.small')
-    job_flow.loguri.should.equal('s3://some_bucket/jobflow_logs')
-    job_flow.visibletoallusers.should.equal('False')
-    int(job_flow.normalizedinstancehours).should.equal(0)
-    job_step = job_flow.steps[0]
-    job_step.name.should.equal('My wordcount example')
-    job_step.state.should.equal('STARTING')
-    args = [arg.value for arg in job_step.args]
-    args.should.equal([
-        '-mapper',
-        's3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
-        '-reducer',
-        'aggregate',
-        '-input',
-        's3n://elasticmapreduce/samples/wordcount/input',
-        '-output',
-        's3n://output_bucket/output/wordcount_output',
-    ])
-
-    job_step2 = job_flow.steps[1]
-    job_step2.name.should.equal('My wordcount example2')
-    job_step2.state.should.equal('PENDING')
-    args = [arg.value for arg in job_step2.args]
-    args.should.equal([
-        '-mapper',
-        's3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
-        '-reducer',
-        'aggregate',
-        '-input',
-        's3n://elasticmapreduce/samples/wordcount/input2',
-        '-output',
-        's3n://output_bucket/output/wordcount_output2',
-    ])
-
-
-@requires_boto_gte("2.8")
-@mock_emr
-def test_create_job_flow_with_new_params():
-    # Test that run_jobflow works with newer params
-    conn = boto.connect_emr()
-
-    conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        master_instance_type='m1.medium',
-        slave_instance_type='m1.small',
-        job_flow_role='some-role-arn',
-        steps=[],
-    )
-
-
-@requires_boto_gte("2.8")
-@mock_emr
-def test_create_job_flow_visible_to_all_users():
-    conn = boto.connect_emr()
-
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[],
-        visible_to_all_users=True,
-    )
-    job_flow = conn.describe_jobflow(job_id)
-    job_flow.visibletoallusers.should.equal('True')
-
-
-@requires_boto_gte("2.8")
-@mock_emr
-def test_create_job_flow_with_instance_groups():
-    conn = boto.connect_emr()
-
-    instance_groups = [InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07'),
-                       InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')]
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[],
-        instance_groups=instance_groups
-    )
-
-    job_flow = conn.describe_jobflow(job_id)
-    int(job_flow.instancecount).should.equal(12)
-    instance_group = job_flow.instancegroups[0]
-    int(instance_group.instancerunningcount).should.equal(6)
-
-
-@mock_emr
-def test_terminate_job_flow():
-    conn = boto.connect_emr()
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[]
-    )
-
-    flow = conn.describe_jobflows()[0]
-    flow.state.should.equal('STARTING')
-    conn.terminate_jobflow(job_id)
-    flow = conn.describe_jobflows()[0]
-    flow.state.should.equal('TERMINATED')
-
-
-@mock_emr
-def test_describe_job_flows():
-    conn = boto.connect_emr()
-    job1_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[]
-    )
-    job2_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[]
-    )
+run_jobflow_args = dict(
+    job_flow_role='EMR_EC2_DefaultRole',
+    keep_alive=True,
+    log_uri='s3://some_bucket/jobflow_logs',
+    master_instance_type='c1.medium',
+    name='My jobflow',
+    num_instances=2,
+    service_role='EMR_DefaultRole',
+    slave_instance_type='c1.medium',
+)
+
+input_instance_groups = [
+    InstanceGroup(1, 'MASTER', 'c1.medium', 'ON_DEMAND', 'master'),
+    InstanceGroup(3, 'CORE', 'c1.medium', 'ON_DEMAND', 'core'),
+    InstanceGroup(6, 'TASK', 'c1.large', 'SPOT', 'task-1', '0.07'),
+    InstanceGroup(10, 'TASK', 'c1.xlarge', 'SPOT', 'task-2', '0.05'),
+]
+
+
+@mock_emr
+def test_describe_cluster():
+    conn = boto.connect_emr()
+    args = run_jobflow_args.copy()
+    args.update(dict(
+        api_params={
+            'Applications.member.1.Name': 'Spark',
+            'Applications.member.1.Version': '2.4.2',
+            'Configurations.member.1.Classification': 'yarn-site',
+            'Configurations.member.1.Properties.entry.1.key': 'someproperty',
+            'Configurations.member.1.Properties.entry.1.value': 'somevalue',
+            'Instances.EmrManagedMasterSecurityGroup': 'master-security-group',
+            'Instances.Ec2SubnetId': 'subnet-8be41cec',
+        },
+        availability_zone='us-east-2b',
+        ec2_keyname='mykey',
+        job_flow_role='EMR_EC2_DefaultRole',
+        keep_alive=False,
+        log_uri='s3://some_bucket/jobflow_logs',
+        name='My jobflow',
+        service_role='EMR_DefaultRole',
+        visible_to_all_users=True,
+    ))
+    cluster_id = conn.run_jobflow(**args)
+    input_tags = {'tag1': 'val1', 'tag2': 'val2'}
+    conn.add_tags(cluster_id, input_tags)
+
+    cluster = conn.describe_cluster(cluster_id)
+    cluster.applications[0].name.should.equal('Spark')
+    cluster.applications[0].version.should.equal('2.4.2')
+    cluster.autoterminate.should.equal('true')
+
+    # configurations appear not be supplied as attributes?
+
+    attrs = cluster.ec2instanceattributes
+    # AdditionalMasterSecurityGroups
+    # AdditionalSlaveSecurityGroups
+    attrs.ec2availabilityzone.should.equal(args['availability_zone'])
+    attrs.ec2keyname.should.equal(args['ec2_keyname'])
+    attrs.ec2subnetid.should.equal(args['api_params']['Instances.Ec2SubnetId'])
+    # EmrManagedMasterSecurityGroups
+    # EmrManagedSlaveSecurityGroups
+    attrs.iaminstanceprofile.should.equal(args['job_flow_role'])
+    # ServiceAccessSecurityGroup
+
+    cluster.id.should.equal(cluster_id)
+    cluster.loguri.should.equal(args['log_uri'])
+    cluster.masterpublicdnsname.should.be.a(six.string_types)
+    cluster.name.should.equal(args['name'])
+    int(cluster.normalizedinstancehours).should.equal(0)
+    # cluster.release_label
+    cluster.shouldnt.have.property('requestedamiversion')
+    cluster.runningamiversion.should.equal('1.0.0')
+    # cluster.securityconfiguration
+    cluster.servicerole.should.equal(args['service_role'])
+
+    cluster.status.state.should.equal('TERMINATED')
+    cluster.status.statechangereason.message.should.be.a(six.string_types)
+    cluster.status.statechangereason.code.should.be.a(six.string_types)
+    cluster.status.timeline.creationdatetime.should.be.a(six.string_types)
+    # cluster.status.timeline.enddatetime.should.be.a(six.string_types)
+    # cluster.status.timeline.readydatetime.should.be.a(six.string_types)
+
+    dict((item.key, item.value) for item in cluster.tags).should.equal(input_tags)
+
+    cluster.terminationprotected.should.equal('false')
+    cluster.visibletoallusers.should.equal('true')
+
+
+@mock_emr
+def test_describe_jobflows():
+    conn = boto.connect_emr()
+    job1_id = conn.run_jobflow(**run_jobflow_args)
+    job2_id = conn.run_jobflow(**run_jobflow_args)

     jobs = conn.describe_jobflows()
     jobs.should.have.length_of(2)
@ -205,252 +119,454 @@ def test_describe_job_flows():
|
|||||||
|
|
||||||
|
|
||||||
 @mock_emr
-def test_add_steps_to_flow():
+def test_describe_jobflow():
     conn = boto.connect_emr()
+    args = run_jobflow_args.copy()
+    args.update(dict(
+        ami_version='3.8.1',
+        api_params={
+            #'Applications.member.1.Name': 'Spark',
+            #'Applications.member.1.Version': '2.4.2',
+            #'Configurations.member.1.Classification': 'yarn-site',
+            #'Configurations.member.1.Properties.entry.1.key': 'someproperty',
+            #'Configurations.member.1.Properties.entry.1.value': 'somevalue',
+            #'Instances.EmrManagedMasterSecurityGroup': 'master-security-group',
+            'Instances.Ec2SubnetId': 'subnet-8be41cec',
+        },
+        ec2_keyname='mykey',
+        hadoop_version='2.4.0',

-    step1 = StreamingStep(
-        name='My wordcount example',
-        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
-        reducer='aggregate',
-        input='s3n://elasticmapreduce/samples/wordcount/input',
-        output='s3n://output_bucket/output/wordcount_output'
-    )
-
-    job_id = conn.run_jobflow(
         name='My jobflow',
         log_uri='s3://some_bucket/jobflow_logs',
-        steps=[step1]
-    )
+        keep_alive=True,
+        master_instance_type='c1.medium',
+        slave_instance_type='c1.medium',
+        num_instances=2,
+
+        availability_zone='us-west-2b',
+
+        job_flow_role='EMR_EC2_DefaultRole',
+        service_role='EMR_DefaultRole',
+        visible_to_all_users=True,
+    ))
+
+    cluster_id = conn.run_jobflow(**args)
+    jf = conn.describe_jobflow(cluster_id)
+    jf.amiversion.should.equal(args['ami_version'])
+    jf.bootstrapactions.should.equal(None)
+    jf.creationdatetime.should.be.a(six.string_types)
+    jf.should.have.property('laststatechangereason')
+    jf.readydatetime.should.be.a(six.string_types)
+    jf.startdatetime.should.be.a(six.string_types)
+    jf.state.should.equal('WAITING')
+
+    jf.ec2keyname.should.equal(args['ec2_keyname'])
+    # Ec2SubnetId
+    jf.hadoopversion.should.equal(args['hadoop_version'])
+    int(jf.instancecount).should.equal(2)
+
+    for ig in jf.instancegroups:
+        ig.creationdatetime.should.be.a(six.string_types)
+        # ig.enddatetime.should.be.a(six.string_types)
+        ig.should.have.property('instancegroupid').being.a(six.string_types)
+        int(ig.instancerequestcount).should.equal(1)
+        ig.instancerole.should.be.within(['MASTER', 'CORE'])
+        int(ig.instancerunningcount).should.equal(1)
+        ig.instancetype.should.equal('c1.medium')
+        ig.laststatechangereason.should.be.a(six.string_types)
+        ig.market.should.equal('ON_DEMAND')
+        ig.name.should.be.a(six.string_types)
+        ig.readydatetime.should.be.a(six.string_types)
+        ig.startdatetime.should.be.a(six.string_types)
+        ig.state.should.equal('RUNNING')
+
+    jf.keepjobflowalivewhennosteps.should.equal('true')
+    jf.masterinstanceid.should.be.a(six.string_types)
+    jf.masterinstancetype.should.equal(args['master_instance_type'])
+    jf.masterpublicdnsname.should.be.a(six.string_types)
+    int(jf.normalizedinstancehours).should.equal(0)
+    jf.availabilityzone.should.equal(args['availability_zone'])
+    jf.slaveinstancetype.should.equal(args['slave_instance_type'])
+    jf.terminationprotected.should.equal('false')
+
+    jf.jobflowid.should.equal(cluster_id)
+    # jf.jobflowrole.should.equal(args['job_flow_role'])
+    jf.loguri.should.equal(args['log_uri'])
+    jf.name.should.equal(args['name'])
+    # jf.servicerole.should.equal(args['service_role'])
+
+    jf.steps.should.have.length_of(0)
+
+    list(i.value for i in jf.supported_products).should.equal([])
+    jf.visibletoallusers.should.equal('true')
+
+
+@mock_emr
+def test_list_clusters():
+    conn = boto.connect_emr()
+
+    args = run_jobflow_args.copy()
+    args['name'] = 'jobflow1'
+    cluster1_id = conn.run_jobflow(**args)
+    args['name'] = 'jobflow2'
+    cluster2_id = conn.run_jobflow(**args)
+    conn.terminate_jobflow(cluster2_id)
+
+    summary = conn.list_clusters()
+    clusters = summary.clusters
+    clusters.should.have.length_of(2)
+
+    expected = {
+        cluster1_id: {
+            'id': cluster1_id,
+            'name': 'jobflow1',
+            'normalizedinstancehours': 0,
+            'state': 'WAITING'},
+        cluster2_id: {
+            'id': cluster2_id,
+            'name': 'jobflow2',
+            'normalizedinstancehours': 0,
+            'state': 'TERMINATED'},
+    }
+
+    for x in clusters:
+        y = expected[x.id]
+        x.id.should.equal(y['id'])
+        x.name.should.equal(y['name'])
+        int(x.normalizedinstancehours).should.equal(y['normalizedinstancehours'])
+        x.status.state.should.equal(y['state'])
+        x.status.timeline.creationdatetime.should.be.a(six.string_types)
+        if y['state'] == 'TERMINATED':
+            x.status.timeline.enddatetime.should.be.a(six.string_types)
+        else:
+            x.status.timeline.shouldnt.have.property('enddatetime')
+        x.status.timeline.readydatetime.should.be.a(six.string_types)
+
+
+@mock_emr
+def test_run_jobflow():
+    conn = boto.connect_emr()
+    args = run_jobflow_args.copy()
+    job_id = conn.run_jobflow(**args)
     job_flow = conn.describe_jobflow(job_id)
-    job_flow.state.should.equal('STARTING')
+    job_flow.state.should.equal('WAITING')
     job_flow.jobflowid.should.equal(job_id)
-    job_flow.name.should.equal('My jobflow')
-    job_flow.loguri.should.equal('s3://some_bucket/jobflow_logs')
+    job_flow.name.should.equal(args['name'])
+    job_flow.masterinstancetype.should.equal(args['master_instance_type'])
+    job_flow.slaveinstancetype.should.equal(args['slave_instance_type'])
+    job_flow.loguri.should.equal(args['log_uri'])
+    job_flow.visibletoallusers.should.equal('false')
+    int(job_flow.normalizedinstancehours).should.equal(0)
+    job_flow.steps.should.have.length_of(0)

-    step2 = StreamingStep(
-        name='My wordcount example2',
-        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
-        reducer='aggregate',
-        input='s3n://elasticmapreduce/samples/wordcount/input2',
-        output='s3n://output_bucket/output/wordcount_output2'
-    )
-
-    conn.add_jobflow_steps(job_id, [step2])
-
-    job_flow = conn.describe_jobflow(job_id)
-    job_step = job_flow.steps[0]
-    job_step.name.should.equal('My wordcount example')
-    job_step.state.should.equal('STARTING')
-    args = [arg.value for arg in job_step.args]
-    args.should.equal([
-        '-mapper',
-        's3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
-        '-reducer',
-        'aggregate',
-        '-input',
-        's3n://elasticmapreduce/samples/wordcount/input',
-        '-output',
-        's3n://output_bucket/output/wordcount_output',
-    ])
-
-    job_step2 = job_flow.steps[1]
-    job_step2.name.should.equal('My wordcount example2')
-    job_step2.state.should.equal('PENDING')
-    args = [arg.value for arg in job_step2.args]
-    args.should.equal([
-        '-mapper',
-        's3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
-        '-reducer',
-        'aggregate',
-        '-input',
-        's3n://elasticmapreduce/samples/wordcount/input2',
-        '-output',
-        's3n://output_bucket/output/wordcount_output2',
-    ])


 @mock_emr
-def test_create_instance_groups():
-    conn = boto.connect_emr()
+def test_run_jobflow_in_multiple_regions():
+    regions = {}
+    for region in ['us-east-1', 'eu-west-1']:
+        conn = boto.emr.connect_to_region(region)
+        args = run_jobflow_args.copy()
+        args['name'] = region
+        cluster_id = conn.run_jobflow(**args)
+        regions[region] = {'conn': conn, 'cluster_id': cluster_id}

-    step1 = StreamingStep(
-        name='My wordcount example',
-        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
-        reducer='aggregate',
-        input='s3n://elasticmapreduce/samples/wordcount/input',
-        output='s3n://output_bucket/output/wordcount_output'
-    )
-
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[step1],
-    )
-
-    instance_group = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
-    instance_group = conn.add_instance_groups(job_id, [instance_group])
-    instance_group_id = instance_group.instancegroupids
-    job_flow = conn.describe_jobflows()[0]
-    int(job_flow.instancecount).should.equal(6)
-    instance_group = job_flow.instancegroups[0]
-    instance_group.instancegroupid.should.equal(instance_group_id)
-    int(instance_group.instancerunningcount).should.equal(6)
-    instance_group.instancerole.should.equal('TASK')
-    instance_group.instancetype.should.equal('c1.medium')
-    instance_group.market.should.equal('SPOT')
-    instance_group.name.should.equal('spot-0.07')
-    instance_group.bidprice.should.equal('0.07')
-
-
-@mock_emr
-def test_modify_instance_groups():
-    conn = boto.connect_emr()
-
-    step1 = StreamingStep(
-        name='My wordcount example',
-        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
-        reducer='aggregate',
-        input='s3n://elasticmapreduce/samples/wordcount/input',
-        output='s3n://output_bucket/output/wordcount_output'
-    )
-
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[step1]
-    )
-
-    instance_group1 = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
-    instance_group2 = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
-    instance_group = conn.add_instance_groups(job_id, [instance_group1, instance_group2])
-    instance_group_ids = instance_group.instancegroupids.split(",")
-
-    job_flow = conn.describe_jobflows()[0]
-    int(job_flow.instancecount).should.equal(12)
-    instance_group = job_flow.instancegroups[0]
-    int(instance_group.instancerunningcount).should.equal(6)
-
-    conn.modify_instance_groups(instance_group_ids, [2, 3])
-
-    job_flow = conn.describe_jobflows()[0]
-    int(job_flow.instancecount).should.equal(5)
-    instance_group1 = [
-        group for group
-        in job_flow.instancegroups
-        if group.instancegroupid == instance_group_ids[0]
-    ][0]
-    int(instance_group1.instancerunningcount).should.equal(2)
-    instance_group2 = [
-        group for group
-        in job_flow.instancegroups
-        if group.instancegroupid == instance_group_ids[1]
-    ][0]
-    int(instance_group2.instancerunningcount).should.equal(3)
+    for region in regions.keys():
+        conn = regions[region]['conn']
+        jf = conn.describe_jobflow(regions[region]['cluster_id'])
+        jf.name.should.equal(region)


 @requires_boto_gte("2.8")
 @mock_emr
-def test_set_visible_to_all_users():
+def test_run_jobflow_with_new_params():
+    # Test that run_jobflow works with newer params
     conn = boto.connect_emr()
+    conn.run_jobflow(**run_jobflow_args)

-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[],
-        visible_to_all_users=False,
-    )
+
+@requires_boto_gte("2.8")
+@mock_emr
+def test_run_jobflow_with_visible_to_all_users():
+    conn = boto.connect_emr()
+    for expected in (True, False):
+        job_id = conn.run_jobflow(
+            visible_to_all_users=expected,
+            **run_jobflow_args
+        )
+        job_flow = conn.describe_jobflow(job_id)
+        job_flow.visibletoallusers.should.equal(str(expected).lower())
+
+
+@requires_boto_gte("2.8")
+@mock_emr
+def test_run_jobflow_with_instance_groups():
+    input_groups = dict((g.name, g) for g in input_instance_groups)
+    conn = boto.connect_emr()
+    job_id = conn.run_jobflow(instance_groups=input_instance_groups,
+                              **run_jobflow_args)
     job_flow = conn.describe_jobflow(job_id)
-    job_flow.visibletoallusers.should.equal('False')
-
-    conn.set_visible_to_all_users(job_id, True)
-
-    job_flow = conn.describe_jobflow(job_id)
-    job_flow.visibletoallusers.should.equal('True')
-
-    conn.set_visible_to_all_users(job_id, False)
-
-    job_flow = conn.describe_jobflow(job_id)
-    job_flow.visibletoallusers.should.equal('False')
+    int(job_flow.instancecount).should.equal(sum(g.num_instances for g in input_instance_groups))
+    for instance_group in job_flow.instancegroups:
+        expected = input_groups[instance_group.name]
+        instance_group.should.have.property('instancegroupid')
+        int(instance_group.instancerunningcount).should.equal(expected.num_instances)
+        instance_group.instancerole.should.equal(expected.role)
+        instance_group.instancetype.should.equal(expected.type)
+        instance_group.market.should.equal(expected.market)
+        if hasattr(expected, 'bidprice'):
+            instance_group.bidprice.should.equal(expected.bidprice)


 @requires_boto_gte("2.8")
 @mock_emr
 def test_set_termination_protection():
     conn = boto.connect_emr()
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[]
-    )
+    job_id = conn.run_jobflow(**run_jobflow_args)
     job_flow = conn.describe_jobflow(job_id)
-    job_flow.terminationprotected.should.equal(u'None')
+    job_flow.terminationprotected.should.equal('false')

     conn.set_termination_protection(job_id, True)

     job_flow = conn.describe_jobflow(job_id)
     job_flow.terminationprotected.should.equal('true')

     conn.set_termination_protection(job_id, False)

     job_flow = conn.describe_jobflow(job_id)
     job_flow.terminationprotected.should.equal('false')


+@requires_boto_gte("2.8")
 @mock_emr
-def test_list_clusters():
+def test_set_visible_to_all_users():
     conn = boto.connect_emr()
-    conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[],
-    )
-
-    summary = conn.list_clusters()
-    clusters = summary.clusters
-    clusters.should.have.length_of(1)
-    cluster = clusters[0]
-    cluster.name.should.equal("My jobflow")
-    cluster.normalizedinstancehours.should.equal('0')
-    cluster.status.state.should.equal("RUNNING")
+    args = run_jobflow_args.copy()
+    args['visible_to_all_users'] = False
+    job_id = conn.run_jobflow(**args)
+    job_flow = conn.describe_jobflow(job_id)
+    job_flow.visibletoallusers.should.equal('false')
+
+    conn.set_visible_to_all_users(job_id, True)
+    job_flow = conn.describe_jobflow(job_id)
+    job_flow.visibletoallusers.should.equal('true')
+
+    conn.set_visible_to_all_users(job_id, False)
+    job_flow = conn.describe_jobflow(job_id)
+    job_flow.visibletoallusers.should.equal('false')


 @mock_emr
-def test_describe_cluster():
+def test_terminate_jobflow():
     conn = boto.connect_emr()
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[],
+    job_id = conn.run_jobflow(**run_jobflow_args)
+    flow = conn.describe_jobflows()[0]
+    flow.state.should.equal('WAITING')
+
+    conn.terminate_jobflow(job_id)
+    flow = conn.describe_jobflows()[0]
+    flow.state.should.equal('TERMINATED')
+
+
+# testing multiple end points for each feature
+
+
+@mock_emr
+def test_bootstrap_actions():
+    bootstrap_actions = [
+        BootstrapAction(
+            name='bs1',
+            path='path/to/script',
+            bootstrap_action_args=['arg1', 'arg2']),
+        BootstrapAction(
+            name='bs2',
+            path='path/to/anotherscript',
+            bootstrap_action_args=[])
+    ]
+
+    conn = boto.connect_emr()
+    cluster_id = conn.run_jobflow(
+        bootstrap_actions=bootstrap_actions,
+        **run_jobflow_args
     )

-    cluster = conn.describe_cluster(job_id)
-    cluster.name.should.equal("My jobflow")
-    cluster.normalizedinstancehours.should.equal('0')
-    cluster.status.state.should.equal("RUNNING")
+    jf = conn.describe_jobflow(cluster_id)
+    for x, y in zip(jf.bootstrapactions, bootstrap_actions):
+        x.name.should.equal(y.name)
+        x.path.should.equal(y.path)
+        list(o.value for o in x.args).should.equal(y.args())
+
+    resp = conn.list_bootstrap_actions(cluster_id)
+    for i, y in enumerate(bootstrap_actions):
+        x = resp.actions[i]
+        x.name.should.equal(y.name)
+        x.scriptpath.should.equal(y.path)
+        list(arg.value for arg in x.args).should.equal(y.args())


 @mock_emr
-def test_cluster_tagging():
-    conn = boto.connect_emr()
-    job_id = conn.run_jobflow(
-        name='My jobflow',
-        log_uri='s3://some_bucket/jobflow_logs',
-        steps=[],
-    )
-    cluster_id = job_id
-    conn.add_tags(cluster_id, {"tag1": "val1", "tag2": "val2"})
+def test_instance_groups():
+    input_groups = dict((g.name, g) for g in input_instance_groups)

+    conn = boto.connect_emr()
+    args = run_jobflow_args.copy()
+    for key in ['master_instance_type', 'slave_instance_type', 'num_instances']:
+        del args[key]
+    args['instance_groups'] = input_instance_groups[:2]
+    job_id = conn.run_jobflow(**args)
+
+    jf = conn.describe_jobflow(job_id)
+    base_instance_count = int(jf.instancecount)
+
+    conn.add_instance_groups(job_id, input_instance_groups[2:])
+
+    jf = conn.describe_jobflow(job_id)
+    int(jf.instancecount).should.equal(sum(g.num_instances for g in input_instance_groups))
+    for x in jf.instancegroups:
+        y = input_groups[x.name]
+        if hasattr(y, 'bidprice'):
+            x.bidprice.should.equal(y.bidprice)
+        x.creationdatetime.should.be.a(six.string_types)
+        # x.enddatetime.should.be.a(six.string_types)
+        x.should.have.property('instancegroupid')
+        int(x.instancerequestcount).should.equal(y.num_instances)
+        x.instancerole.should.equal(y.role)
+        int(x.instancerunningcount).should.equal(y.num_instances)
+        x.instancetype.should.equal(y.type)
+        x.laststatechangereason.should.be.a(six.string_types)
+        x.market.should.equal(y.market)
+        x.name.should.be.a(six.string_types)
+        x.readydatetime.should.be.a(six.string_types)
+        x.startdatetime.should.be.a(six.string_types)
+        x.state.should.equal('RUNNING')
+
+    for x in conn.list_instance_groups(job_id).instancegroups:
+        y = input_groups[x.name]
+        if hasattr(y, 'bidprice'):
+            x.bidprice.should.equal(y.bidprice)
+        # Configurations
+        # EbsBlockDevices
+        # EbsOptimized
+        x.should.have.property('id')
+        x.instancegrouptype.should.equal(y.role)
+        x.instancetype.should.equal(y.type)
+        x.market.should.equal(y.market)
+        x.name.should.equal(y.name)
+        int(x.requestedinstancecount).should.equal(y.num_instances)
+        int(x.runninginstancecount).should.equal(y.num_instances)
+        # ShrinkPolicy
+        x.status.state.should.equal('RUNNING')
+        x.status.statechangereason.code.should.be.a(six.string_types)
+        x.status.statechangereason.message.should.be.a(six.string_types)
+        x.status.timeline.creationdatetime.should.be.a(six.string_types)
+        # x.status.timeline.enddatetime.should.be.a(six.string_types)
+        x.status.timeline.readydatetime.should.be.a(six.string_types)
+
+    igs = dict((g.name, g) for g in jf.instancegroups)
+
+    conn.modify_instance_groups(
+        [igs['task-1'].instancegroupid, igs['task-2'].instancegroupid],
+        [2, 3])
+    jf = conn.describe_jobflow(job_id)
+    int(jf.instancecount).should.equal(base_instance_count + 5)
+    igs = dict((g.name, g) for g in jf.instancegroups)
+    int(igs['task-1'].instancerunningcount).should.equal(2)
+    int(igs['task-2'].instancerunningcount).should.equal(3)
+
+
+@mock_emr
+def test_steps():
+    input_steps = [
+        StreamingStep(
+            name='My wordcount example',
+            mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
+            reducer='aggregate',
+            input='s3n://elasticmapreduce/samples/wordcount/input',
+            output='s3n://output_bucket/output/wordcount_output'),
+        StreamingStep(
+            name='My wordcount example2',
+            mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
+            reducer='aggregate',
+            input='s3n://elasticmapreduce/samples/wordcount/input2',
+            output='s3n://output_bucket/output/wordcount_output2')
+    ]
+
+    # TODO: implementation and test for cancel_steps
+
+    conn = boto.connect_emr()
+    cluster_id = conn.run_jobflow(
+        steps=[input_steps[0]],
+        **run_jobflow_args)
+
+    jf = conn.describe_jobflow(cluster_id)
+    jf.steps.should.have.length_of(1)
+
+    conn.add_jobflow_steps(cluster_id, [input_steps[1]])
+
+    jf = conn.describe_jobflow(cluster_id)
+    jf.steps.should.have.length_of(2)
+    for step in jf.steps:
+        step.actiononfailure.should.equal('TERMINATE_JOB_FLOW')
+        list(arg.value for arg in step.args).should.have.length_of(8)
+        step.creationdatetime.should.be.a(six.string_types)
+        # step.enddatetime.should.be.a(six.string_types)
+        step.jar.should.equal('/home/hadoop/contrib/streaming/hadoop-streaming.jar')
+        step.laststatechangereason.should.be.a(six.string_types)
+        step.mainclass.should.equal('')
+        step.name.should.be.a(six.string_types)
+        # step.readydatetime.should.be.a(six.string_types)
+        # step.startdatetime.should.be.a(six.string_types)
+        step.state.should.be.within(['STARTING', 'PENDING'])
+
+    expected = dict((s.name, s) for s in input_steps)
+
+    for x in conn.list_steps(cluster_id).steps:
+        y = expected[x.name]
+        # actiononfailure
+        list(arg.value for arg in x.config.args).should.equal([
+            '-mapper', y.mapper,
+            '-reducer', y.reducer,
+            '-input', y.input,
+            '-output', y.output,
+        ])
+        x.config.jar.should.equal('/home/hadoop/contrib/streaming/hadoop-streaming.jar')
+        x.config.mainclass.should.equal('')
+        # properties
+        x.should.have.property('id').should.be.a(six.string_types)
+        x.name.should.equal(y.name)
+        x.status.state.should.be.within(['STARTING', 'PENDING'])
+        # x.status.statechangereason
+        x.status.timeline.creationdatetime.should.be.a(six.string_types)
+        # x.status.timeline.enddatetime.should.be.a(six.string_types)
+        # x.status.timeline.startdatetime.should.be.a(six.string_types)
+
+        x = conn.describe_step(cluster_id, x.id)
+        list(arg.value for arg in x.config.args).should.equal([
+            '-mapper', y.mapper,
+            '-reducer', y.reducer,
+            '-input', y.input,
+            '-output', y.output,
+        ])
+        x.config.jar.should.equal('/home/hadoop/contrib/streaming/hadoop-streaming.jar')
+        x.config.mainclass.should.equal('')
+        # properties
+        x.should.have.property('id').should.be.a(six.string_types)
+        x.name.should.equal(y.name)
+        x.status.state.should.be.within(['STARTING', 'PENDING'])
+        # x.status.statechangereason
+        x.status.timeline.creationdatetime.should.be.a(six.string_types)
+        # x.status.timeline.enddatetime.should.be.a(six.string_types)
+        # x.status.timeline.startdatetime.should.be.a(six.string_types)
+
+
+@mock_emr
+def test_tags():
+    input_tags = {"tag1": "val1", "tag2": "val2"}
+
+    conn = boto.connect_emr()
+    cluster_id = conn.run_jobflow(**run_jobflow_args)
+
+    conn.add_tags(cluster_id, input_tags)
     cluster = conn.describe_cluster(cluster_id)
     cluster.tags.should.have.length_of(2)
-    tags = dict((tag.key, tag.value) for tag in cluster.tags)
-    tags['tag1'].should.equal('val1')
-    tags['tag2'].should.equal('val2')
+    dict((t.key, t.value) for t in cluster.tags).should.equal(input_tags)

-    # Remove a tag
-    conn.remove_tags(cluster_id, ["tag1"])
+    conn.remove_tags(cluster_id, list(input_tags.keys()))
     cluster = conn.describe_cluster(cluster_id)
-    cluster.tags.should.have.length_of(1)
-    tags = dict((tag.key, tag.value) for tag in cluster.tags)
-    tags['tag2'].should.equal('val2')
+    cluster.tags.should.have.length_of(0)

@@ -1,46 +1,586 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
+from copy import deepcopy

 import boto3
+import six
 import sure  # noqa
+from botocore.exceptions import ClientError
+from nose.tools import assert_raises

 from moto import mock_emr


+run_job_flow_args = dict(
+    Instances={
+        'InstanceCount': 3,
+        'KeepJobFlowAliveWhenNoSteps': True,
+        'MasterInstanceType': 'c3.medium',
+        'Placement': {'AvailabilityZone': 'us-east-1a'},
+        'SlaveInstanceType': 'c3.xlarge',
+    },
+    JobFlowRole='EMR_EC2_DefaultRole',
+    LogUri='s3://mybucket/log',
+    Name='cluster',
+    ServiceRole='EMR_DefaultRole',
+    VisibleToAllUsers=True)
+
+
+input_instance_groups = [
+    {'InstanceCount': 1,
+     'InstanceRole': 'MASTER',
+     'InstanceType': 'c1.medium',
+     'Market': 'ON_DEMAND',
+     'Name': 'master'},
+    {'InstanceCount': 3,
+     'InstanceRole': 'CORE',
+     'InstanceType': 'c1.medium',
+     'Market': 'ON_DEMAND',
+     'Name': 'core'},
+    {'InstanceCount': 6,
+     'InstanceRole': 'TASK',
+     'InstanceType': 'c1.large',
+     'Market': 'SPOT',
+     'Name': 'task-1',
+     'BidPrice': '0.07'},
+    {'InstanceCount': 10,
+     'InstanceRole': 'TASK',
+     'InstanceType': 'c1.xlarge',
+     'Market': 'SPOT',
+     'Name': 'task-2',
+     'BidPrice': '0.05'},
+]


 @mock_emr
-def test_run_job_flow():
+def test_describe_cluster():
     client = boto3.client('emr', region_name='us-east-1')
-    cluster_id = client.run_job_flow(
-        Name='cluster',
-        Instances={
-            'MasterInstanceType': 'c3.xlarge',
-            'SlaveInstanceType': 'c3.xlarge',
-            'InstanceCount': 3,
-            'Placement': {'AvailabilityZone': 'us-east-1a'},
-            'KeepJobFlowAliveWhenNoSteps': True,
-        },
-        VisibleToAllUsers=True,
-    )
-    cluster_id.should.have.key('JobFlowId')
+
+    args = deepcopy(run_job_flow_args)
+    args['Applications'] = [{'Name': 'Spark', 'Version': '2.4.2'}]
+    args['Configurations'] = [
+        {'Classification': 'yarn-site',
+         'Properties': {'someproperty': 'somevalue'}}]
+    args['Instances']['AdditionalMasterSecurityGroups'] = ['additional-master']
+    args['Instances']['AdditionalSlaveSecurityGroups'] = ['additional-slave']
+    args['Instances']['Ec2KeyName'] = 'mykey'
+    args['Instances']['Ec2SubnetId'] = 'subnet-8be41cec'
+    args['Instances']['EmrManagedMasterSecurityGroup'] = 'master-security-group'
+    args['Instances']['EmrManagedSlaveSecurityGroup'] = 'slave-security-group'
+    args['Instances']['KeepJobFlowAliveWhenNoSteps'] = False
+    args['Instances']['ServiceAccessSecurityGroup'] = 'service-access-security-group'
+    args['Tags'] = [{'Key': 'tag1', 'Value': 'val1'},
+                    {'Key': 'tag2', 'Value': 'val2'}]
+
+    cluster_id = client.run_job_flow(**args)['JobFlowId']
+
+    cl = client.describe_cluster(ClusterId=cluster_id)['Cluster']
+    cl['Applications'][0]['Name'].should.equal('Spark')
+    cl['Applications'][0]['Version'].should.equal('2.4.2')
+    cl['AutoTerminate'].should.equal(True)
+
+    config = cl['Configurations'][0]
+    config['Classification'].should.equal('yarn-site')
+    config['Properties'].should.equal(args['Configurations'][0]['Properties'])
+
+    attrs = cl['Ec2InstanceAttributes']
+    attrs['AdditionalMasterSecurityGroups'].should.equal(args['Instances']['AdditionalMasterSecurityGroups'])
+    attrs['AdditionalSlaveSecurityGroups'].should.equal(args['Instances']['AdditionalSlaveSecurityGroups'])
+    attrs['Ec2AvailabilityZone'].should.equal('us-east-1a')
+    attrs['Ec2KeyName'].should.equal(args['Instances']['Ec2KeyName'])
+    attrs['Ec2SubnetId'].should.equal(args['Instances']['Ec2SubnetId'])
+    attrs['EmrManagedMasterSecurityGroup'].should.equal(args['Instances']['EmrManagedMasterSecurityGroup'])
+    attrs['EmrManagedSlaveSecurityGroup'].should.equal(args['Instances']['EmrManagedSlaveSecurityGroup'])
+    attrs['IamInstanceProfile'].should.equal(args['JobFlowRole'])
+    attrs['ServiceAccessSecurityGroup'].should.equal(args['Instances']['ServiceAccessSecurityGroup'])
+    cl['Id'].should.equal(cluster_id)
+    cl['LogUri'].should.equal(args['LogUri'])
+    cl['MasterPublicDnsName'].should.be.a(six.string_types)
+    cl['Name'].should.equal(args['Name'])
+    cl['NormalizedInstanceHours'].should.equal(0)
+    # cl['ReleaseLabel'].should.equal('emr-5.0.0')
+    cl.shouldnt.have.key('RequestedAmiVersion')
+    cl['RunningAmiVersion'].should.equal('1.0.0')
+    # cl['SecurityConfiguration'].should.be.a(six.string_types)
+    cl['ServiceRole'].should.equal(args['ServiceRole'])
+
+    status = cl['Status']
+    status['State'].should.equal('TERMINATED')
+    # cluster['Status']['StateChangeReason']
+    status['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
+    # status['Timeline']['EndDateTime'].should.equal(datetime(2014, 1, 24, 2, 19, 46, tzinfo=pytz.utc))
+    status['Timeline']['ReadyDateTime'].should.be.a('datetime.datetime')
+
+    dict((t['Key'], t['Value']) for t in cl['Tags']).should.equal(
+        dict((t['Key'], t['Value']) for t in args['Tags']))
+
+    cl['TerminationProtected'].should.equal(False)
+    cl['VisibleToAllUsers'].should.equal(True)
+
+
+@mock_emr
+def test_describe_job_flows():
+    client = boto3.client('emr', region_name='us-east-1')
+    cluster1_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']
+    cluster2_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']
+
+    resp = client.describe_job_flows()
+    resp['JobFlows'].should.have.length_of(2)
+
+    resp = client.describe_job_flows(JobFlowIds=[cluster2_id])
+    resp['JobFlows'].should.have.length_of(1)
+    resp['JobFlows'][0]['JobFlowId'].should.equal(cluster2_id)
+
+    resp = client.describe_job_flows(JobFlowIds=[cluster1_id])
+    resp['JobFlows'].should.have.length_of(1)
+    resp['JobFlows'][0]['JobFlowId'].should.equal(cluster1_id)
+
+
+@mock_emr
+def test_describe_job_flow():
+    client = boto3.client('emr', region_name='us-east-1')
+
+    args = deepcopy(run_job_flow_args)
+    args['AmiVersion'] = '3.8.1'
+    args['Instances'].update(
+        {'Ec2KeyName': 'ec2keyname',
+         'Ec2SubnetId': 'subnet-8be41cec',
+         'HadoopVersion': '2.4.0'})
+    args['VisibleToAllUsers'] = True
+
+    cluster_id = client.run_job_flow(**args)['JobFlowId']
+
+    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
+
+    jf['AmiVersion'].should.equal(args['AmiVersion'])
+    jf.shouldnt.have.key('BootstrapActions')
+    esd = jf['ExecutionStatusDetail']
+    esd['CreationDateTime'].should.be.a('datetime.datetime')
+    # esd['EndDateTime'].should.be.a('datetime.datetime')
+    # esd['LastStateChangeReason'].should.be.a(six.string_types)
+    esd['ReadyDateTime'].should.be.a('datetime.datetime')
+    esd['StartDateTime'].should.be.a('datetime.datetime')
+    esd['State'].should.equal('WAITING')
+    attrs = jf['Instances']
+    attrs['Ec2KeyName'].should.equal(args['Instances']['Ec2KeyName'])
+    attrs['Ec2SubnetId'].should.equal(args['Instances']['Ec2SubnetId'])
+    attrs['HadoopVersion'].should.equal(args['Instances']['HadoopVersion'])
+    attrs['InstanceCount'].should.equal(args['Instances']['InstanceCount'])
+    for ig in attrs['InstanceGroups']:
+        # ig['BidPrice']
+        ig['CreationDateTime'].should.be.a('datetime.datetime')
+        # ig['EndDateTime'].should.be.a('datetime.datetime')
+        ig['InstanceGroupId'].should.be.a(six.string_types)
+        ig['InstanceRequestCount'].should.be.a(int)
+        ig['InstanceRole'].should.be.within(['MASTER', 'CORE'])
+        ig['InstanceRunningCount'].should.be.a(int)
+        ig['InstanceType'].should.be.within(['c3.medium', 'c3.xlarge'])
+        # ig['LastStateChangeReason'].should.be.a(six.string_types)
+        ig['Market'].should.equal('ON_DEMAND')
+        ig['Name'].should.be.a(six.string_types)
+        ig['ReadyDateTime'].should.be.a('datetime.datetime')
+        ig['StartDateTime'].should.be.a('datetime.datetime')
+        ig['State'].should.equal('RUNNING')
+    attrs['KeepJobFlowAliveWhenNoSteps'].should.equal(True)
+    # attrs['MasterInstanceId'].should.be.a(six.string_types)
+    attrs['MasterInstanceType'].should.equal(args['Instances']['MasterInstanceType'])
+    attrs['MasterPublicDnsName'].should.be.a(six.string_types)
+    attrs['NormalizedInstanceHours'].should.equal(0)
+    attrs['Placement']['AvailabilityZone'].should.equal(args['Instances']['Placement']['AvailabilityZone'])
+    attrs['SlaveInstanceType'].should.equal(args['Instances']['SlaveInstanceType'])
+    attrs['TerminationProtected'].should.equal(False)
+    jf['JobFlowId'].should.equal(cluster_id)
+    jf['JobFlowRole'].should.equal(args['JobFlowRole'])
+    jf['LogUri'].should.equal(args['LogUri'])
+    jf['Name'].should.equal(args['Name'])
+    jf['ServiceRole'].should.equal(args['ServiceRole'])
+    jf.shouldnt.have.key('Steps')
+    jf.shouldnt.have.key('SupportedProducts')
+    jf['VisibleToAllUsers'].should.equal(True)


 @mock_emr
 def test_list_clusters():
     client = boto3.client('emr', region_name='us-east-1')
-    client.run_job_flow(
-        Name='cluster',
-        Instances={
-            'MasterInstanceType': 'c3.xlarge',
-            'SlaveInstanceType': 'c3.xlarge',
-            'InstanceCount': 3,
-            'Placement': {'AvailabilityZone': 'us-east-1a'},
-            'KeepJobFlowAliveWhenNoSteps': True,
-        },
-        VisibleToAllUsers=True,
-    )
+    args = deepcopy(run_job_flow_args)
+    args['Name'] = 'jobflow1'
+    cluster1_id = client.run_job_flow(**args)['JobFlowId']
+    args['Name'] = 'jobflow2'
+    cluster2_id = client.run_job_flow(**args)['JobFlowId']
+    client.terminate_job_flows(JobFlowIds=[cluster2_id])
+
     summary = client.list_clusters()
     clusters = summary['Clusters']
-    clusters.should.have.length_of(1)
-    cluster = clusters[0]
-    cluster['NormalizedInstanceHours'].should.equal(0)
-    cluster['Status']['State'].should.equal("RUNNING")
+    clusters.should.have.length_of(2)
+
+    expected = {
+        cluster1_id: {
+            'Id': cluster1_id,
+            'Name': 'jobflow1',
+            'NormalizedInstanceHours': 0,
+            'State': 'WAITING'},
+        cluster2_id: {
+            'Id': cluster2_id,
+            'Name': 'jobflow2',
+            'NormalizedInstanceHours': 0,
+            'State': 'TERMINATED'},
+    }
+
+    for x in clusters:
+        y = expected[x['Id']]
+        x['Id'].should.equal(y['Id'])
+        x['Name'].should.equal(y['Name'])
+        x['NormalizedInstanceHours'].should.equal(y['NormalizedInstanceHours'])
+        x['Status']['State'].should.equal(y['State'])
+        x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
+        if y['State'] == 'TERMINATED':
+            x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
+        else:
+            x['Status']['Timeline'].shouldnt.have.key('EndDateTime')
+        x['Status']['Timeline']['ReadyDateTime'].should.be.a('datetime.datetime')
+
+
+@mock_emr
+def test_run_job_flow():
+    client = boto3.client('emr', region_name='us-east-1')
+    args = deepcopy(run_job_flow_args)
+    cluster_id = client.run_job_flow(**args)['JobFlowId']
+    resp = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
+    resp['ExecutionStatusDetail']['State'].should.equal('WAITING')
+    resp['JobFlowId'].should.equal(cluster_id)
+    resp['Name'].should.equal(args['Name'])
+    resp['Instances']['MasterInstanceType'].should.equal(args['Instances']['MasterInstanceType'])
+    resp['Instances']['SlaveInstanceType'].should.equal(args['Instances']['SlaveInstanceType'])
+    resp['LogUri'].should.equal(args['LogUri'])
+    resp['VisibleToAllUsers'].should.equal(args['VisibleToAllUsers'])
+    resp['Instances']['NormalizedInstanceHours'].should.equal(0)
+    resp.shouldnt.have.key('Steps')
+
+
+@mock_emr
+def test_run_job_flow_with_invalid_params():
+    client = boto3.client('emr', region_name='us-east-1')
+    with assert_raises(ClientError) as e:
+        # cannot set both AmiVersion and ReleaseLabel
+        args = deepcopy(run_job_flow_args)
+        args['AmiVersion'] = '2.4'
+        args['ReleaseLabel'] = 'emr-5.0.0'
+        client.run_job_flow(**args)
+    e.exception.response['Error']['Code'].should.equal('ValidationException')
+
+
+@mock_emr
+def test_run_job_flow_in_multiple_regions():
+    regions = {}
+    for region in ['us-east-1', 'eu-west-1']:
+        client = boto3.client('emr', region_name=region)
+        args = deepcopy(run_job_flow_args)
+        args['Name'] = region
+        cluster_id = client.run_job_flow(**args)['JobFlowId']
+        regions[region] = {'client': client, 'cluster_id': cluster_id}
+
+    for region in regions.keys():
+        client = regions[region]['client']
+        resp = client.describe_cluster(ClusterId=regions[region]['cluster_id'])
+        resp['Cluster']['Name'].should.equal(region)
+
+
+@mock_emr
+def test_run_job_flow_with_new_params():
+    client = boto3.client('emr', region_name='us-east-1')
+    resp = client.run_job_flow(**run_job_flow_args)
+    resp.should.have.key('JobFlowId')
+
+
+@mock_emr
+def test_run_job_flow_with_visible_to_all_users():
+    client = boto3.client('emr', region_name='us-east-1')
+    for expected in (True, False):
+        args = deepcopy(run_job_flow_args)
+        args['VisibleToAllUsers'] = expected
+        resp = client.run_job_flow(**args)
+        cluster_id = resp['JobFlowId']
+        resp = client.describe_cluster(ClusterId=cluster_id)
+        resp['Cluster']['VisibleToAllUsers'].should.equal(expected)
+
+
+@mock_emr
+def test_run_job_flow_with_instance_groups():
+    input_groups = dict((g['Name'], g) for g in input_instance_groups)
+    client = boto3.client('emr', region_name='us-east-1')
+    args = deepcopy(run_job_flow_args)
+    args['Instances'] = {'InstanceGroups': input_instance_groups}
+    cluster_id = client.run_job_flow(**args)['JobFlowId']
+    groups = client.list_instance_groups(ClusterId=cluster_id)['InstanceGroups']
+    for x in groups:
+        y = input_groups[x['Name']]
+        x.should.have.key('Id')
+        x['RequestedInstanceCount'].should.equal(y['InstanceCount'])
+        x['InstanceGroupType'].should.equal(y['InstanceRole'])
+        x['InstanceType'].should.equal(y['InstanceType'])
+        x['Market'].should.equal(y['Market'])
+        if 'BidPrice' in y:
+            x['BidPrice'].should.equal(y['BidPrice'])
+
+
+@mock_emr
+def test_set_termination_protection():
+    client = boto3.client('emr', region_name='us-east-1')
+    args = deepcopy(run_job_flow_args)
+    args['Instances']['TerminationProtected'] = False
+    resp = client.run_job_flow(**args)
+    cluster_id = resp['JobFlowId']
+    resp = client.describe_cluster(ClusterId=cluster_id)
+    resp['Cluster']['TerminationProtected'].should.equal(False)
+
+    for expected in (True, False):
+        resp = client.set_termination_protection(JobFlowIds=[cluster_id],
+                                                 TerminationProtected=expected)
+        resp = client.describe_cluster(ClusterId=cluster_id)
+        resp['Cluster']['TerminationProtected'].should.equal(expected)
+
+
+@mock_emr
+def test_set_visible_to_all_users():
+    client = boto3.client('emr', region_name='us-east-1')
+    args = deepcopy(run_job_flow_args)
+    args['VisibleToAllUsers'] = False
+    resp = client.run_job_flow(**args)
+    cluster_id = resp['JobFlowId']
+    resp = client.describe_cluster(ClusterId=cluster_id)
+    resp['Cluster']['VisibleToAllUsers'].should.equal(False)
+
+    for expected in (True, False):
+        resp = client.set_visible_to_all_users(JobFlowIds=[cluster_id],
+                                               VisibleToAllUsers=expected)
+        resp = client.describe_cluster(ClusterId=cluster_id)
+        resp['Cluster']['VisibleToAllUsers'].should.equal(expected)
+
+
+@mock_emr
+def test_terminate_job_flows():
+    client = boto3.client('emr', region_name='us-east-1')
+
+    resp = client.run_job_flow(**run_job_flow_args)
+    cluster_id = resp['JobFlowId']
+    resp = client.describe_cluster(ClusterId=cluster_id)
+    resp['Cluster']['Status']['State'].should.equal('WAITING')
+
+    resp = client.terminate_job_flows(JobFlowIds=[cluster_id])
+    resp = client.describe_cluster(ClusterId=cluster_id)
+    resp['Cluster']['Status']['State'].should.equal('TERMINATED')
+
+
+# testing multiple end points for each feature
+
+
+@mock_emr
+def test_bootstrap_actions():
+    bootstrap_actions = [
+        {'Name': 'bs1',
+         'ScriptBootstrapAction': {
+             'Args': ['arg1', 'arg2'],
+             'Path': 'path/to/script'}},
+        {'Name': 'bs2',
+         'ScriptBootstrapAction': {
+             'Path': 'path/to/anotherscript'}}
+    ]
+
+    client = boto3.client('emr', region_name='us-east-1')
+    args = deepcopy(run_job_flow_args)
+    args['BootstrapActions'] = bootstrap_actions
+    cluster_id = client.run_job_flow(**args)['JobFlowId']
+
+    cl = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
+    for x, y in zip(cl['BootstrapActions'], bootstrap_actions):
+        x['BootstrapActionConfig'].should.equal(y)
+
+    resp = client.list_bootstrap_actions(ClusterId=cluster_id)
+    for x, y in zip(resp['BootstrapActions'], bootstrap_actions):
+        x['Name'].should.equal(y['Name'])
+        if 'Args' in y['ScriptBootstrapAction']:
+            x['Args'].should.equal(y['ScriptBootstrapAction']['Args'])
+        x['ScriptPath'].should.equal(y['ScriptBootstrapAction']['Path'])
+
+
+@mock_emr
+def test_instance_groups():
+    input_groups = dict((g['Name'], g) for g in input_instance_groups)
+
+    client = boto3.client('emr', region_name='us-east-1')
+    args = deepcopy(run_job_flow_args)
+    for key in ['MasterInstanceType', 'SlaveInstanceType', 'InstanceCount']:
+        del args['Instances'][key]
+    args['Instances']['InstanceGroups'] = input_instance_groups[:2]
+    cluster_id = client.run_job_flow(**args)['JobFlowId']
+
+    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
+    base_instance_count = jf['Instances']['InstanceCount']
+
+    client.add_instance_groups(JobFlowId=cluster_id, InstanceGroups=input_instance_groups[2:])
+
+    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
+    jf['Instances']['InstanceCount'].should.equal(sum(g['InstanceCount'] for g in input_instance_groups))
+    for x in jf['Instances']['InstanceGroups']:
+        y = input_groups[x['Name']]
+        if hasattr(y, 'BidPrice'):
+            x['BidPrice'].should.equal('BidPrice')
+        x['CreationDateTime'].should.be.a('datetime.datetime')
+        # x['EndDateTime'].should.be.a('datetime.datetime')
+        x.should.have.key('InstanceGroupId')
+        x['InstanceRequestCount'].should.equal(y['InstanceCount'])
+        x['InstanceRole'].should.equal(y['InstanceRole'])
+        x['InstanceRunningCount'].should.equal(y['InstanceCount'])
+        x['InstanceType'].should.equal(y['InstanceType'])
+        # x['LastStateChangeReason'].should.equal(y['LastStateChangeReason'])
+        x['Market'].should.equal(y['Market'])
+        x['Name'].should.equal(y['Name'])
+        x['ReadyDateTime'].should.be.a('datetime.datetime')
+        x['StartDateTime'].should.be.a('datetime.datetime')
+        x['State'].should.equal('RUNNING')
+
+    groups = client.list_instance_groups(ClusterId=cluster_id)['InstanceGroups']
+    for x in groups:
+        y = input_groups[x['Name']]
+        if hasattr(y, 'BidPrice'):
+            x['BidPrice'].should.equal('BidPrice')
+        # Configurations
+        # EbsBlockDevices
+        # EbsOptimized
+        x.should.have.key('Id')
+        x['InstanceGroupType'].should.equal(y['InstanceRole'])
+        x['InstanceType'].should.equal(y['InstanceType'])
+        x['Market'].should.equal(y['Market'])
+        x['Name'].should.equal(y['Name'])
+        x['RequestedInstanceCount'].should.equal(y['InstanceCount'])
+        x['RunningInstanceCount'].should.equal(y['InstanceCount'])
+        # ShrinkPolicy
+        x['Status']['State'].should.equal('RUNNING')
+        x['Status']['StateChangeReason']['Code'].should.be.a(six.string_types)
+        # x['Status']['StateChangeReason']['Message'].should.be.a(six.string_types)
+        x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
+        # x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
+        x['Status']['Timeline']['ReadyDateTime'].should.be.a('datetime.datetime')
+
+    igs = dict((g['Name'], g) for g in groups)
+    client.modify_instance_groups(
+        InstanceGroups=[
+            {'InstanceGroupId': igs['task-1']['Id'],
+             'InstanceCount': 2},
+            {'InstanceGroupId': igs['task-2']['Id'],
+             'InstanceCount': 3}])
+    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
+    jf['Instances']['InstanceCount'].should.equal(base_instance_count + 5)
+    igs = dict((g['Name'], g) for g in jf['Instances']['InstanceGroups'])
+    igs['task-1']['InstanceRunningCount'].should.equal(2)
+    igs['task-2']['InstanceRunningCount'].should.equal(3)
+
+
+@mock_emr
+def test_steps():
+    input_steps = [{
+        'HadoopJarStep': {
+            'Args': [
+                'hadoop-streaming',
+                '-files', 's3://elasticmapreduce/samples/wordcount/wordSplitter.py#wordSplitter.py',
+                '-mapper', 'python wordSplitter.py',
+                '-input', 's3://elasticmapreduce/samples/wordcount/input',
+                '-output', 's3://output_bucket/output/wordcount_output',
+                '-reducer', 'aggregate'
+            ],
+            'Jar': 'command-runner.jar',
+        },
+        'Name': 'My wordcount example',
+    }, {
+        'HadoopJarStep': {
+            'Args': [
+                'hadoop-streaming',
+                '-files', 's3://elasticmapreduce/samples/wordcount/wordSplitter2.py#wordSplitter2.py',
+                '-mapper', 'python wordSplitter2.py',
+                '-input', 's3://elasticmapreduce/samples/wordcount/input2',
+                '-output', 's3://output_bucket/output/wordcount_output2',
+                '-reducer', 'aggregate'
+            ],
+            'Jar': 'command-runner.jar',
+        },
+        'Name': 'My wordcount example2',
+    }]
+
+    # TODO: implementation and test for cancel_steps
+
+    client = boto3.client('emr', region_name='us-east-1')
+    args = deepcopy(run_job_flow_args)
+    args['Steps'] = [input_steps[0]]
+    cluster_id = client.run_job_flow(**args)['JobFlowId']
+
+    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
+    jf['Steps'].should.have.length_of(1)
+
+    client.add_job_flow_steps(JobFlowId=cluster_id, Steps=[input_steps[1]])
+
+    jf = client.describe_job_flows(JobFlowIds=[cluster_id])['JobFlows'][0]
+    jf['Steps'].should.have.length_of(2)
+    for idx, (x, y) in enumerate(zip(jf['Steps'], input_steps)):
+        x['ExecutionStatusDetail'].should.have.key('CreationDateTime')
+        # x['ExecutionStatusDetail'].should.have.key('EndDateTime')
+        # x['ExecutionStatusDetail'].should.have.key('LastStateChangeReason')
+        # x['ExecutionStatusDetail'].should.have.key('StartDateTime')
+        x['ExecutionStatusDetail']['State'].should.equal('STARTING' if idx == 0 else 'PENDING')
+        x['StepConfig']['ActionOnFailure'].should.equal('TERMINATE_CLUSTER')
+        x['StepConfig']['HadoopJarStep']['Args'].should.equal(y['HadoopJarStep']['Args'])
+        x['StepConfig']['HadoopJarStep']['Jar'].should.equal(y['HadoopJarStep']['Jar'])
+        if 'MainClass' in y['HadoopJarStep']:
+            x['StepConfig']['HadoopJarStep']['MainClass'].should.equal(y['HadoopJarStep']['MainClass'])
+        if 'Properties' in y['HadoopJarStep']:
+            x['StepConfig']['HadoopJarStep']['Properties'].should.equal(y['HadoopJarStep']['Properties'])
+        x['StepConfig']['Name'].should.equal(y['Name'])
+
+    expected = dict((s['Name'], s) for s in input_steps)
+
+    steps = client.list_steps(ClusterId=cluster_id)['Steps']
+    steps.should.have.length_of(2)
+    for x in steps:
+        y = expected[x['Name']]
+        x['ActionOnFailure'].should.equal('TERMINATE_CLUSTER')
+        x['Config']['Args'].should.equal(y['HadoopJarStep']['Args'])
+        x['Config']['Jar'].should.equal(y['HadoopJarStep']['Jar'])
+        # x['Config']['MainClass'].should.equal(y['HadoopJarStep']['MainClass'])
+        # Properties
+        x['Id'].should.be.a(six.string_types)
+        x['Name'].should.equal(y['Name'])
+        x['Status']['State'].should.be.within(['STARTING', 'PENDING'])
+        # StateChangeReason
+        x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
+        # x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
+        # x['Status']['Timeline']['StartDateTime'].should.be.a('datetime.datetime')
+
+        x = client.describe_step(ClusterId=cluster_id, StepId=x['Id'])['Step']
+        x['ActionOnFailure'].should.equal('TERMINATE_CLUSTER')
+        x['Config']['Args'].should.equal(y['HadoopJarStep']['Args'])
+        x['Config']['Jar'].should.equal(y['HadoopJarStep']['Jar'])
+        # x['Config']['MainClass'].should.equal(y['HadoopJarStep']['MainClass'])
+        # Properties
+        x['Id'].should.be.a(six.string_types)
+        x['Name'].should.equal(y['Name'])
+        x['Status']['State'].should.be.within(['STARTING', 'PENDING'])
+        # StateChangeReason
+        x['Status']['Timeline']['CreationDateTime'].should.be.a('datetime.datetime')
+        # x['Status']['Timeline']['EndDateTime'].should.be.a('datetime.datetime')
+        # x['Status']['Timeline']['StartDateTime'].should.be.a('datetime.datetime')
+
+
+@mock_emr
+def test_tags():
+    input_tags = [{'Key': 'newkey1', 'Value': 'newval1'},
+                  {'Key': 'newkey2', 'Value': 'newval2'}]
+
+    client = boto3.client('emr', region_name='us-east-1')
+    cluster_id = client.run_job_flow(**run_job_flow_args)['JobFlowId']
+
+    client.add_tags(ResourceId=cluster_id, Tags=input_tags)
+    resp = client.describe_cluster(ClusterId=cluster_id)['Cluster']
+    resp['Tags'].should.have.length_of(2)
+    dict((t['Key'], t['Value']) for t in resp['Tags']).should.equal(dict((t['Key'], t['Value']) for t in input_tags))
+
+    client.remove_tags(ResourceId=cluster_id, TagKeys=[t['Key'] for t in input_tags])
+    resp = client.describe_cluster(ClusterId=cluster_id)['Cluster']
+    resp.shouldnt.have.key('Tags')