from __future__ import unicode_literals

import time
from datetime import datetime

import boto
import pytz
from boto.emr.bootstrap_action import BootstrapAction
from boto.emr.instance_group import InstanceGroup
from boto.emr.step import StreamingStep

import sure  # noqa

from moto import mock_emr_deprecated
from tests.helpers import requires_boto_gte


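# Shared fixtures for the tests below: default run_jobflow keyword arguments
# and a mixed on-demand/spot set of instance groups.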
run_jobflow_args = dict(
    job_flow_role="EMR_EC2_DefaultRole",
    keep_alive=True,
    log_uri="s3://some_bucket/jobflow_logs",
    master_instance_type="c1.medium",
    name="My jobflow",
    num_instances=2,
    service_role="EMR_DefaultRole",
    slave_instance_type="c1.medium",
)


input_instance_groups = [
    InstanceGroup(1, "MASTER", "c1.medium", "ON_DEMAND", "master"),
    InstanceGroup(3, "CORE", "c1.medium", "ON_DEMAND", "core"),
    InstanceGroup(6, "TASK", "c1.large", "SPOT", "task-1", "0.07"),
    InstanceGroup(10, "TASK", "c1.xlarge", "SPOT", "task-2", "0.05"),
]


# Has boto3 equivalent
@mock_emr_deprecated
def test_describe_cluster():
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    args.update(
        dict(
            api_params={
                "Applications.member.1.Name": "Spark",
                "Applications.member.1.Version": "2.4.2",
                "Configurations.member.1.Classification": "yarn-site",
                "Configurations.member.1.Properties.entry.1.key": "someproperty",
                "Configurations.member.1.Properties.entry.1.value": "somevalue",
                "Configurations.member.1.Properties.entry.2.key": "someotherproperty",
                "Configurations.member.1.Properties.entry.2.value": "someothervalue",
                "Instances.EmrManagedMasterSecurityGroup": "master-security-group",
                "Instances.Ec2SubnetId": "subnet-8be41cec",
            },
            availability_zone="us-east-2b",
            ec2_keyname="mykey",
            job_flow_role="EMR_EC2_DefaultRole",
            keep_alive=False,
            log_uri="s3://some_bucket/jobflow_logs",
            name="My jobflow",
            service_role="EMR_DefaultRole",
            visible_to_all_users=True,
        )
    )
    cluster_id = conn.run_jobflow(**args)
    input_tags = {"tag1": "val1", "tag2": "val2"}
    conn.add_tags(cluster_id, input_tags)

    cluster = conn.describe_cluster(cluster_id)
    cluster.applications[0].name.should.equal("Spark")
    cluster.applications[0].version.should.equal("2.4.2")
    cluster.autoterminate.should.equal("true")

    # configurations do not appear to be supplied as attributes?

    attrs = cluster.ec2instanceattributes
    # AdditionalMasterSecurityGroups
    # AdditionalSlaveSecurityGroups
    attrs.ec2availabilityzone.should.equal(args["availability_zone"])
    attrs.ec2keyname.should.equal(args["ec2_keyname"])
    attrs.ec2subnetid.should.equal(args["api_params"]["Instances.Ec2SubnetId"])
    # EmrManagedMasterSecurityGroups
    # EmrManagedSlaveSecurityGroups
    attrs.iaminstanceprofile.should.equal(args["job_flow_role"])
    # ServiceAccessSecurityGroup

    cluster.id.should.equal(cluster_id)
    cluster.loguri.should.equal(args["log_uri"])
    cluster.masterpublicdnsname.should.be.a(str)
    cluster.name.should.equal(args["name"])
    int(cluster.normalizedinstancehours).should.equal(0)
    # cluster.release_label
    cluster.shouldnt.have.property("requestedamiversion")
    cluster.runningamiversion.should.equal("1.0.0")
    # cluster.securityconfiguration
    cluster.servicerole.should.equal(args["service_role"])

    cluster.status.state.should.equal("TERMINATED")
    cluster.status.statechangereason.message.should.be.a(str)
    cluster.status.statechangereason.code.should.be.a(str)
    cluster.status.timeline.creationdatetime.should.be.a(str)
    # cluster.status.timeline.enddatetime.should.be.a(str)
    # cluster.status.timeline.readydatetime.should.be.a(str)

    dict((item.key, item.value) for item in cluster.tags).should.equal(input_tags)

    cluster.terminationprotected.should.equal("false")
    cluster.visibletoallusers.should.equal("true")


# Has boto3 equivalent
@mock_emr_deprecated
def test_describe_jobflows():
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    expected = {}

    for idx in range(4):
        cluster_name = "cluster" + str(idx)
        args["name"] = cluster_name
        cluster_id = conn.run_jobflow(**args)
        expected[cluster_id] = {
            "id": cluster_id,
            "name": cluster_name,
            "state": "WAITING",
        }

    # need sleep since it appears the timestamp is always rounded to
    # the nearest second internally
    time.sleep(1)
    timestamp = datetime.now(pytz.utc)
    time.sleep(1)

    for idx in range(4, 6):
        cluster_name = "cluster" + str(idx)
        args["name"] = cluster_name
        cluster_id = conn.run_jobflow(**args)
        conn.terminate_jobflow(cluster_id)
        expected[cluster_id] = {
            "id": cluster_id,
            "name": cluster_name,
            "state": "TERMINATED",
        }
    jobs = conn.describe_jobflows()
    jobs.should.have.length_of(6)

    for cluster_id, y in expected.items():
        resp = conn.describe_jobflows(jobflow_ids=[cluster_id])
        resp.should.have.length_of(1)
        resp[0].jobflowid.should.equal(cluster_id)

    resp = conn.describe_jobflows(states=["WAITING"])
    resp.should.have.length_of(4)
    for x in resp:
        x.state.should.equal("WAITING")

    resp = conn.describe_jobflows(created_before=timestamp)
    resp.should.have.length_of(4)

    resp = conn.describe_jobflows(created_after=timestamp)
    resp.should.have.length_of(2)


# Has boto3 equivalent
@mock_emr_deprecated
def test_describe_jobflow():
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    args.update(
        dict(
            ami_version="3.8.1",
            api_params={
                #'Applications.member.1.Name': 'Spark',
                #'Applications.member.1.Version': '2.4.2',
                #'Configurations.member.1.Classification': 'yarn-site',
                #'Configurations.member.1.Properties.entry.1.key': 'someproperty',
                #'Configurations.member.1.Properties.entry.1.value': 'somevalue',
                #'Instances.EmrManagedMasterSecurityGroup': 'master-security-group',
                "Instances.Ec2SubnetId": "subnet-8be41cec"
            },
            ec2_keyname="mykey",
            hadoop_version="2.4.0",
            name="My jobflow",
            log_uri="s3://some_bucket/jobflow_logs",
            keep_alive=True,
            master_instance_type="c1.medium",
            slave_instance_type="c1.medium",
            num_instances=2,
            availability_zone="us-west-2b",
            job_flow_role="EMR_EC2_DefaultRole",
            service_role="EMR_DefaultRole",
            visible_to_all_users=True,
        )
    )

    cluster_id = conn.run_jobflow(**args)
    jf = conn.describe_jobflow(cluster_id)
    jf.amiversion.should.equal(args["ami_version"])
    jf.bootstrapactions.should.equal(None)
    jf.creationdatetime.should.be.a(str)
    jf.should.have.property("laststatechangereason")
    jf.readydatetime.should.be.a(str)
    jf.startdatetime.should.be.a(str)
    jf.state.should.equal("WAITING")

    jf.ec2keyname.should.equal(args["ec2_keyname"])
    # Ec2SubnetId
    jf.hadoopversion.should.equal(args["hadoop_version"])
    int(jf.instancecount).should.equal(2)

    for ig in jf.instancegroups:
        ig.creationdatetime.should.be.a(str)
        # ig.enddatetime.should.be.a(str)
        ig.should.have.property("instancegroupid").being.a(str)
        int(ig.instancerequestcount).should.equal(1)
        ig.instancerole.should.be.within(["MASTER", "CORE"])
        int(ig.instancerunningcount).should.equal(1)
        ig.instancetype.should.equal("c1.medium")
        ig.laststatechangereason.should.be.a(str)
        ig.market.should.equal("ON_DEMAND")
        ig.name.should.be.a(str)
        ig.readydatetime.should.be.a(str)
        ig.startdatetime.should.be.a(str)
        ig.state.should.equal("RUNNING")

    jf.keepjobflowalivewhennosteps.should.equal("true")
    jf.masterinstanceid.should.be.a(str)
    jf.masterinstancetype.should.equal(args["master_instance_type"])
    jf.masterpublicdnsname.should.be.a(str)
    int(jf.normalizedinstancehours).should.equal(0)
    jf.availabilityzone.should.equal(args["availability_zone"])
    jf.slaveinstancetype.should.equal(args["slave_instance_type"])
    jf.terminationprotected.should.equal("false")

    jf.jobflowid.should.equal(cluster_id)
    # jf.jobflowrole.should.equal(args['job_flow_role'])
    jf.loguri.should.equal(args["log_uri"])
    jf.name.should.equal(args["name"])
    # jf.servicerole.should.equal(args['service_role'])

    jf.steps.should.have.length_of(0)

    list(i.value for i in jf.supported_products).should.equal([])
    jf.visibletoallusers.should.equal("true")


# Has boto3 equivalent
@mock_emr_deprecated
def test_list_clusters():
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    expected = {}

    for idx in range(40):
        cluster_name = "jobflow" + str(idx)
        args["name"] = cluster_name
        cluster_id = conn.run_jobflow(**args)
        expected[cluster_id] = {
            "id": cluster_id,
            "name": cluster_name,
            "normalizedinstancehours": "0",
            "state": "WAITING",
        }

    # need sleep since it appears the timestamp is always rounded to
    # the nearest second internally
    time.sleep(1)
    timestamp = datetime.now(pytz.utc)
    time.sleep(1)

    for idx in range(40, 70):
        cluster_name = "jobflow" + str(idx)
        args["name"] = cluster_name
        cluster_id = conn.run_jobflow(**args)
        conn.terminate_jobflow(cluster_id)
        expected[cluster_id] = {
            "id": cluster_id,
            "name": cluster_name,
            "normalizedinstancehours": "0",
            "state": "TERMINATED",
        }

    args = {}
    while 1:
        resp = conn.list_clusters(**args)
        clusters = resp.clusters
        len(clusters).should.be.lower_than_or_equal_to(50)
        for x in clusters:
            y = expected[x.id]
            x.id.should.equal(y["id"])
            x.name.should.equal(y["name"])
            x.normalizedinstancehours.should.equal(y["normalizedinstancehours"])
            x.status.state.should.equal(y["state"])
            x.status.timeline.creationdatetime.should.be.a(str)
            if y["state"] == "TERMINATED":
                x.status.timeline.enddatetime.should.be.a(str)
            else:
                x.status.timeline.shouldnt.have.property("enddatetime")
            x.status.timeline.readydatetime.should.be.a(str)
        if not hasattr(resp, "marker"):
            break
        args = {"marker": resp.marker}

    resp = conn.list_clusters(cluster_states=["TERMINATED"])
    resp.clusters.should.have.length_of(30)
    for x in resp.clusters:
        x.status.state.should.equal("TERMINATED")

    resp = conn.list_clusters(created_before=timestamp)
    resp.clusters.should.have.length_of(40)

    resp = conn.list_clusters(created_after=timestamp)
    resp.clusters.should.have.length_of(30)


# Has boto3 equivalent
@mock_emr_deprecated
def test_run_jobflow():
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    job_id = conn.run_jobflow(**args)
    job_flow = conn.describe_jobflow(job_id)
    job_flow.state.should.equal("WAITING")
    job_flow.jobflowid.should.equal(job_id)
    job_flow.name.should.equal(args["name"])
    job_flow.masterinstancetype.should.equal(args["master_instance_type"])
    job_flow.slaveinstancetype.should.equal(args["slave_instance_type"])
    job_flow.loguri.should.equal(args["log_uri"])
    job_flow.visibletoallusers.should.equal("false")
    int(job_flow.normalizedinstancehours).should.equal(0)
    job_flow.steps.should.have.length_of(0)


# Has boto3 equivalent
@mock_emr_deprecated
def test_run_jobflow_in_multiple_regions():
    regions = {}
    for region in ["us-east-1", "eu-west-1"]:
        conn = boto.emr.connect_to_region(region)
        args = run_jobflow_args.copy()
        args["name"] = region
        cluster_id = conn.run_jobflow(**args)
        regions[region] = {"conn": conn, "cluster_id": cluster_id}

    for region in regions.keys():
        conn = regions[region]["conn"]
        jf = conn.describe_jobflow(regions[region]["cluster_id"])
        jf.name.should.equal(region)


# Has boto3 equivalent
@requires_boto_gte("2.8")
@mock_emr_deprecated
def test_run_jobflow_with_new_params():
    # Test that run_jobflow works with newer params
    conn = boto.connect_emr()
    conn.run_jobflow(**run_jobflow_args)


# Has boto3 equivalent
@requires_boto_gte("2.8")
@mock_emr_deprecated
def test_run_jobflow_with_visible_to_all_users():
    conn = boto.connect_emr()
    for expected in (True, False):
        job_id = conn.run_jobflow(visible_to_all_users=expected, **run_jobflow_args)
        job_flow = conn.describe_jobflow(job_id)
        job_flow.visibletoallusers.should.equal(str(expected).lower())


# Has boto3 equivalent
@requires_boto_gte("2.8")
@mock_emr_deprecated
def test_run_jobflow_with_instance_groups():
    input_groups = dict((g.name, g) for g in input_instance_groups)
    conn = boto.connect_emr()
    job_id = conn.run_jobflow(instance_groups=input_instance_groups, **run_jobflow_args)
    job_flow = conn.describe_jobflow(job_id)
    int(job_flow.instancecount).should.equal(
        sum(g.num_instances for g in input_instance_groups)
    )
    for instance_group in job_flow.instancegroups:
        expected = input_groups[instance_group.name]
        instance_group.should.have.property("instancegroupid")
        int(instance_group.instancerunningcount).should.equal(expected.num_instances)
        instance_group.instancerole.should.equal(expected.role)
        instance_group.instancetype.should.equal(expected.type)
        instance_group.market.should.equal(expected.market)
        if hasattr(expected, "bidprice"):
            instance_group.bidprice.should.equal(expected.bidprice)


# Has boto3 equivalent
@requires_boto_gte("2.8")
@mock_emr_deprecated
def test_set_termination_protection():
    conn = boto.connect_emr()
    job_id = conn.run_jobflow(**run_jobflow_args)
    job_flow = conn.describe_jobflow(job_id)
    job_flow.terminationprotected.should.equal("false")

    conn.set_termination_protection(job_id, True)
    job_flow = conn.describe_jobflow(job_id)
    job_flow.terminationprotected.should.equal("true")

    conn.set_termination_protection(job_id, False)
    job_flow = conn.describe_jobflow(job_id)
    job_flow.terminationprotected.should.equal("false")


# Has boto3 equivalent
@requires_boto_gte("2.8")
@mock_emr_deprecated
def test_set_visible_to_all_users():
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    args["visible_to_all_users"] = False
    job_id = conn.run_jobflow(**args)
    job_flow = conn.describe_jobflow(job_id)
    job_flow.visibletoallusers.should.equal("false")

    conn.set_visible_to_all_users(job_id, True)
    job_flow = conn.describe_jobflow(job_id)
    job_flow.visibletoallusers.should.equal("true")

    conn.set_visible_to_all_users(job_id, False)
    job_flow = conn.describe_jobflow(job_id)
    job_flow.visibletoallusers.should.equal("false")


# Has boto3 equivalent
@mock_emr_deprecated
def test_terminate_jobflow():
    conn = boto.connect_emr()
    job_id = conn.run_jobflow(**run_jobflow_args)
    flow = conn.describe_jobflows()[0]
    flow.state.should.equal("WAITING")

    conn.terminate_jobflow(job_id)
    flow = conn.describe_jobflows()[0]
    flow.state.should.equal("TERMINATED")


# testing multiple endpoints for each feature


# Has boto3 equivalent
@mock_emr_deprecated
def test_bootstrap_actions():
    bootstrap_actions = [
        BootstrapAction(
            name="bs1",
            path="path/to/script",
            bootstrap_action_args=["arg1", "arg2&arg3"],
        ),
        BootstrapAction(
            name="bs2", path="path/to/anotherscript", bootstrap_action_args=[]
        ),
    ]

    conn = boto.connect_emr()
    cluster_id = conn.run_jobflow(
        bootstrap_actions=bootstrap_actions, **run_jobflow_args
    )

    jf = conn.describe_jobflow(cluster_id)
    for x, y in zip(jf.bootstrapactions, bootstrap_actions):
        x.name.should.equal(y.name)
        x.path.should.equal(y.path)
        list(o.value for o in x.args).should.equal(y.args())

    resp = conn.list_bootstrap_actions(cluster_id)
    for i, y in enumerate(bootstrap_actions):
        x = resp.actions[i]
        x.name.should.equal(y.name)
        x.scriptpath.should.equal(y.path)
        list(arg.value for arg in x.args).should.equal(y.args())


# Has boto3 equivalent
@mock_emr_deprecated
def test_instance_groups():
    input_groups = dict((g.name, g) for g in input_instance_groups)

    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    for key in ["master_instance_type", "slave_instance_type", "num_instances"]:
        del args[key]
    args["instance_groups"] = input_instance_groups[:2]
    job_id = conn.run_jobflow(**args)

    jf = conn.describe_jobflow(job_id)
    base_instance_count = int(jf.instancecount)

    conn.add_instance_groups(job_id, input_instance_groups[2:])

    jf = conn.describe_jobflow(job_id)
    int(jf.instancecount).should.equal(
        sum(g.num_instances for g in input_instance_groups)
    )
    for x in jf.instancegroups:
        y = input_groups[x.name]
        if hasattr(y, "bidprice"):
            x.bidprice.should.equal(y.bidprice)
        x.creationdatetime.should.be.a(str)
        # x.enddatetime.should.be.a(str)
        x.should.have.property("instancegroupid")
        int(x.instancerequestcount).should.equal(y.num_instances)
        x.instancerole.should.equal(y.role)
        int(x.instancerunningcount).should.equal(y.num_instances)
        x.instancetype.should.equal(y.type)
        x.laststatechangereason.should.be.a(str)
        x.market.should.equal(y.market)
        x.name.should.be.a(str)
        x.readydatetime.should.be.a(str)
        x.startdatetime.should.be.a(str)
        x.state.should.equal("RUNNING")

    for x in conn.list_instance_groups(job_id).instancegroups:
        y = input_groups[x.name]
        if hasattr(y, "bidprice"):
            x.bidprice.should.equal(y.bidprice)
        # Configurations
        # EbsBlockDevices
        # EbsOptimized
        x.should.have.property("id")
        x.instancegrouptype.should.equal(y.role)
        x.instancetype.should.equal(y.type)
        x.market.should.equal(y.market)
        x.name.should.equal(y.name)
        int(x.requestedinstancecount).should.equal(y.num_instances)
        int(x.runninginstancecount).should.equal(y.num_instances)
        # ShrinkPolicy
        x.status.state.should.equal("RUNNING")
        x.status.statechangereason.code.should.be.a(str)
        x.status.statechangereason.message.should.be.a(str)
        x.status.timeline.creationdatetime.should.be.a(str)
        # x.status.timeline.enddatetime.should.be.a(str)
        x.status.timeline.readydatetime.should.be.a(str)

    igs = dict((g.name, g) for g in jf.instancegroups)

    conn.modify_instance_groups(
        [igs["task-1"].instancegroupid, igs["task-2"].instancegroupid], [2, 3]
    )
    jf = conn.describe_jobflow(job_id)
    int(jf.instancecount).should.equal(base_instance_count + 5)
    igs = dict((g.name, g) for g in jf.instancegroups)
    int(igs["task-1"].instancerunningcount).should.equal(2)
    int(igs["task-2"].instancerunningcount).should.equal(3)


# Has boto3 equivalent
@mock_emr_deprecated
def test_steps():
    input_steps = [
        StreamingStep(
            name="My wordcount example",
            mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter.py",
            reducer="aggregate",
            input="s3n://elasticmapreduce/samples/wordcount/input",
            output="s3n://output_bucket/output/wordcount_output",
        ),
        StreamingStep(
            name="My wordcount example & co.",
            mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py",
            reducer="aggregate",
            input="s3n://elasticmapreduce/samples/wordcount/input2",
            output="s3n://output_bucket/output/wordcount_output2",
        ),
    ]

    # TODO: implementation and test for cancel_steps

    conn = boto.connect_emr()
    cluster_id = conn.run_jobflow(steps=[input_steps[0]], **run_jobflow_args)

    jf = conn.describe_jobflow(cluster_id)
    jf.steps.should.have.length_of(1)

    conn.add_jobflow_steps(cluster_id, [input_steps[1]])

    jf = conn.describe_jobflow(cluster_id)
    jf.steps.should.have.length_of(2)
    for step in jf.steps:
        step.actiononfailure.should.equal("TERMINATE_JOB_FLOW")
        list(arg.value for arg in step.args).should.have.length_of(8)
        step.creationdatetime.should.be.a(str)
        # step.enddatetime.should.be.a(str)
        step.jar.should.equal("/home/hadoop/contrib/streaming/hadoop-streaming.jar")
        step.laststatechangereason.should.be.a(str)
        step.mainclass.should.equal("")
        step.name.should.be.a(str)
        # step.readydatetime.should.be.a(str)
        # step.startdatetime.should.be.a(str)
        step.state.should.be.within(["STARTING", "PENDING"])

    expected = dict((s.name, s) for s in input_steps)

    steps = conn.list_steps(cluster_id).steps
    for x in steps:
        y = expected[x.name]
        # actiononfailure
        list(arg.value for arg in x.config.args).should.equal(
            [
                "-mapper",
                y.mapper,
                "-reducer",
                y.reducer,
                "-input",
                y.input,
                "-output",
                y.output,
            ]
        )
        x.config.jar.should.equal("/home/hadoop/contrib/streaming/hadoop-streaming.jar")
        x.config.mainclass.should.equal("")
        # properties
        x.should.have.property("id").should.be.a(str)
        x.name.should.equal(y.name)
        x.status.state.should.be.within(["STARTING", "PENDING"])
        # x.status.statechangereason
        x.status.timeline.creationdatetime.should.be.a(str)
        # x.status.timeline.enddatetime.should.be.a(str)
        # x.status.timeline.startdatetime.should.be.a(str)

        x = conn.describe_step(cluster_id, x.id)
        list(arg.value for arg in x.config.args).should.equal(
            [
                "-mapper",
                y.mapper,
                "-reducer",
                y.reducer,
                "-input",
                y.input,
                "-output",
                y.output,
            ]
        )
        x.config.jar.should.equal("/home/hadoop/contrib/streaming/hadoop-streaming.jar")
        x.config.mainclass.should.equal("")
        # properties
        x.should.have.property("id").should.be.a(str)
        x.name.should.equal(y.name)
        x.status.state.should.be.within(["STARTING", "PENDING"])
        # x.status.statechangereason
        x.status.timeline.creationdatetime.should.be.a(str)
        # x.status.timeline.enddatetime.should.be.a(str)
        # x.status.timeline.startdatetime.should.be.a(str)

    @requires_boto_gte("2.39")
    def test_list_steps_with_states():
        # boto's list_steps prior to 2.39 has a bug that ignores
        # step_states argument.
        steps = conn.list_steps(cluster_id).steps
        step_id = steps[0].id
        steps = conn.list_steps(cluster_id, step_states=["STARTING"]).steps
        steps.should.have.length_of(1)
        steps[0].id.should.equal(step_id)

    test_list_steps_with_states()


# Has boto3 equivalent
@mock_emr_deprecated
def test_tags():
    input_tags = {"tag1": "val1", "tag2": "val2"}

    conn = boto.connect_emr()
    cluster_id = conn.run_jobflow(**run_jobflow_args)

    conn.add_tags(cluster_id, input_tags)
    cluster = conn.describe_cluster(cluster_id)
    cluster.tags.should.have.length_of(2)
    dict((t.key, t.value) for t in cluster.tags).should.equal(input_tags)

    conn.remove_tags(cluster_id, list(input_tags.keys()))
    cluster = conn.describe_cluster(cluster_id)
    cluster.tags.should.have.length_of(0)