| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  | import time | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | from copy import deepcopy | 
					
						
							| 
									
										
										
										
											2022-12-10 00:56:08 +01:00
										 |  |  | from datetime import datetime, timezone | 
					
						
							| 
									
										
										
										
											2015-11-15 17:24:36 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | import boto3 | 
					
						
							| 
									
										
										
										
											2020-11-11 15:54:01 +00:00
										 |  |  | import json | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | from botocore.exceptions import ClientError | 
					
						
							| 
									
										
										
										
											2020-10-06 07:54:49 +02:00
										 |  |  | import pytest | 
					
						
							| 
									
										
										
										
											2015-11-15 17:24:36 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | from moto import mock_emr | 
					
						
							| 
									
										
										
										
											2022-08-13 09:49:43 +00:00
										 |  |  | from moto.core import DEFAULT_ACCOUNT_ID as ACCOUNT_ID | 
					
						
							| 
									
										
										
										
											2015-11-15 17:24:36 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | run_job_flow_args = dict( | 
					
						
							|  |  |  |     Instances={ | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         "InstanceCount": 3, | 
					
						
							|  |  |  |         "KeepJobFlowAliveWhenNoSteps": True, | 
					
						
							|  |  |  |         "MasterInstanceType": "c3.medium", | 
					
						
							|  |  |  |         "Placement": {"AvailabilityZone": "us-east-1a"}, | 
					
						
							|  |  |  |         "SlaveInstanceType": "c3.xlarge", | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     JobFlowRole="EMR_EC2_DefaultRole", | 
					
						
							|  |  |  |     LogUri="s3://mybucket/log", | 
					
						
							|  |  |  |     Name="cluster", | 
					
						
							|  |  |  |     ServiceRole="EMR_DefaultRole", | 
					
						
							|  |  |  |     VisibleToAllUsers=True, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | input_instance_groups = [ | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     { | 
					
						
							|  |  |  |         "InstanceCount": 1, | 
					
						
							|  |  |  |         "InstanceRole": "MASTER", | 
					
						
							|  |  |  |         "InstanceType": "c1.medium", | 
					
						
							|  |  |  |         "Market": "ON_DEMAND", | 
					
						
							|  |  |  |         "Name": "master", | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         "InstanceCount": 3, | 
					
						
							|  |  |  |         "InstanceRole": "CORE", | 
					
						
							|  |  |  |         "InstanceType": "c1.medium", | 
					
						
							|  |  |  |         "Market": "ON_DEMAND", | 
					
						
							|  |  |  |         "Name": "core", | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         "InstanceCount": 6, | 
					
						
							|  |  |  |         "InstanceRole": "TASK", | 
					
						
							| 
									
										
										
										
											2022-05-18 19:51:51 +02:00
										 |  |  |         "InstanceType": "c3.large", | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         "Market": "SPOT", | 
					
						
							|  |  |  |         "Name": "task-1", | 
					
						
							|  |  |  |         "BidPrice": "0.07", | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         "InstanceCount": 10, | 
					
						
							|  |  |  |         "InstanceRole": "TASK", | 
					
						
							|  |  |  |         "InstanceType": "c1.xlarge", | 
					
						
							|  |  |  |         "Market": "SPOT", | 
					
						
							|  |  |  |         "Name": "task-2", | 
					
						
							|  |  |  |         "BidPrice": "0.05", | 
					
						
							| 
									
										
										
										
											2020-03-05 18:11:49 -03:00
										 |  |  |         "EbsConfiguration": { | 
					
						
							|  |  |  |             "EbsBlockDeviceConfigs": [ | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "VolumeSpecification": {"VolumeType": "gp2", "SizeInGB": 800}, | 
					
						
							|  |  |  |                     "VolumesPerInstance": 6, | 
					
						
							|  |  |  |                 }, | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |             "EbsOptimized": True, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-15 17:24:36 +09:00
										 |  |  | @mock_emr | 
					
						
							| 
									
										
										
										
											2022-08-17 14:01:22 +00:00
										 |  |  | @pytest.mark.filterwarnings("ignore") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | def test_describe_cluster(): | 
					
						
							| 
									
										
										
										
											2021-04-23 07:20:36 -07:00
										 |  |  |     region_name = "us-east-1" | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name=region_name) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["Applications"] = [{"Name": "Spark", "Version": "2.4.2"}] | 
					
						
							|  |  |  |     args["Configurations"] = [ | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             "Classification": "yarn-site", | 
					
						
							|  |  |  |             "Properties": { | 
					
						
							|  |  |  |                 "someproperty": "somevalue", | 
					
						
							|  |  |  |                 "someotherproperty": "someothervalue", | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             "Classification": "nested-configs", | 
					
						
							|  |  |  |             "Properties": {}, | 
					
						
							|  |  |  |             "Configurations": [ | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "Classification": "nested-config", | 
					
						
							|  |  |  |                     "Properties": {"nested-property": "nested-value"}, | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2017-06-27 11:31:43 -07:00
										 |  |  |     ] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["Instances"]["AdditionalMasterSecurityGroups"] = ["additional-master"] | 
					
						
							|  |  |  |     args["Instances"]["AdditionalSlaveSecurityGroups"] = ["additional-slave"] | 
					
						
							|  |  |  |     args["Instances"]["Ec2KeyName"] = "mykey" | 
					
						
							|  |  |  |     args["Instances"]["Ec2SubnetId"] = "subnet-8be41cec" | 
					
						
							|  |  |  |     args["Instances"]["EmrManagedMasterSecurityGroup"] = "master-security-group" | 
					
						
							|  |  |  |     args["Instances"]["EmrManagedSlaveSecurityGroup"] = "slave-security-group" | 
					
						
							|  |  |  |     args["Instances"]["KeepJobFlowAliveWhenNoSteps"] = False | 
					
						
							|  |  |  |     args["Instances"]["ServiceAccessSecurityGroup"] = "service-access-security-group" | 
					
						
							| 
									
										
										
										
											2020-11-17 05:54:34 -05:00
										 |  |  |     args["KerberosAttributes"] = { | 
					
						
							|  |  |  |         "Realm": "MY-REALM.COM", | 
					
						
							|  |  |  |         "KdcAdminPassword": "SuperSecretPassword2", | 
					
						
							|  |  |  |         "CrossRealmTrustPrincipalPassword": "SuperSecretPassword3", | 
					
						
							|  |  |  |         "ADDomainJoinUser": "Bob", | 
					
						
							|  |  |  |         "ADDomainJoinPassword": "SuperSecretPassword4", | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["Tags"] = [{"Key": "tag1", "Value": "val1"}, {"Key": "tag2", "Value": "val2"}] | 
					
						
							| 
									
										
										
										
											2020-11-17 05:54:34 -05:00
										 |  |  |     args["SecurityConfiguration"] = "my-security-configuration" | 
					
						
							| 
									
										
										
										
											2021-11-24 23:07:30 +05:30
										 |  |  |     args["AutoScalingRole"] = "EMR_AutoScaling_DefaultRole" | 
					
						
							| 
									
										
										
										
											2022-07-02 17:03:54 -07:00
										 |  |  |     args["AutoTerminationPolicy"] = {"IdleTimeout": 123} | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     cl = client.describe_cluster(ClusterId=cluster_id)["Cluster"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert cl["Applications"][0]["Name"] == "Spark" | 
					
						
							|  |  |  |     assert cl["Applications"][0]["Version"] == "2.4.2" | 
					
						
							|  |  |  |     assert cl["AutoTerminate"] is True | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     config = cl["Configurations"][0] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert config["Classification"] == "yarn-site" | 
					
						
							|  |  |  |     assert config["Properties"] == args["Configurations"][0]["Properties"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     nested_config = cl["Configurations"][1] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert nested_config["Classification"] == "nested-configs" | 
					
						
							|  |  |  |     assert nested_config["Properties"] == args["Configurations"][1]["Properties"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     attrs = cl["Ec2InstanceAttributes"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert ( | 
					
						
							|  |  |  |         attrs["AdditionalMasterSecurityGroups"] | 
					
						
							|  |  |  |         == args["Instances"]["AdditionalMasterSecurityGroups"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert ( | 
					
						
							|  |  |  |         attrs["AdditionalSlaveSecurityGroups"] | 
					
						
							|  |  |  |         == args["Instances"]["AdditionalSlaveSecurityGroups"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert attrs["Ec2AvailabilityZone"] == "us-east-1a" | 
					
						
							|  |  |  |     assert attrs["Ec2KeyName"] == args["Instances"]["Ec2KeyName"] | 
					
						
							|  |  |  |     assert attrs["Ec2SubnetId"] == args["Instances"]["Ec2SubnetId"] | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         attrs["EmrManagedMasterSecurityGroup"] | 
					
						
							|  |  |  |         == args["Instances"]["EmrManagedMasterSecurityGroup"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert ( | 
					
						
							|  |  |  |         attrs["EmrManagedSlaveSecurityGroup"] | 
					
						
							|  |  |  |         == args["Instances"]["EmrManagedSlaveSecurityGroup"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert attrs["IamInstanceProfile"] == args["JobFlowRole"] | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         attrs["ServiceAccessSecurityGroup"] | 
					
						
							|  |  |  |         == args["Instances"]["ServiceAccessSecurityGroup"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert cl["Id"] == cluster_id | 
					
						
							|  |  |  |     assert cl["KerberosAttributes"] == args["KerberosAttributes"] | 
					
						
							|  |  |  |     assert cl["LogUri"] == args["LogUri"] | 
					
						
							|  |  |  |     assert isinstance(cl["MasterPublicDnsName"], str) | 
					
						
							|  |  |  |     assert cl["Name"] == args["Name"] | 
					
						
							|  |  |  |     assert cl["NormalizedInstanceHours"] == 0 | 
					
						
							|  |  |  |     # assert cl['ReleaseLabel'] == 'emr-5.0.0' | 
					
						
							|  |  |  |     assert "RequestedAmiVersion" not in cl | 
					
						
							|  |  |  |     assert cl["RunningAmiVersion"] == "1.0.0" | 
					
						
							|  |  |  |     assert isinstance(cl["SecurityConfiguration"], str) | 
					
						
							|  |  |  |     assert cl["SecurityConfiguration"] == args["SecurityConfiguration"] | 
					
						
							|  |  |  |     assert cl["ServiceRole"] == args["ServiceRole"] | 
					
						
							|  |  |  |     assert cl["AutoScalingRole"] == args["AutoScalingRole"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     status = cl["Status"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert status["State"] == "TERMINATED" | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     # cluster['Status']['StateChangeReason'] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert isinstance(status["Timeline"]["CreationDateTime"], datetime) | 
					
						
							|  |  |  |     # assert status['Timeline']['EndDateTime'] == datetime(2014, 1, 24, 2, 19, 46, tzinfo=timezone.utc) | 
					
						
							|  |  |  |     assert isinstance(status["Timeline"]["ReadyDateTime"], datetime) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert {t["Key"]: t["Value"] for t in cl["Tags"]} == { | 
					
						
							|  |  |  |         t["Key"]: t["Value"] for t in args["Tags"] | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert cl["TerminationProtected"] is False | 
					
						
							|  |  |  |     assert cl["VisibleToAllUsers"] is True | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         cl["ClusterArn"] | 
					
						
							|  |  |  |         == f"arn:aws:elasticmapreduce:{region_name}:{ACCOUNT_ID}:cluster/{cluster_id}" | 
					
						
							| 
									
										
										
										
											2021-04-23 07:20:36 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-10 21:58:42 -04:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_describe_cluster_not_found(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     conn = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2021-11-17 17:19:03 +01:00
										 |  |  |     with pytest.raises(ClientError) as e: | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |         conn.describe_cluster(ClusterId="DummyId") | 
					
						
							| 
									
										
										
										
											2021-11-17 17:19:03 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     assert e.value.response["Error"]["Code"] == "ResourceNotFoundException" | 
					
						
							| 
									
										
										
										
											2017-05-10 21:58:42 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_describe_job_flows(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							|  |  |  |     expected = {} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-23 19:43:48 -05:00
										 |  |  |     for idx in range(4): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         cluster_name = "cluster" + str(idx) | 
					
						
							|  |  |  |         args["Name"] = cluster_name | 
					
						
							|  |  |  |         cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         expected[cluster_id] = { | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             "Id": cluster_id, | 
					
						
							|  |  |  |             "Name": cluster_name, | 
					
						
							|  |  |  |             "State": "WAITING", | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # need sleep since it appears the timestamp is always rounded to | 
					
						
							|  |  |  |     # the nearest second internally | 
					
						
							|  |  |  |     time.sleep(1) | 
					
						
							| 
									
										
										
										
											2022-12-10 00:56:08 +01:00
										 |  |  |     timestamp = datetime.now(timezone.utc) | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |     time.sleep(1) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-23 19:43:48 -05:00
										 |  |  |     for idx in range(4, 6): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         cluster_name = "cluster" + str(idx) | 
					
						
							|  |  |  |         args["Name"] = cluster_name | 
					
						
							|  |  |  |         cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         client.terminate_job_flows(JobFlowIds=[cluster_id]) | 
					
						
							|  |  |  |         expected[cluster_id] = { | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             "Id": cluster_id, | 
					
						
							|  |  |  |             "Name": cluster_name, | 
					
						
							|  |  |  |             "State": "TERMINATED", | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.describe_job_flows() | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(resp["JobFlows"]) == 6 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |     for cluster_id in expected: | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         resp = client.describe_job_flows(JobFlowIds=[cluster_id]) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert len(resp["JobFlows"]) == 1 | 
					
						
							|  |  |  |         assert resp["JobFlows"][0]["JobFlowId"] == cluster_id | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     resp = client.describe_job_flows(JobFlowStates=["WAITING"]) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(resp["JobFlows"]) == 4 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     for x in resp["JobFlows"]: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["ExecutionStatusDetail"]["State"] == "WAITING" | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.describe_job_flows(CreatedBefore=timestamp) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(resp["JobFlows"]) == 4 | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.describe_job_flows(CreatedAfter=timestamp) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(resp["JobFlows"]) == 2 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							| 
									
										
										
										
											2022-08-17 14:01:22 +00:00
										 |  |  | @pytest.mark.filterwarnings("ignore") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | def test_describe_job_flow(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["AmiVersion"] = "3.8.1" | 
					
						
							|  |  |  |     args["Instances"].update( | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             "Ec2KeyName": "ec2keyname", | 
					
						
							|  |  |  |             "Ec2SubnetId": "subnet-8be41cec", | 
					
						
							|  |  |  |             "HadoopVersion": "2.4.0", | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     args["VisibleToAllUsers"] = True | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     jf = client.describe_job_flows(JobFlowIds=[cluster_id])["JobFlows"][0] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert jf["AmiVersion"] == args["AmiVersion"] | 
					
						
							|  |  |  |     assert "BootstrapActions" not in jf | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     esd = jf["ExecutionStatusDetail"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert isinstance(esd["CreationDateTime"], datetime) | 
					
						
							|  |  |  |     # assert isinstance(esd['EndDateTime'], 'datetime.datetime') | 
					
						
							|  |  |  |     # assert isinstance(esd['LastStateChangeReason'], str) | 
					
						
							|  |  |  |     assert isinstance(esd["ReadyDateTime"], datetime) | 
					
						
							|  |  |  |     assert isinstance(esd["StartDateTime"], datetime) | 
					
						
							|  |  |  |     assert esd["State"] == "WAITING" | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     attrs = jf["Instances"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert attrs["Ec2KeyName"] == args["Instances"]["Ec2KeyName"] | 
					
						
							|  |  |  |     assert attrs["Ec2SubnetId"] == args["Instances"]["Ec2SubnetId"] | 
					
						
							|  |  |  |     assert attrs["HadoopVersion"] == args["Instances"]["HadoopVersion"] | 
					
						
							|  |  |  |     assert attrs["InstanceCount"] == args["Instances"]["InstanceCount"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     for ig in attrs["InstanceGroups"]: | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         # ig['BidPrice'] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert isinstance(ig["CreationDateTime"], datetime) | 
					
						
							|  |  |  |         # assert isinstance(ig['EndDateTime'], 'datetime.datetime') | 
					
						
							|  |  |  |         assert isinstance(ig["InstanceGroupId"], str) | 
					
						
							|  |  |  |         assert isinstance(ig["InstanceRequestCount"], int) | 
					
						
							|  |  |  |         assert ig["InstanceRole"] in ["MASTER", "CORE"] | 
					
						
							|  |  |  |         assert isinstance(ig["InstanceRunningCount"], int) | 
					
						
							|  |  |  |         assert ig["InstanceType"] in ["c3.medium", "c3.xlarge"] | 
					
						
							|  |  |  |         # assert isinstance(ig['LastStateChangeReason'], str) | 
					
						
							|  |  |  |         assert ig["Market"] == "ON_DEMAND" | 
					
						
							|  |  |  |         assert isinstance(ig["Name"], str) | 
					
						
							|  |  |  |         assert isinstance(ig["ReadyDateTime"], datetime) | 
					
						
							|  |  |  |         assert isinstance(ig["StartDateTime"], datetime) | 
					
						
							|  |  |  |         assert ig["State"] == "RUNNING" | 
					
						
							|  |  |  |     assert attrs["KeepJobFlowAliveWhenNoSteps"] is True | 
					
						
							|  |  |  |     # assert isinstance(attrs['MasterInstanceId'], str) | 
					
						
							|  |  |  |     assert attrs["MasterInstanceType"] == args["Instances"]["MasterInstanceType"] | 
					
						
							|  |  |  |     assert isinstance(attrs["MasterPublicDnsName"], str) | 
					
						
							|  |  |  |     assert attrs["NormalizedInstanceHours"] == 0 | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         attrs["Placement"]["AvailabilityZone"] | 
					
						
							|  |  |  |         == args["Instances"]["Placement"]["AvailabilityZone"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert attrs["SlaveInstanceType"] == args["Instances"]["SlaveInstanceType"] | 
					
						
							|  |  |  |     assert attrs["TerminationProtected"] is False | 
					
						
							|  |  |  |     assert jf["JobFlowId"] == cluster_id | 
					
						
							|  |  |  |     assert jf["JobFlowRole"] == args["JobFlowRole"] | 
					
						
							|  |  |  |     assert jf["LogUri"] == args["LogUri"] | 
					
						
							|  |  |  |     assert jf["Name"] == args["Name"] | 
					
						
							|  |  |  |     assert jf["ServiceRole"] == args["ServiceRole"] | 
					
						
							|  |  |  |     assert jf["Steps"] == [] | 
					
						
							|  |  |  |     assert jf["SupportedProducts"] == [] | 
					
						
							|  |  |  |     assert jf["VisibleToAllUsers"] is True | 
					
						
							| 
									
										
										
										
											2015-11-15 17:24:36 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_list_clusters(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |     expected = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for idx in range(40): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         cluster_name = "jobflow" + str(idx) | 
					
						
							|  |  |  |         args["Name"] = cluster_name | 
					
						
							|  |  |  |         cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         expected[cluster_id] = { | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             "Id": cluster_id, | 
					
						
							|  |  |  |             "Name": cluster_name, | 
					
						
							|  |  |  |             "NormalizedInstanceHours": 0, | 
					
						
							|  |  |  |             "State": "WAITING", | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # need sleep since it appears the timestamp is always rounded to | 
					
						
							|  |  |  |     # the nearest second internally | 
					
						
							|  |  |  |     time.sleep(1) | 
					
						
							| 
									
										
										
										
											2022-12-10 00:56:08 +01:00
										 |  |  |     timestamp = datetime.now(timezone.utc) | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |     time.sleep(1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for idx in range(40, 70): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         cluster_name = "jobflow" + str(idx) | 
					
						
							|  |  |  |         args["Name"] = cluster_name | 
					
						
							|  |  |  |         cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         client.terminate_job_flows(JobFlowIds=[cluster_id]) | 
					
						
							|  |  |  |         expected[cluster_id] = { | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             "Id": cluster_id, | 
					
						
							|  |  |  |             "Name": cluster_name, | 
					
						
							|  |  |  |             "NormalizedInstanceHours": 0, | 
					
						
							|  |  |  |             "State": "TERMINATED", | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     args = {} | 
					
						
							|  |  |  |     while 1: | 
					
						
							|  |  |  |         resp = client.list_clusters(**args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         clusters = resp["Clusters"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert len(clusters) <= 50 | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         for x in clusters: | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             y = expected[x["Id"]] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert x["Id"] == y["Id"] | 
					
						
							|  |  |  |             assert x["Name"] == y["Name"] | 
					
						
							|  |  |  |             assert x["NormalizedInstanceHours"] == y["NormalizedInstanceHours"] | 
					
						
							|  |  |  |             assert x["Status"]["State"] == y["State"] | 
					
						
							|  |  |  |             assert isinstance(x["Status"]["Timeline"]["CreationDateTime"], datetime) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             if y["State"] == "TERMINATED": | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |                 assert isinstance(x["Status"]["Timeline"]["EndDateTime"], datetime) | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |             else: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |                 assert "EndDateTime" not in x["Status"]["Timeline"] | 
					
						
							|  |  |  |             assert isinstance(x["Status"]["Timeline"]["ReadyDateTime"], datetime) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         marker = resp.get("Marker") | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  |         if marker is None: | 
					
						
							|  |  |  |             break | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         args = {"Marker": marker} | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     resp = client.list_clusters(ClusterStates=["TERMINATED"]) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(resp["Clusters"]) == 30 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     for x in resp["Clusters"]: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["Status"]["State"] == "TERMINATED" | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.list_clusters(CreatedBefore=timestamp) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(resp["Clusters"]) == 40 | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.list_clusters(CreatedAfter=timestamp) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(resp["Clusters"]) == 30 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow(): | 
					
						
							| 
									
										
										
										
											2021-04-23 07:20:36 -07:00
										 |  |  |     region_name = "us-east-1" | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name=region_name) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2021-04-23 07:20:36 -07:00
										 |  |  |     resp = client.run_job_flow(**args) | 
					
						
							|  |  |  |     resp["ClusterArn"].startswith( | 
					
						
							| 
									
										
										
										
											2022-11-17 21:41:08 -01:00
										 |  |  |         f"arn:aws:elasticmapreduce:{region_name}:{ACCOUNT_ID}:cluster/" | 
					
						
							| 
									
										
										
										
											2021-04-23 07:20:36 -07:00
										 |  |  |     ) | 
					
						
							|  |  |  |     job_flow_id = resp["JobFlowId"] | 
					
						
							|  |  |  |     resp = client.describe_job_flows(JobFlowIds=[job_flow_id])["JobFlows"][0] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["ExecutionStatusDetail"]["State"] == "WAITING" | 
					
						
							|  |  |  |     assert resp["JobFlowId"] == job_flow_id | 
					
						
							|  |  |  |     assert resp["Name"] == args["Name"] | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         resp["Instances"]["MasterInstanceType"] | 
					
						
							|  |  |  |         == args["Instances"]["MasterInstanceType"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert ( | 
					
						
							|  |  |  |         resp["Instances"]["SlaveInstanceType"] == args["Instances"]["SlaveInstanceType"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["LogUri"] == args["LogUri"] | 
					
						
							|  |  |  |     assert resp["VisibleToAllUsers"] == args["VisibleToAllUsers"] | 
					
						
							|  |  |  |     assert resp["Instances"]["NormalizedInstanceHours"] == 0 | 
					
						
							|  |  |  |     assert resp["Steps"] == [] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_invalid_params(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2020-10-06 07:54:49 +02:00
										 |  |  |     with pytest.raises(ClientError) as ex: | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         # cannot set both AmiVersion and ReleaseLabel | 
					
						
							|  |  |  |         args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         args["AmiVersion"] = "2.4" | 
					
						
							|  |  |  |         args["ReleaseLabel"] = "emr-5.0.0" | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         client.run_job_flow(**args) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert ex.value.response["Error"]["Code"] == "ValidationException" | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_in_multiple_regions(): | 
					
						
							|  |  |  |     regions = {} | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     for region in ["us-east-1", "eu-west-1"]: | 
					
						
							|  |  |  |         client = boto3.client("emr", region_name=region) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         args["Name"] = region | 
					
						
							|  |  |  |         cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							|  |  |  |         regions[region] = {"client": client, "cluster_id": cluster_id} | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for region in regions.keys(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         client = regions[region]["client"] | 
					
						
							|  |  |  |         resp = client.describe_cluster(ClusterId=regions[region]["cluster_id"]) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert resp["Cluster"]["Name"] == region | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_new_params(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     resp = client.run_job_flow(**run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert "JobFlowId" in resp | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_visible_to_all_users(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     for expected in (True, False): | 
					
						
							|  |  |  |         args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         args["VisibleToAllUsers"] = expected | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         resp = client.run_job_flow(**args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         cluster_id = resp["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         resp = client.describe_cluster(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert resp["Cluster"]["VisibleToAllUsers"] == expected | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-06 18:11:07 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-06 18:10:39 -03:00
										 |  |  | def _do_assertion_ebs_configuration(x, y): | 
					
						
							|  |  |  |     total_volumes = 0 | 
					
						
							|  |  |  |     total_size = 0 | 
					
						
							|  |  |  |     for ebs_block in y["EbsConfiguration"]["EbsBlockDeviceConfigs"]: | 
					
						
							|  |  |  |         total_volumes += ebs_block["VolumesPerInstance"] | 
					
						
							|  |  |  |         total_size += ebs_block["VolumeSpecification"]["SizeInGB"] | 
					
						
							|  |  |  |     # Multiply by total volumes | 
					
						
							|  |  |  |     total_size = total_size * total_volumes | 
					
						
							|  |  |  |     comp_total_size = 0 | 
					
						
							|  |  |  |     for ebs_block in x["EbsBlockDevices"]: | 
					
						
							|  |  |  |         comp_total_size += ebs_block["VolumeSpecification"]["SizeInGB"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(x["EbsBlockDevices"]) == total_volumes | 
					
						
							|  |  |  |     assert comp_total_size == comp_total_size | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-06 18:11:07 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_instance_groups(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     input_groups = dict((g["Name"], g) for g in input_instance_groups) | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["Instances"] = {"InstanceGroups": input_instance_groups} | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							|  |  |  |     groups = client.list_instance_groups(ClusterId=cluster_id)["InstanceGroups"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     for x in groups: | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         y = input_groups[x["Name"]] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert "Id" in x | 
					
						
							|  |  |  |         assert x["RequestedInstanceCount"] == y["InstanceCount"] | 
					
						
							|  |  |  |         assert x["InstanceGroupType"] == y["InstanceRole"] | 
					
						
							|  |  |  |         assert x["InstanceType"] == y["InstanceType"] | 
					
						
							|  |  |  |         assert x["Market"] == y["Market"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         if "BidPrice" in y: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert x["BidPrice"] == y["BidPrice"] | 
					
						
							| 
									
										
										
										
											2020-03-06 18:10:39 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-05 18:11:49 -03:00
										 |  |  |         if "EbsConfiguration" in y: | 
					
						
							| 
									
										
										
										
											2020-03-06 18:10:39 -03:00
										 |  |  |             _do_assertion_ebs_configuration(x, y) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  | auto_scaling_policy = { | 
					
						
							|  |  |  |     "Constraints": {"MinCapacity": 2, "MaxCapacity": 10}, | 
					
						
							|  |  |  |     "Rules": [ | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             "Name": "Default-scale-out", | 
					
						
							|  |  |  |             "Description": "Replicates the default scale-out rule in the console for YARN memory.", | 
					
						
							|  |  |  |             "Action": { | 
					
						
							|  |  |  |                 "SimpleScalingPolicyConfiguration": { | 
					
						
							|  |  |  |                     "AdjustmentType": "CHANGE_IN_CAPACITY", | 
					
						
							|  |  |  |                     "ScalingAdjustment": 1, | 
					
						
							|  |  |  |                     "CoolDown": 300, | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             "Trigger": { | 
					
						
							|  |  |  |                 "CloudWatchAlarmDefinition": { | 
					
						
							|  |  |  |                     "ComparisonOperator": "LESS_THAN", | 
					
						
							|  |  |  |                     "EvaluationPeriods": 1, | 
					
						
							|  |  |  |                     "MetricName": "YARNMemoryAvailablePercentage", | 
					
						
							|  |  |  |                     "Namespace": "AWS/ElasticMapReduce", | 
					
						
							|  |  |  |                     "Period": 300, | 
					
						
							|  |  |  |                     "Threshold": 15.0, | 
					
						
							|  |  |  |                     "Statistic": "AVERAGE", | 
					
						
							|  |  |  |                     "Unit": "PERCENT", | 
					
						
							|  |  |  |                     "Dimensions": [{"Key": "JobFlowId", "Value": "${emr.clusterId}"}], | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     ], | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_instance_groups_with_autoscaling(): | 
					
						
							|  |  |  |     input_groups = dict((g["Name"], g) for g in input_instance_groups) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     input_groups["core"]["AutoScalingPolicy"] = auto_scaling_policy | 
					
						
							|  |  |  |     input_groups["task-1"]["AutoScalingPolicy"] = auto_scaling_policy | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							|  |  |  |     args["Instances"] = {"InstanceGroups": input_instance_groups} | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							|  |  |  |     groups = client.list_instance_groups(ClusterId=cluster_id)["InstanceGroups"] | 
					
						
							|  |  |  |     for x in groups: | 
					
						
							|  |  |  |         y = deepcopy(input_groups[x["Name"]]) | 
					
						
							|  |  |  |         if "AutoScalingPolicy" in y: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert x["AutoScalingPolicy"]["Status"]["State"] == "ATTACHED" | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |             returned_policy = deepcopy(x["AutoScalingPolicy"]) | 
					
						
							| 
									
										
										
										
											2022-03-10 13:39:59 -01:00
										 |  |  |             auto_scaling_policy_with_cluster_id = ( | 
					
						
							|  |  |  |                 _patch_cluster_id_placeholder_in_autoscaling_policy( | 
					
						
							|  |  |  |                     y["AutoScalingPolicy"], cluster_id | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |             ) | 
					
						
							|  |  |  |             del returned_policy["Status"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert returned_policy == auto_scaling_policy_with_cluster_id | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_put_remove_auto_scaling_policy(): | 
					
						
							| 
									
										
										
										
											2021-04-23 07:20:36 -07:00
										 |  |  |     region_name = "us-east-1" | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name=region_name) | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							|  |  |  |     args["Instances"] = {"InstanceGroups": input_instance_groups} | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     core_instance_group = [ | 
					
						
							|  |  |  |         ig | 
					
						
							|  |  |  |         for ig in client.list_instance_groups(ClusterId=cluster_id)["InstanceGroups"] | 
					
						
							|  |  |  |         if ig["InstanceGroupType"] == "CORE" | 
					
						
							|  |  |  |     ][0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     resp = client.put_auto_scaling_policy( | 
					
						
							|  |  |  |         ClusterId=cluster_id, | 
					
						
							|  |  |  |         InstanceGroupId=core_instance_group["Id"], | 
					
						
							|  |  |  |         AutoScalingPolicy=auto_scaling_policy, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-03-10 13:39:59 -01:00
										 |  |  |     auto_scaling_policy_with_cluster_id = ( | 
					
						
							|  |  |  |         _patch_cluster_id_placeholder_in_autoscaling_policy( | 
					
						
							|  |  |  |             auto_scaling_policy, cluster_id | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |     ) | 
					
						
							|  |  |  |     del resp["AutoScalingPolicy"]["Status"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["AutoScalingPolicy"] == auto_scaling_policy_with_cluster_id | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         resp["ClusterArn"] | 
					
						
							|  |  |  |         == f"arn:aws:elasticmapreduce:{region_name}:{ACCOUNT_ID}:cluster/{cluster_id}" | 
					
						
							| 
									
										
										
										
											2021-04-23 07:20:36 -07:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     core_instance_group = [ | 
					
						
							|  |  |  |         ig | 
					
						
							|  |  |  |         for ig in client.list_instance_groups(ClusterId=cluster_id)["InstanceGroups"] | 
					
						
							|  |  |  |         if ig["InstanceGroupType"] == "CORE" | 
					
						
							|  |  |  |     ][0] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert "AutoScalingPolicy" in core_instance_group | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     client.remove_auto_scaling_policy( | 
					
						
							|  |  |  |         ClusterId=cluster_id, InstanceGroupId=core_instance_group["Id"] | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     core_instance_group = [ | 
					
						
							|  |  |  |         ig | 
					
						
							|  |  |  |         for ig in client.list_instance_groups(ClusterId=cluster_id)["InstanceGroups"] | 
					
						
							|  |  |  |         if ig["InstanceGroupType"] == "CORE" | 
					
						
							|  |  |  |     ][0] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert "AutoScalingPolicy" not in core_instance_group | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-10-04 16:28:30 +00:00
										 |  |  | def _patch_cluster_id_placeholder_in_autoscaling_policy(policy, cluster_id): | 
					
						
							|  |  |  |     policy_copy = deepcopy(policy) | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |     for rule in policy_copy["Rules"]: | 
					
						
							|  |  |  |         for dimension in rule["Trigger"]["CloudWatchAlarmDefinition"]["Dimensions"]: | 
					
						
							|  |  |  |             dimension["Value"] = cluster_id | 
					
						
							|  |  |  |     return policy_copy | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_custom_ami(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-06 07:54:49 +02:00
										 |  |  |     with pytest.raises(ClientError) as ex: | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  |         # CustomAmiId available in Amazon EMR 5.7.0 and later | 
					
						
							|  |  |  |         args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         args["CustomAmiId"] = "MyEmrCustomId" | 
					
						
							|  |  |  |         args["ReleaseLabel"] = "emr-5.6.0" | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  |         client.run_job_flow(**args) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert ex.value.response["Error"]["Code"] == "ValidationException" | 
					
						
							|  |  |  |     assert ex.value.response["Error"]["Message"] == "Custom AMI is not allowed" | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-06 07:54:49 +02:00
										 |  |  |     with pytest.raises(ClientError) as ex: | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  |         args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         args["CustomAmiId"] = "MyEmrCustomId" | 
					
						
							|  |  |  |         args["AmiVersion"] = "3.8.1" | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  |         client.run_job_flow(**args) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     err = ex.value.response["Error"] | 
					
						
							|  |  |  |     assert err["Code"] == "ValidationException" | 
					
						
							|  |  |  |     assert err["Message"] == "Custom AMI is not supported in this version of EMR" | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-06 07:54:49 +02:00
										 |  |  |     with pytest.raises(ClientError) as ex: | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  |         # AMI version and release label exception  raises before CustomAmi exception | 
					
						
							|  |  |  |         args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         args["CustomAmiId"] = "MyEmrCustomId" | 
					
						
							|  |  |  |         args["ReleaseLabel"] = "emr-5.6.0" | 
					
						
							|  |  |  |         args["AmiVersion"] = "3.8.1" | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  |         client.run_job_flow(**args) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     err = ex.value.response["Error"] | 
					
						
							|  |  |  |     assert err["Code"] == "ValidationException" | 
					
						
							|  |  |  |     assert "Only one AMI version and release label may be specified." in err["Message"] | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["CustomAmiId"] = "MyEmrCustomAmi" | 
					
						
							| 
									
										
										
										
											2020-11-22 10:54:59 -08:00
										 |  |  |     args["ReleaseLabel"] = "emr-5.31.0" | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  |     resp = client.describe_cluster(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Cluster"]["CustomAmiId"] == "MyEmrCustomAmi" | 
					
						
							| 
									
										
										
										
											2019-05-25 05:19:26 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-02 09:07:13 -04:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_step_concurrency(): | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							|  |  |  |     args["StepConcurrencyLevel"] = 2 | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							|  |  |  |     resp = client.describe_cluster(ClusterId=cluster_id)["Cluster"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Name"] == args["Name"] | 
					
						
							|  |  |  |     assert resp["Status"]["State"] == "WAITING" | 
					
						
							|  |  |  |     assert resp["StepConcurrencyLevel"] == 2 | 
					
						
							| 
									
										
										
										
											2020-10-02 09:07:13 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_modify_cluster(): | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							|  |  |  |     args["StepConcurrencyLevel"] = 2 | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							|  |  |  |     resp = client.describe_cluster(ClusterId=cluster_id)["Cluster"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Name"] == args["Name"] | 
					
						
							|  |  |  |     assert resp["Status"]["State"] == "WAITING" | 
					
						
							|  |  |  |     assert resp["StepConcurrencyLevel"] == 2 | 
					
						
							| 
									
										
										
										
											2020-10-02 09:07:13 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.modify_cluster(ClusterId=cluster_id, StepConcurrencyLevel=4) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["StepConcurrencyLevel"] == 4 | 
					
						
							| 
									
										
										
										
											2020-10-02 09:07:13 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.describe_cluster(ClusterId=cluster_id)["Cluster"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["StepConcurrencyLevel"] == 4 | 
					
						
							| 
									
										
										
										
											2020-10-02 09:07:13 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_set_termination_protection(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["Instances"]["TerminationProtected"] = False | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     resp = client.run_job_flow(**args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     cluster_id = resp["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     resp = client.describe_cluster(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Cluster"]["TerminationProtected"] is False | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for expected in (True, False): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         resp = client.set_termination_protection( | 
					
						
							|  |  |  |             JobFlowIds=[cluster_id], TerminationProtected=expected | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         resp = client.describe_cluster(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert resp["Cluster"]["TerminationProtected"] == expected | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-17 04:20:45 -07:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_terminate_protected_job_flow_raises_error(): | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  |     resp = client.run_job_flow(**run_job_flow_args) | 
					
						
							|  |  |  |     cluster_id = resp["JobFlowId"] | 
					
						
							|  |  |  |     client.set_termination_protection( | 
					
						
							|  |  |  |         JobFlowIds=[cluster_id], TerminationProtected=True | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     with pytest.raises(ClientError) as ex: | 
					
						
							| 
									
										
										
										
											2022-03-10 13:39:59 -01:00
										 |  |  |         client.terminate_job_flows(JobFlowIds=[cluster_id]) | 
					
						
							| 
									
										
										
										
											2021-06-17 04:20:45 -07:00
										 |  |  |     error = ex.value.response["Error"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert error["Code"] == "ValidationException" | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         error["Message"] | 
					
						
							|  |  |  |         == "Could not shut down one or more job flows since they are termination protected." | 
					
						
							| 
									
										
										
										
											2021-06-17 04:20:45 -07:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_set_visible_to_all_users(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["VisibleToAllUsers"] = False | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     resp = client.run_job_flow(**args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     cluster_id = resp["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     resp = client.describe_cluster(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Cluster"]["VisibleToAllUsers"] is False | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for expected in (True, False): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         resp = client.set_visible_to_all_users( | 
					
						
							|  |  |  |             JobFlowIds=[cluster_id], VisibleToAllUsers=expected | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         resp = client.describe_cluster(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert resp["Cluster"]["VisibleToAllUsers"] == expected | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_terminate_job_flows(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.run_job_flow(**run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     cluster_id = resp["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     resp = client.describe_cluster(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Cluster"]["Status"]["State"] == "WAITING" | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.terminate_job_flows(JobFlowIds=[cluster_id]) | 
					
						
							|  |  |  |     resp = client.describe_cluster(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Cluster"]["Status"]["State"] == "TERMINATED" | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # testing multiple end points for each feature | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_bootstrap_actions(): | 
					
						
							|  |  |  |     bootstrap_actions = [ | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         { | 
					
						
							|  |  |  |             "Name": "bs1", | 
					
						
							|  |  |  |             "ScriptBootstrapAction": { | 
					
						
							|  |  |  |                 "Args": ["arg1", "arg2"], | 
					
						
							|  |  |  |                 "Path": "s3://path/to/script", | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             "Name": "bs2", | 
					
						
							|  |  |  |             "ScriptBootstrapAction": {"Args": [], "Path": "s3://path/to/anotherscript"}, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["BootstrapActions"] = bootstrap_actions | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     cl = client.describe_job_flows(JobFlowIds=[cluster_id])["JobFlows"][0] | 
					
						
							|  |  |  |     for x, y in zip(cl["BootstrapActions"], bootstrap_actions): | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["BootstrapActionConfig"] == y | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.list_bootstrap_actions(ClusterId=cluster_id) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     for x, y in zip(resp["BootstrapActions"], bootstrap_actions): | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["Name"] == y["Name"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         if "Args" in y["ScriptBootstrapAction"]: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert x["Args"] == y["ScriptBootstrapAction"]["Args"] | 
					
						
							|  |  |  |         assert x["ScriptPath"] == y["ScriptBootstrapAction"]["Path"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-23 12:33:06 +05:30
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_instances(): | 
					
						
							|  |  |  |     input_groups = dict((g["Name"], g) for g in input_instance_groups) | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							|  |  |  |     args["Instances"] = {"InstanceGroups": input_instance_groups} | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							|  |  |  |     jf = client.describe_job_flows(JobFlowIds=[cluster_id])["JobFlows"][0] | 
					
						
							|  |  |  |     instances = client.list_instances(ClusterId=cluster_id)["Instances"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(instances) == sum(g["InstanceCount"] for g in input_instance_groups) | 
					
						
							| 
									
										
										
										
											2021-04-23 12:33:06 +05:30
										 |  |  |     for x in instances: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert "InstanceGroupId" in x | 
					
						
							| 
									
										
										
										
											2021-04-23 12:33:06 +05:30
										 |  |  |         instance_group = [ | 
					
						
							|  |  |  |             j | 
					
						
							|  |  |  |             for j in jf["Instances"]["InstanceGroups"] | 
					
						
							|  |  |  |             if j["InstanceGroupId"] == x["InstanceGroupId"] | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert len(instance_group) == 1 | 
					
						
							| 
									
										
										
										
											2021-04-23 12:33:06 +05:30
										 |  |  |         y = input_groups[instance_group[0]["Name"]] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert "Id" in x | 
					
						
							|  |  |  |         assert "Ec2InstanceId" in x | 
					
						
							|  |  |  |         assert "PublicDnsName" in x | 
					
						
							|  |  |  |         assert "PublicIpAddress" in x | 
					
						
							|  |  |  |         assert "PrivateDnsName" in x | 
					
						
							|  |  |  |         assert "PrivateIpAddress" in x | 
					
						
							|  |  |  |         assert "InstanceFleetId" in x | 
					
						
							|  |  |  |         assert x["InstanceType"] == y["InstanceType"] | 
					
						
							|  |  |  |         assert x["Market"] == y["Market"] | 
					
						
							|  |  |  |         assert isinstance(x["Status"]["Timeline"]["ReadyDateTime"], datetime) | 
					
						
							|  |  |  |         assert isinstance(x["Status"]["Timeline"]["CreationDateTime"], datetime) | 
					
						
							|  |  |  |         assert x["Status"]["State"] == "RUNNING" | 
					
						
							| 
									
										
										
										
											2021-04-23 12:33:06 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  |     for x in [["MASTER"], ["CORE"], ["TASK"], ["MASTER", "TASK"]]: | 
					
						
							|  |  |  |         instances = client.list_instances(ClusterId=cluster_id, InstanceGroupTypes=x)[ | 
					
						
							|  |  |  |             "Instances" | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert len(instances) == sum( | 
					
						
							|  |  |  |             g["InstanceCount"] for g in input_instance_groups if g["InstanceRole"] in x | 
					
						
							| 
									
										
										
										
											2021-04-23 12:33:06 +05:30
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | @mock_emr | 
					
						
							|  |  |  | def test_instance_groups(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     input_groups = dict((g["Name"], g) for g in input_instance_groups) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     for key in ["MasterInstanceType", "SlaveInstanceType", "InstanceCount"]: | 
					
						
							|  |  |  |         del args["Instances"][key] | 
					
						
							|  |  |  |     args["Instances"]["InstanceGroups"] = input_instance_groups[:2] | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     jf = client.describe_job_flows(JobFlowIds=[cluster_id])["JobFlows"][0] | 
					
						
							|  |  |  |     base_instance_count = jf["Instances"]["InstanceCount"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |     instance_groups_to_add = deepcopy(input_instance_groups[2:]) | 
					
						
							|  |  |  |     instance_groups_to_add[0]["AutoScalingPolicy"] = auto_scaling_policy | 
					
						
							|  |  |  |     instance_groups_to_add[1]["AutoScalingPolicy"] = auto_scaling_policy | 
					
						
							| 
									
										
										
										
											2017-02-23 21:37:43 -05:00
										 |  |  |     client.add_instance_groups( | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |         JobFlowId=cluster_id, InstanceGroups=instance_groups_to_add | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     jf = client.describe_job_flows(JobFlowIds=[cluster_id])["JobFlows"][0] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert jf["Instances"]["InstanceCount"] == sum( | 
					
						
							|  |  |  |         g["InstanceCount"] for g in input_instance_groups | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ) | 
					
						
							|  |  |  |     for x in jf["Instances"]["InstanceGroups"]: | 
					
						
							|  |  |  |         y = input_groups[x["Name"]] | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |         if "BidPrice" in y: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert x["BidPrice"] == y["BidPrice"] | 
					
						
							|  |  |  |         assert isinstance(x["CreationDateTime"], datetime) | 
					
						
							|  |  |  |         # assert isinstance(x['EndDateTime'], 'datetime.datetime') | 
					
						
							|  |  |  |         assert "InstanceGroupId" in x | 
					
						
							|  |  |  |         assert x["InstanceRequestCount"] == y["InstanceCount"] | 
					
						
							|  |  |  |         assert x["InstanceRole"] == y["InstanceRole"] | 
					
						
							|  |  |  |         assert x["InstanceRunningCount"] == y["InstanceCount"] | 
					
						
							|  |  |  |         assert x["InstanceType"] == y["InstanceType"] | 
					
						
							|  |  |  |         # assert x['LastStateChangeReason'] == y['LastStateChangeReason'] | 
					
						
							|  |  |  |         assert x["Market"] == y["Market"] | 
					
						
							|  |  |  |         assert x["Name"] == y["Name"] | 
					
						
							|  |  |  |         assert isinstance(x["ReadyDateTime"], datetime) | 
					
						
							|  |  |  |         assert isinstance(x["StartDateTime"], datetime) | 
					
						
							|  |  |  |         assert x["State"] == "RUNNING" | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     groups = client.list_instance_groups(ClusterId=cluster_id)["InstanceGroups"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     for x in groups: | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |         y = deepcopy(input_groups[x["Name"]]) | 
					
						
							|  |  |  |         if "BidPrice" in y: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert x["BidPrice"] == y["BidPrice"] | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |         if "AutoScalingPolicy" in y: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert x["AutoScalingPolicy"]["Status"]["State"] == "ATTACHED" | 
					
						
							| 
									
										
										
										
											2020-09-23 06:21:45 -04:00
										 |  |  |             returned_policy = dict(x["AutoScalingPolicy"]) | 
					
						
							|  |  |  |             del returned_policy["Status"] | 
					
						
							| 
									
										
										
										
											2020-11-11 15:55:37 +00:00
										 |  |  |             policy = json.loads( | 
					
						
							|  |  |  |                 json.dumps(y["AutoScalingPolicy"]).replace( | 
					
						
							|  |  |  |                     "${emr.clusterId}", cluster_id | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert returned_policy == policy | 
					
						
							| 
									
										
										
										
											2020-03-05 18:11:49 -03:00
										 |  |  |         if "EbsConfiguration" in y: | 
					
						
							| 
									
										
										
										
											2020-03-06 18:10:39 -03:00
										 |  |  |             _do_assertion_ebs_configuration(x, y) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         # Configurations | 
					
						
							|  |  |  |         # EbsBlockDevices | 
					
						
							|  |  |  |         # EbsOptimized | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert "Id" in x | 
					
						
							|  |  |  |         assert x["InstanceGroupType"] == y["InstanceRole"] | 
					
						
							|  |  |  |         assert x["InstanceType"] == y["InstanceType"] | 
					
						
							|  |  |  |         assert x["Market"] == y["Market"] | 
					
						
							|  |  |  |         assert x["Name"] == y["Name"] | 
					
						
							|  |  |  |         assert x["RequestedInstanceCount"] == y["InstanceCount"] | 
					
						
							|  |  |  |         assert x["RunningInstanceCount"] == y["InstanceCount"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         # ShrinkPolicy | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["Status"]["State"] == "RUNNING" | 
					
						
							|  |  |  |         assert isinstance(x["Status"]["StateChangeReason"]["Code"], str) | 
					
						
							|  |  |  |         # assert isinstance(x['Status']['StateChangeReason']['Message'], str) | 
					
						
							|  |  |  |         assert isinstance(x["Status"]["Timeline"]["CreationDateTime"], datetime) | 
					
						
							|  |  |  |         # assert isinstance(x['Status']['Timeline']['EndDateTime'], 'datetime.datetime') | 
					
						
							|  |  |  |         assert isinstance(x["Status"]["Timeline"]["ReadyDateTime"], datetime) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     igs = dict((g["Name"], g) for g in groups) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     client.modify_instance_groups( | 
					
						
							|  |  |  |         InstanceGroups=[ | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             {"InstanceGroupId": igs["task-1"]["Id"], "InstanceCount": 2}, | 
					
						
							|  |  |  |             {"InstanceGroupId": igs["task-2"]["Id"], "InstanceCount": 3}, | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     jf = client.describe_job_flows(JobFlowIds=[cluster_id])["JobFlows"][0] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert jf["Instances"]["InstanceCount"] == base_instance_count + 5 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     igs = dict((g["Name"], g) for g in jf["Instances"]["InstanceGroups"]) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert igs["task-1"]["InstanceRunningCount"] == 2 | 
					
						
							|  |  |  |     assert igs["task-2"]["InstanceRunningCount"] == 3 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_steps(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     input_steps = [ | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             "HadoopJarStep": { | 
					
						
							|  |  |  |                 "Args": [ | 
					
						
							|  |  |  |                     "hadoop-streaming", | 
					
						
							|  |  |  |                     "-files", | 
					
						
							|  |  |  |                     "s3://elasticmapreduce/samples/wordcount/wordSplitter.py#wordSplitter.py", | 
					
						
							|  |  |  |                     "-mapper", | 
					
						
							|  |  |  |                     "python wordSplitter.py", | 
					
						
							|  |  |  |                     "-input", | 
					
						
							|  |  |  |                     "s3://elasticmapreduce/samples/wordcount/input", | 
					
						
							|  |  |  |                     "-output", | 
					
						
							|  |  |  |                     "s3://output_bucket/output/wordcount_output", | 
					
						
							|  |  |  |                     "-reducer", | 
					
						
							|  |  |  |                     "aggregate", | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |                 "Jar": "command-runner.jar", | 
					
						
							| 
									
										
										
										
											2023-07-12 19:30:58 +02:00
										 |  |  |                 "Properties": [ | 
					
						
							|  |  |  |                     {"Key": "mapred.tasktracker.map.tasks.maximum", "Value": "2"} | 
					
						
							|  |  |  |                 ], | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             }, | 
					
						
							|  |  |  |             "Name": "My wordcount example", | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         { | 
					
						
							|  |  |  |             "HadoopJarStep": { | 
					
						
							|  |  |  |                 "Args": [ | 
					
						
							|  |  |  |                     "hadoop-streaming", | 
					
						
							|  |  |  |                     "-files", | 
					
						
							|  |  |  |                     "s3://elasticmapreduce/samples/wordcount/wordSplitter2.py#wordSplitter2.py", | 
					
						
							|  |  |  |                     "-mapper", | 
					
						
							|  |  |  |                     "python wordSplitter2.py", | 
					
						
							|  |  |  |                     "-input", | 
					
						
							|  |  |  |                     "s3://elasticmapreduce/samples/wordcount/input2", | 
					
						
							|  |  |  |                     "-output", | 
					
						
							|  |  |  |                     "s3://output_bucket/output/wordcount_output2", | 
					
						
							|  |  |  |                     "-reducer", | 
					
						
							|  |  |  |                     "aggregate", | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |                 "Jar": "command-runner.jar", | 
					
						
							| 
									
										
										
										
											2023-07-12 19:30:58 +02:00
										 |  |  |                 "Properties": [ | 
					
						
							|  |  |  |                     {"Key": "mapred.reduce.tasks", "Value": "0"}, | 
					
						
							|  |  |  |                     {"Key": "stream.map.output.field.separator", "Value": "."}, | 
					
						
							|  |  |  |                 ], | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             }, | 
					
						
							|  |  |  |             "Name": "My wordcount example2", | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     ] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # TODO: implementation and test for cancel_steps | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     args = deepcopy(run_job_flow_args) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     args["Steps"] = [input_steps[0]] | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     jf = client.describe_job_flows(JobFlowIds=[cluster_id])["JobFlows"][0] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(jf["Steps"]) == 1 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     client.add_job_flow_steps(JobFlowId=cluster_id, Steps=[input_steps[1]]) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     jf = client.describe_job_flows(JobFlowIds=[cluster_id])["JobFlows"][0] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(jf["Steps"]) == 2 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     for idx, (x, y) in enumerate(zip(jf["Steps"], input_steps)): | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert "CreationDateTime" in x["ExecutionStatusDetail"] | 
					
						
							|  |  |  |         # assert 'EndDateTime' in x['ExecutionStatusDetail'] | 
					
						
							|  |  |  |         # assert 'LastStateChangeReason' in x['ExecutionStatusDetail'] | 
					
						
							|  |  |  |         # assert 'StartDateTime' in x['ExecutionStatusDetail'] | 
					
						
							|  |  |  |         assert ( | 
					
						
							|  |  |  |             x["ExecutionStatusDetail"]["State"] == "RUNNING" if idx == 0 else "PENDING" | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["StepConfig"]["ActionOnFailure"] == "TERMINATE_CLUSTER" | 
					
						
							|  |  |  |         assert x["StepConfig"]["HadoopJarStep"]["Args"] == y["HadoopJarStep"]["Args"] | 
					
						
							|  |  |  |         assert x["StepConfig"]["HadoopJarStep"]["Jar"] == y["HadoopJarStep"]["Jar"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         if "MainClass" in y["HadoopJarStep"]: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert ( | 
					
						
							|  |  |  |                 x["StepConfig"]["HadoopJarStep"]["MainClass"] | 
					
						
							|  |  |  |                 == y["HadoopJarStep"]["MainClass"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             ) | 
					
						
							|  |  |  |         if "Properties" in y["HadoopJarStep"]: | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert ( | 
					
						
							|  |  |  |                 x["StepConfig"]["HadoopJarStep"]["Properties"] | 
					
						
							|  |  |  |                 == y["HadoopJarStep"]["Properties"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["StepConfig"]["Name"] == y["Name"] | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     expected = dict((s["Name"], s) for s in input_steps) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     steps = client.list_steps(ClusterId=cluster_id)["Steps"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(steps) == 2 | 
					
						
							| 
									
										
										
										
											2022-03-19 22:08:18 -07:00
										 |  |  |     # Steps should be returned in reverse order. | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert ( | 
					
						
							|  |  |  |         sorted( | 
					
						
							|  |  |  |             steps, | 
					
						
							|  |  |  |             key=lambda o: o["Status"]["Timeline"]["CreationDateTime"], | 
					
						
							|  |  |  |             reverse=True, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         == steps | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |     for x in steps: | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         y = expected[x["Name"]] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["ActionOnFailure"] == "TERMINATE_CLUSTER" | 
					
						
							|  |  |  |         assert x["Config"]["Args"] == y["HadoopJarStep"]["Args"] | 
					
						
							|  |  |  |         assert x["Config"]["Jar"] == y["HadoopJarStep"]["Jar"] | 
					
						
							|  |  |  |         # assert x['Config']['MainClass'] == y['HadoopJarStep']['MainClass'] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         # Properties | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert isinstance(x["Id"], str) | 
					
						
							|  |  |  |         assert x["Name"] == y["Name"] | 
					
						
							|  |  |  |         assert x["Status"]["State"] in ["RUNNING", "PENDING"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         # StateChangeReason | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert isinstance(x["Status"]["Timeline"]["CreationDateTime"], datetime) | 
					
						
							|  |  |  |         # assert isinstance(x['Status']['Timeline']['EndDateTime'], 'datetime.datetime') | 
					
						
							| 
									
										
										
										
											2020-02-20 08:59:21 +00:00
										 |  |  |         # Only the first step will have started - we don't know anything about when it finishes, so the second step never starts | 
					
						
							|  |  |  |         if x["Name"] == "My wordcount example": | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |             assert isinstance(x["Status"]["Timeline"]["StartDateTime"], datetime) | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |         x = client.describe_step(ClusterId=cluster_id, StepId=x["Id"])["Step"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert x["ActionOnFailure"] == "TERMINATE_CLUSTER" | 
					
						
							|  |  |  |         assert x["Config"]["Args"] == y["HadoopJarStep"]["Args"] | 
					
						
							|  |  |  |         assert x["Config"]["Jar"] == y["HadoopJarStep"]["Jar"] | 
					
						
							|  |  |  |         # assert x['Config']['MainClass'] == y['HadoopJarStep']['MainClass'] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         # Properties | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert isinstance(x["Id"], str) | 
					
						
							|  |  |  |         assert x["Name"] == y["Name"] | 
					
						
							|  |  |  |         assert x["Status"]["State"] in ["RUNNING", "PENDING"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  |         # StateChangeReason | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |         assert isinstance(x["Status"]["Timeline"]["CreationDateTime"], datetime) | 
					
						
							|  |  |  |         # assert isinstance(x['Status']['Timeline']['EndDateTime'], 'datetime.datetime') | 
					
						
							|  |  |  |         # assert isinstance(x['Status']['Timeline']['StartDateTime'], 'datetime.datetime') | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-03-19 22:08:18 -07:00
										 |  |  |     step_id = steps[-1]["Id"]  # Last step is first created step. | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     steps = client.list_steps(ClusterId=cluster_id, StepIds=[step_id])["Steps"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(steps) == 1 | 
					
						
							|  |  |  |     assert steps[0]["Id"] == step_id | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-21 16:37:57 +00:00
										 |  |  |     steps = client.list_steps(ClusterId=cluster_id, StepStates=["RUNNING"])["Steps"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(steps) == 1 | 
					
						
							|  |  |  |     assert steps[0]["Id"] == step_id | 
					
						
							| 
									
										
										
										
											2016-10-18 16:47:02 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_tags(): | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     input_tags = [ | 
					
						
							|  |  |  |         {"Key": "newkey1", "Value": "newval1"}, | 
					
						
							|  |  |  |         {"Key": "newkey2", "Value": "newval2"}, | 
					
						
							|  |  |  |     ] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**run_job_flow_args)["JobFlowId"] | 
					
						
							| 
									
										
										
										
											2016-09-21 20:59:19 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     client.add_tags(ResourceId=cluster_id, Tags=input_tags) | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  |     resp = client.describe_cluster(ClusterId=cluster_id)["Cluster"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert len(resp["Tags"]) == 2 | 
					
						
							|  |  |  |     assert {t["Key"]: t["Value"] for t in resp["Tags"]} == { | 
					
						
							|  |  |  |         t["Key"]: t["Value"] for t in input_tags | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2019-10-31 08:44:26 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     client.remove_tags(ResourceId=cluster_id, TagKeys=[t["Key"] for t in input_tags]) | 
					
						
							|  |  |  |     resp = client.describe_cluster(ClusterId=cluster_id)["Cluster"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Tags"] == [] | 
					
						
							| 
									
										
										
										
											2020-11-17 05:54:34 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_security_configurations(): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     security_configuration_name = "MySecurityConfiguration" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     security_configuration = """
 | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   "EncryptionConfiguration": { | 
					
						
							|  |  |  |     "AtRestEncryptionConfiguration": { | 
					
						
							|  |  |  |       "S3EncryptionConfiguration": { | 
					
						
							|  |  |  |         "EncryptionMode": "SSE-S3" | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     "EnableInTransitEncryption": false, | 
					
						
							|  |  |  |     "EnableAtRestEncryption": true | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  |     """.strip()
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     resp = client.create_security_configuration( | 
					
						
							|  |  |  |         Name=security_configuration_name, SecurityConfiguration=security_configuration | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Name"] == security_configuration_name | 
					
						
							|  |  |  |     assert isinstance(resp["CreationDateTime"], datetime) | 
					
						
							| 
									
										
										
										
											2020-11-17 05:54:34 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |     resp = client.describe_security_configuration(Name=security_configuration_name) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert resp["Name"] == security_configuration_name | 
					
						
							|  |  |  |     assert resp["SecurityConfiguration"] == security_configuration | 
					
						
							|  |  |  |     assert isinstance(resp["CreationDateTime"], datetime) | 
					
						
							| 
									
										
										
										
											2020-11-17 05:54:34 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |     client.delete_security_configuration(Name=security_configuration_name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     with pytest.raises(ClientError) as ex: | 
					
						
							|  |  |  |         client.describe_security_configuration(Name=security_configuration_name) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     err = ex.value.response["Error"] | 
					
						
							|  |  |  |     assert err["Code"] == "InvalidRequestException" | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         err["Message"] | 
					
						
							|  |  |  |         == "Security configuration with name 'MySecurityConfiguration' does not exist." | 
					
						
							| 
									
										
										
										
											2020-11-17 05:54:34 -05:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     with pytest.raises(ClientError) as ex: | 
					
						
							|  |  |  |         client.delete_security_configuration(Name=security_configuration_name) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     err = ex.value.response["Error"] | 
					
						
							|  |  |  |     assert err["Code"] == "InvalidRequestException" | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         err["Message"] | 
					
						
							|  |  |  |         == "Security configuration with name 'MySecurityConfiguration' does not exist." | 
					
						
							| 
									
										
										
										
											2020-11-17 05:54:34 -05:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-06-17 22:04:21 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_invalid_number_of_master_nodes_raises_error(): | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  |     params = dict( | 
					
						
							|  |  |  |         Name="test-cluster", | 
					
						
							|  |  |  |         Instances={ | 
					
						
							|  |  |  |             "InstanceGroups": [ | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "InstanceCount": 2, | 
					
						
							|  |  |  |                     "InstanceRole": "MASTER", | 
					
						
							|  |  |  |                     "InstanceType": "c1.medium", | 
					
						
							|  |  |  |                     "Market": "ON_DEMAND", | 
					
						
							|  |  |  |                     "Name": "master", | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     with pytest.raises(ClientError) as ex: | 
					
						
							|  |  |  |         client.run_job_flow(**params) | 
					
						
							|  |  |  |     error = ex.value.response["Error"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert error["Code"] == "ValidationException" | 
					
						
							|  |  |  |     assert ( | 
					
						
							|  |  |  |         error["Message"] | 
					
						
							|  |  |  |         == "Master instance group must have exactly 3 instances for HA clusters." | 
					
						
							| 
									
										
										
										
											2021-06-17 22:04:21 -07:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @mock_emr | 
					
						
							|  |  |  | def test_run_job_flow_with_multiple_master_nodes(): | 
					
						
							|  |  |  |     client = boto3.client("emr", region_name="us-east-1") | 
					
						
							|  |  |  |     params = dict( | 
					
						
							|  |  |  |         Name="test-cluster", | 
					
						
							|  |  |  |         Instances={ | 
					
						
							|  |  |  |             "InstanceGroups": [ | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "InstanceCount": 3, | 
					
						
							|  |  |  |                     "InstanceRole": "MASTER", | 
					
						
							|  |  |  |                     "InstanceType": "c1.medium", | 
					
						
							|  |  |  |                     "Market": "ON_DEMAND", | 
					
						
							|  |  |  |                     "Name": "master", | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |             "KeepJobFlowAliveWhenNoSteps": False, | 
					
						
							|  |  |  |             "TerminationProtected": False, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     cluster_id = client.run_job_flow(**params)["JobFlowId"] | 
					
						
							|  |  |  |     cluster = client.describe_cluster(ClusterId=cluster_id)["Cluster"] | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert cluster["AutoTerminate"] is False | 
					
						
							|  |  |  |     assert cluster["TerminationProtected"] is True | 
					
						
							| 
									
										
										
										
											2021-06-17 22:04:21 -07:00
										 |  |  |     groups = client.list_instance_groups(ClusterId=cluster_id)["InstanceGroups"] | 
					
						
							|  |  |  |     master_instance_group = next( | 
					
						
							|  |  |  |         group for group in groups if group["InstanceGroupType"] == "MASTER" | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-07-26 09:42:43 +00:00
										 |  |  |     assert master_instance_group["RequestedInstanceCount"] == 3 | 
					
						
							|  |  |  |     assert master_instance_group["RunningInstanceCount"] == 3 |