# moto/moto/emr/utils.py

import copy
import datetime
import random
import re
import string
from moto.core.utils import (
    camelcase_to_underscores,
    iso_8601_datetime_with_milliseconds,
)


def random_id(size=13):
    """Return a random identifier made of ``size`` characters.

    Every character is picked independently from the decimal digits and the
    uppercase ASCII letters.
    """
    alphabet = string.digits + string.ascii_uppercase
    return "".join(random.choice(alphabet) for _ in range(size))


def random_cluster_id(size=13):
    """Return ``"j-"`` followed by a random id of ``size`` characters.

    ``size`` is forwarded to ``random_id``; previously it was accepted but
    ignored, so the suffix was always 13 characters long.
    """
    return "j-{0}".format(random_id(size))


def random_step_id(size=13):
    """Return ``"s-"`` followed by a random id of ``size`` characters.

    ``size`` is forwarded to ``random_id``; previously it was accepted but
    ignored, so the suffix was always 13 characters long.
    """
    return "s-{0}".format(random_id(size))


def random_instance_group_id(size=13):
    """Return ``"i-"`` followed by a random id of ``size`` characters.

    ``size`` is forwarded to ``random_id``; previously it was accepted but
    ignored, so the suffix was always 13 characters long.
    """
    return "i-{0}".format(random_id(size))


def steps_from_query_string(querystring_dict):
    """Normalise flattened step dicts into ``jar`` / ``properties`` / ``args``.

    ``querystring_dict`` is iterated as a sequence of step dicts.  Each dict
    is modified in place and the very same objects are returned in a new list:

    * ``hadoop_jar_step._jar`` is moved to ``jar`` (``KeyError`` if absent);
    * ``properties`` (a list of ``{"Key": ..., "Value": ...}`` dicts, empty
      when missing) is replaced by a plain ``{key: value}`` dict;
    * ``hadoop_jar_step._args.member.1``, ``.2``, ... are moved, in order,
      into the ``args`` list.  Collection stops at the first missing index.
    """
    arg_key_template = "hadoop_jar_step._args.member.{0}"
    parsed = []
    for step in querystring_dict:
        step["jar"] = step.pop("hadoop_jar_step._jar")
        step["properties"] = {
            prop["Key"]: prop["Value"] for prop in step.get("properties", [])
        }

        collected_args = []
        step["args"] = collected_args
        position = 1
        arg_key = arg_key_template.format(position)
        # Member indexes are 1-based and contiguous; a gap ends the scan.
        while arg_key in step:
            collected_args.append(step.pop(arg_key))
            position += 1
            arg_key = arg_key_template.format(position)

        parsed.append(step)
    return parsed


class Unflattener:
    """Rebuild nested dict/list structures from flattened, dot-separated keys."""

    @staticmethod
    def unflatten_complex_params(input_dict, param_name):
        """Function to unflatten (portions of) dicts with complex keys.  The moto request parser flattens the incoming
        request bodies, which is generally helpful, but for nested dicts/lists can result in a hard-to-manage
        parameter explosion.  This function allows one to selectively unflatten a set of dict keys, replacing them
        with a deep dict/list structure named identically to the root component in the complex name.

        Complex keys are composed of multiple components separated by periods.  Leading and trailing underscores
        are stripped from each component.  List indexes are represented with two components, 'member' and the
        (1-based) index number.

        Every key of ``input_dict`` that starts with ``param_name`` is processed.  ``input_dict`` is modified in
        place and nothing is returned; when no key matches, the dict is left untouched.
        """
        items_to_process = {
            key: value
            for key, value in input_dict.items()
            if key.startswith(param_name)
        }
        if not items_to_process:
            return

        # Remove every flattened key first so the nested structure is built
        # on a dict that no longer contains any of them.
        for key in items_to_process:
            del input_dict[key]

        for key, value in items_to_process.items():
            Unflattener._set_deep(key, input_dict, value)

    @staticmethod
    def _set_deep(complex_key, container, value):
        """Store ``value`` under the dotted path ``complex_key``, creating intermediate dicts/lists as needed."""
        # Reversed so that pop() yields the components left to right.
        pending = complex_key.split(".")
        pending.reverse()

        while pending:
            key = pending.pop().strip("_")
            if not pending:
                # Last component: this is where the value itself lives.
                Unflattener._add_to_container(container, key, value)
                break

            # A following 'member' marker means this component holds a list;
            # the marker itself carries no data and is discarded.
            if pending[-1] == "member":
                pending.pop()
                new_child = []
            else:
                new_child = {}

            if Unflattener._key_in_container(container, key):
                container = Unflattener._get_child(container, key)
            else:
                container = Unflattener._add_to_container(container, key, new_child)

    @staticmethod
    def _add_to_container(container, key, value):
        """Put ``value`` at ``key`` (dict key or 1-based list index) and return ``value``.

        Lists are padded with ``None`` up to the requested index.  Containers
        that are neither dicts nor lists are left untouched.
        """
        if isinstance(container, dict):
            container[key] = value
        elif isinstance(container, list):
            position = int(key)
            while len(container) < position:
                container.append(None)
            container[position - 1] = value
        return value

    @staticmethod
    def _get_child(container, key):
        """Return the child stored at ``key`` (dict key or 1-based list index)."""
        if isinstance(container, dict):
            return container[key]
        elif isinstance(container, list):
            position = int(key)
            return container[position - 1]

    @staticmethod
    def _key_in_container(container, key):
        """Tell whether ``container`` already holds a real child at ``key``.

        For lists, a ``None`` slot is only padding left by a higher index that
        was processed earlier, so it does not count as present.  Treating it
        as present made out-of-order ``member.N`` keys descend into ``None``
        and silently lose their values.
        """
        if isinstance(container, dict):
            return key in container
        if isinstance(container, list):
            position = int(key)
            return len(container) >= position and container[position - 1] is not None
        return False


class CamelToUnderscoresWalker:
    """Recursively rewrite the keys of nested dict/list data from CamelCase to snake_case (underscores).

    Only dict keys are converted; list items and scalar values are carried
    over as they are.  The input is never modified: fresh dicts and lists are
    built for every level.
    """

    @staticmethod
    def parse(x):
        """Dispatch ``x`` to the dict, list or scalar handler and return the result."""
        if isinstance(x, dict):
            handler = CamelToUnderscoresWalker.parse_dict
        elif isinstance(x, list):
            handler = CamelToUnderscoresWalker.parse_list
        else:
            handler = CamelToUnderscoresWalker.parse_scalar
        return handler(x)

    @staticmethod
    def parse_dict(x):
        """Return a new dict with converted keys and recursively parsed values."""
        return {
            camelcase_to_underscores(name): CamelToUnderscoresWalker.parse(child)
            for name, child in x.items()
        }

    @staticmethod
    def parse_list(x):
        """Return a new list holding the recursively parsed items of ``x``."""
        return [CamelToUnderscoresWalker.parse(item) for item in x]

    @staticmethod
    def parse_scalar(x):
        """Return ``x`` unchanged; anything that is not a dict or list is a leaf."""
        return x


class ReleaseLabel(object):
    """A parsed release label of the form ``emr-<major>.<minor>.<patch>``.

    Instances expose ``major``, ``minor`` and ``patch`` as ints, iterate as
    that triple, and compare and hash by it.  Comparisons against any other
    type return ``NotImplemented``.
    """

    version_re = re.compile(r"^emr-(\d+)\.(\d+)\.(\d+)$")

    def __init__(self, release_label):
        """Parse ``release_label``; raises ``ValueError`` when it is empty or malformed."""
        major, minor, patch = self.parse(release_label)

        self.major = major
        self.minor = minor
        self.patch = patch

    @classmethod
    def parse(cls, release_label):
        """Return ``(major, minor, patch)`` as ints for ``release_label``.

        Raises:
            ValueError: if the label is empty/None or does not have exactly
                the ``emr-X.Y.Z`` shape.
        """
        if not release_label:
            raise ValueError("Invalid empty ReleaseLabel: %r" % release_label)

        # fullmatch rather than match: with match(), the trailing "$" also
        # matches just before a final newline, so "emr-5.0.0\n" was accepted.
        match = cls.version_re.fullmatch(release_label)
        if not match:
            raise ValueError("Invalid ReleaseLabel: %r" % release_label)

        major, minor, patch = (int(part) for part in match.groups())
        return major, minor, patch

    def __str__(self):
        return "emr-%d.%d.%d" % (self.major, self.minor, self.patch)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, str(self))

    def __iter__(self):
        return iter((self.major, self.minor, self.patch))

    def __hash__(self):
        # Defining __eq__ alone makes the class unhashable on Python 3; hash
        # on the same triple that equality compares.
        return hash(tuple(self))

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return tuple(self) == tuple(other)

    def __ne__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return tuple(self) != tuple(other)

    def __lt__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return tuple(self) < tuple(other)

    def __le__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return tuple(self) <= tuple(other)

    def __gt__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return tuple(self) > tuple(other)

    def __ge__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return tuple(self) >= tuple(other)


class EmrManagedSecurityGroup(object):
    """Base holder for the default attributes of a managed security group.

    Subclasses override ``kind``, ``group_name`` and ``short_name``; the
    class is used as a namespace and ``description`` is a classmethod.
    """

    class Kind:
        """String constants naming the three kinds of managed group."""

        MASTER = "Master"
        SLAVE = "Slave"
        SERVICE = "Service"

    # One of the ``Kind`` constants; unset on the base class.
    kind = None

    # Name of the security group, and the short label used in its description.
    group_name = ""
    short_name = ""
    # Template for ``description``; fields are ``short_name`` and ``created``.
    desc_fmt = "{short_name} for Elastic MapReduce created on {created}"

    @classmethod
    def description(cls):
        """Return ``desc_fmt`` filled with ``short_name`` and the current time."""
        # NOTE(review): datetime.now() is naive local time — confirm the
        # formatter does not present it as UTC.
        timestamp = iso_8601_datetime_with_milliseconds(datetime.datetime.now())
        return cls.desc_fmt.format(created=timestamp, short_name=cls.short_name)


class EmrManagedMasterSecurityGroup(EmrManagedSecurityGroup):
    """Default attributes for the managed group of kind ``Kind.MASTER``."""

    short_name = "Master"
    group_name = "ElasticMapReduce-Master-Private"
    kind = EmrManagedSecurityGroup.Kind.MASTER


class EmrManagedSlaveSecurityGroup(EmrManagedSecurityGroup):
    """Default attributes for the managed group of kind ``Kind.SLAVE``."""

    short_name = "Slave"
    group_name = "ElasticMapReduce-Slave-Private"
    kind = EmrManagedSecurityGroup.Kind.SLAVE


class EmrManagedServiceAccessSecurityGroup(EmrManagedSecurityGroup):
    """Default attributes for the managed group of kind ``Kind.SERVICE``."""

    short_name = "Service access"
    group_name = "ElasticMapReduce-ServiceAccess"
    kind = EmrManagedSecurityGroup.Kind.SERVICE


class EmrSecurityGroupManager(object):
    """Find or create the three managed security groups in a VPC and add their rules.

    The rule tables below are templates: wherever a
    ``EmrManagedSecurityGroup.Kind`` constant appears (as
    ``group_name_or_id`` or as a ``GroupId`` in ``source_groups``) it is a
    placeholder that ``_render_rules`` replaces with the id of the matching
    group before the rule is passed to the EC2 backend.
    """

    # Egress rule templates, passed as keyword arguments to
    # ``ec2.authorize_security_group_egress``.
    MANAGED_RULES_EGRESS = [
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.MASTER,
            "from_port": None,
            "ip_protocol": "-1",
            "ip_ranges": [{"CidrIp": "0.0.0.0/0"}],
            "to_port": None,
            "source_groups": [],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.SLAVE,
            "from_port": None,
            "ip_protocol": "-1",
            "ip_ranges": [{"CidrIp": "0.0.0.0/0"}],
            "to_port": None,
            "source_groups": [],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.SERVICE,
            "from_port": 8443,
            "ip_protocol": "tcp",
            "ip_ranges": [],
            "to_port": 8443,
            "source_groups": [
                {"GroupId": EmrManagedSecurityGroup.Kind.MASTER},
                {"GroupId": EmrManagedSecurityGroup.Kind.SLAVE},
            ],
        },
    ]

    # Ingress rule templates, passed as keyword arguments to
    # ``ec2.authorize_security_group_ingress``.
    MANAGED_RULES_INGRESS = [
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.MASTER,
            "from_port": 0,
            "ip_protocol": "tcp",
            "ip_ranges": [],
            "to_port": 65535,
            "source_groups": [
                {"GroupId": EmrManagedSecurityGroup.Kind.MASTER},
                {"GroupId": EmrManagedSecurityGroup.Kind.SLAVE},
            ],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.MASTER,
            "from_port": 8443,
            "ip_protocol": "tcp",
            "ip_ranges": [],
            "to_port": 8443,
            "source_groups": [{"GroupId": EmrManagedSecurityGroup.Kind.SERVICE}],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.MASTER,
            "from_port": 0,
            "ip_protocol": "udp",
            "ip_ranges": [],
            "to_port": 65535,
            "source_groups": [
                {"GroupId": EmrManagedSecurityGroup.Kind.MASTER},
                {"GroupId": EmrManagedSecurityGroup.Kind.SLAVE},
            ],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.MASTER,
            "from_port": -1,
            "ip_protocol": "icmp",
            "ip_ranges": [],
            "to_port": -1,
            "source_groups": [
                {"GroupId": EmrManagedSecurityGroup.Kind.MASTER},
                {"GroupId": EmrManagedSecurityGroup.Kind.SLAVE},
            ],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.SLAVE,
            "from_port": 0,
            "ip_protocol": "tcp",
            "ip_ranges": [],
            "to_port": 65535,
            "source_groups": [
                {"GroupId": EmrManagedSecurityGroup.Kind.MASTER},
                {"GroupId": EmrManagedSecurityGroup.Kind.SLAVE},
            ],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.SLAVE,
            "from_port": 8443,
            "ip_protocol": "tcp",
            "ip_ranges": [],
            "to_port": 8443,
            "source_groups": [{"GroupId": EmrManagedSecurityGroup.Kind.SERVICE}],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.SLAVE,
            "from_port": 0,
            "ip_protocol": "udp",
            "ip_ranges": [],
            "to_port": 65535,
            "source_groups": [
                {"GroupId": EmrManagedSecurityGroup.Kind.MASTER},
                {"GroupId": EmrManagedSecurityGroup.Kind.SLAVE},
            ],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.SLAVE,
            "from_port": -1,
            "ip_protocol": "icmp",
            "ip_ranges": [],
            "to_port": -1,
            "source_groups": [
                {"GroupId": EmrManagedSecurityGroup.Kind.MASTER},
                {"GroupId": EmrManagedSecurityGroup.Kind.SLAVE},
            ],
        },
        {
            "group_name_or_id": EmrManagedSecurityGroup.Kind.SERVICE,
            "from_port": 9443,
            "ip_protocol": "tcp",
            "ip_ranges": [],
            "to_port": 9443,
            "source_groups": [{"GroupId": EmrManagedSecurityGroup.Kind.MASTER}],
        },
    ]

    def __init__(self, ec2_backend, vpc_id):
        """Remember the EC2 backend to operate on and the VPC the groups belong to."""
        self.ec2 = ec2_backend
        self.vpc_id = vpc_id

    def manage_security_groups(
        self, master_security_group, slave_security_group, service_access_security_group
    ):
        """Resolve (or create) the three groups, add the managed rules, and return them.

        Each argument is the name or id of an existing group; a falsy value
        selects the default group name of that kind, which is created when
        it does not exist yet.

        Returns:
            A ``(master, slave, service_access)`` tuple of the group objects
            obtained from the EC2 backend.

        Raises:
            ValueError: if an explicitly requested group does not exist.
        """
        group_metadata = [
            (
                master_security_group,
                EmrManagedSecurityGroup.Kind.MASTER,
                EmrManagedMasterSecurityGroup,
            ),
            (
                slave_security_group,
                EmrManagedSecurityGroup.Kind.SLAVE,
                EmrManagedSlaveSecurityGroup,
            ),
            (
                service_access_security_group,
                EmrManagedSecurityGroup.Kind.SERVICE,
                EmrManagedServiceAccessSecurityGroup,
            ),
        ]
        managed_groups = {
            kind: self._get_or_create_sg(name, defaults)
            for name, kind, defaults in group_metadata
        }
        self._add_rules_to(managed_groups)
        return (
            managed_groups[EmrManagedSecurityGroup.Kind.MASTER],
            managed_groups[EmrManagedSecurityGroup.Kind.SLAVE],
            managed_groups[EmrManagedSecurityGroup.Kind.SERVICE],
        )

    def _get_or_create_sg(self, sg_id, defaults):
        """Return the group named by ``sg_id``, or the default group of ``defaults``.

        Only the default-named group is ever created on demand; a missing
        group that the caller asked for explicitly raises ``ValueError``.
        """
        group_id_or_name = sg_id or defaults.group_name
        group = self.ec2.get_security_group_by_name_or_id(
            group_id_or_name, self.vpc_id
        )
        if group is not None:
            return group

        if group_id_or_name != defaults.group_name:
            raise ValueError(
                "The security group '{}' does not exist".format(group_id_or_name)
            )
        return self.ec2.create_security_group(
            defaults.group_name, defaults.description(), self.vpc_id
        )

    def _add_rules_to(self, managed_groups):
        """Authorize every managed egress and ingress rule on the given groups.

        ``managed_groups`` maps each ``Kind`` constant to its group object.
        Rules that already exist are skipped, so the call can be repeated.
        """
        # Imported once per call instead of once per rule.  Kept local to the
        # method — presumably to avoid an import cycle with moto.ec2 (TODO confirm).
        from moto.ec2.exceptions import InvalidPermissionDuplicateError

        rules_metadata = [
            (self.MANAGED_RULES_EGRESS, self.ec2.authorize_security_group_egress),
            (self.MANAGED_RULES_INGRESS, self.ec2.authorize_security_group_ingress),
        ]
        for rules, add_rule in rules_metadata:
            for rule in self._render_rules(rules, managed_groups):
                try:
                    add_rule(vpc_id=self.vpc_id, **rule)
                except InvalidPermissionDuplicateError:
                    # If the rule already exists, we can just move on.
                    pass

    @staticmethod
    def _render_rules(rules, managed_groups):
        """Return a deep copy of ``rules`` with ``Kind`` placeholders replaced by group ids.

        The class-level templates are never modified; ``managed_groups`` maps
        each ``Kind`` constant to an object exposing an ``id`` attribute.
        """
        rendered_rules = copy.deepcopy(rules)
        for rule in rendered_rules:
            rule["group_name_or_id"] = managed_groups[rule["group_name_or_id"]].id
            rule["source_groups"] = [
                {"GroupId": managed_groups[group.get("GroupId")].id}
                for group in rule["source_groups"]
            ]
        return rendered_rules