From 77f0a61c9f89202ae09bb7c5ed1e325139ffc516 Mon Sep 17 00:00:00 2001 From: TheDooner64 Date: Tue, 10 Jul 2018 13:50:47 -0400 Subject: [PATCH 1/6] Add scaffolding for Glue service, including create_database and get_database for the Glue Data Catalog --- moto/__init__.py | 1 + moto/glue/__init__.py | 5 +++++ moto/glue/exceptions.py | 9 ++++++++ moto/glue/models.py | 27 ++++++++++++++++++++++++ moto/glue/responses.py | 27 ++++++++++++++++++++++++ moto/glue/urls.py | 11 ++++++++++ moto/glue/utils.py | 1 + tests/test_glue/test_datacatalog.py | 30 +++++++++++++++++++++++++++ tests/test_s3/test_s3_storageclass.py | 3 --- tests/test_s3/test_s3_utils.py | 1 - 10 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 moto/glue/__init__.py create mode 100644 moto/glue/exceptions.py create mode 100644 moto/glue/models.py create mode 100644 moto/glue/responses.py create mode 100644 moto/glue/urls.py create mode 100644 moto/glue/utils.py create mode 100644 tests/test_glue/test_datacatalog.py diff --git a/moto/__init__.py b/moto/__init__.py index 0ce5e54d1..e5881cfca 100644 --- a/moto/__init__.py +++ b/moto/__init__.py @@ -24,6 +24,7 @@ from .elbv2 import mock_elbv2 # flake8: noqa from .emr import mock_emr, mock_emr_deprecated # flake8: noqa from .events import mock_events # flake8: noqa from .glacier import mock_glacier, mock_glacier_deprecated # flake8: noqa +from .glue import mock_glue # flake8: noqa from .iam import mock_iam, mock_iam_deprecated # flake8: noqa from .kinesis import mock_kinesis, mock_kinesis_deprecated # flake8: noqa from .kms import mock_kms, mock_kms_deprecated # flake8: noqa diff --git a/moto/glue/__init__.py b/moto/glue/__init__.py new file mode 100644 index 000000000..6b1f13326 --- /dev/null +++ b/moto/glue/__init__.py @@ -0,0 +1,5 @@ +from __future__ import unicode_literals +from .models import glue_backend + +glue_backends = {"global": glue_backend} +mock_glue = glue_backend.decorator diff --git a/moto/glue/exceptions.py b/moto/glue/exceptions.py new file mode 100644 index 000000000..0c8760f18 --- /dev/null +++ b/moto/glue/exceptions.py @@ -0,0 +1,9 @@ +from __future__ import unicode_literals +from moto.core.exceptions import RESTError + + +class GlueClientError(RESTError): + + def __init__(self, *args, **kwargs): + kwargs.setdefault('template', 'single_error') + super(GlueClientError, self).__init__(*args, **kwargs) diff --git a/moto/glue/models.py b/moto/glue/models.py new file mode 100644 index 000000000..55cd46bcb --- /dev/null +++ b/moto/glue/models.py @@ -0,0 +1,27 @@ +from __future__ import unicode_literals + +from moto.core import BaseBackend, BaseModel +from moto.compat import OrderedDict + + +class GlueBackend(BaseBackend): + + def __init__(self): + self.databases = OrderedDict() + + def create_database(self, database_name): + database = FakeDatabase(database_name) + self.databases[database_name] = database + return database + + def get_database(self, database_name): + return self.databases[database_name] + + +class FakeDatabase(BaseModel): + + def __init__(self, database_name): + self.name = database_name + + +glue_backend = GlueBackend() diff --git a/moto/glue/responses.py b/moto/glue/responses.py new file mode 100644 index 000000000..f3ef6eb4d --- /dev/null +++ b/moto/glue/responses.py @@ -0,0 +1,27 @@ +from __future__ import unicode_literals + +import json + +from moto.core.responses import BaseResponse +from .models import glue_backend + + +class GlueResponse(BaseResponse): + + @property + def glue_backend(self): + return glue_backend + + @property + def parameters(self): + return json.loads(self.body) + + def create_database(self): + database_name = self.parameters['DatabaseInput']['Name'] + self.glue_backend.create_database(database_name) + return "" + + def get_database(self): + database_name = self.parameters.get('Name') + database = self.glue_backend.get_database(database_name) + return json.dumps({'Database': {'Name': database.name}}) diff --git a/moto/glue/urls.py b/moto/glue/urls.py new file mode 100644 index 000000000..f3eaa9cad --- /dev/null +++ b/moto/glue/urls.py @@ -0,0 +1,11 @@ +from __future__ import unicode_literals + +from .responses import GlueResponse + +url_bases = [ + "https?://glue(.*).amazonaws.com" +] + +url_paths = { + '{0}/$': GlueResponse.dispatch +} diff --git a/moto/glue/utils.py b/moto/glue/utils.py new file mode 100644 index 000000000..baffc4882 --- /dev/null +++ b/moto/glue/utils.py @@ -0,0 +1 @@ +from __future__ import unicode_literals diff --git a/tests/test_glue/test_datacatalog.py b/tests/test_glue/test_datacatalog.py new file mode 100644 index 000000000..77ad1c013 --- /dev/null +++ b/tests/test_glue/test_datacatalog.py @@ -0,0 +1,30 @@ +from __future__ import unicode_literals + +import sure # noqa +import boto3 + +from moto import mock_glue + + +def create_database(client, database_name): + return client.create_database( + DatabaseInput={ + 'Name': database_name + } + ) + + +def get_database(client, database_name): + return client.get_database(Name=database_name) + + +@mock_glue +def test_create_database(): + client = boto3.client('glue', region_name='us-east-1') + database_name = 'myspecialdatabase' + create_database(client, database_name) + + response = get_database(client, database_name) + database = response['Database'] + + database.should.equal({'Name': database_name}) diff --git a/tests/test_s3/test_s3_storageclass.py b/tests/test_s3/test_s3_storageclass.py index c4c83a285..2ed966022 100644 --- a/tests/test_s3/test_s3_storageclass.py +++ b/tests/test_s3/test_s3_storageclass.py @@ -101,6 +101,3 @@ def test_s3_default_storage_class(): # tests that the default storage class is still STANDARD list_of_objects["Contents"][0]["StorageClass"].should.equal("STANDARD") - - - diff --git a/tests/test_s3/test_s3_utils.py b/tests/test_s3/test_s3_utils.py index 9cda1f157..d874a0f1e 100644 --- a/tests/test_s3/test_s3_utils.py +++ b/tests/test_s3/test_s3_utils.py @@ -21,7 +21,6 @@ def test_force_ignore_subdomain_for_bucketnames(): os.environ['S3_IGNORE_SUBDOMAIN_BUCKETNAME'] = '1' expect(bucket_name_from_url('https://subdomain.localhost:5000/abc/resource')).should.equal(None) del(os.environ['S3_IGNORE_SUBDOMAIN_BUCKETNAME']) - def test_versioned_key_store(): From e67a8c6f1bc9079d6aa61da5bca54cf6533b7e4d Mon Sep 17 00:00:00 2001 From: TheDooner64 Date: Tue, 10 Jul 2018 13:52:53 -0400 Subject: [PATCH 2/6] Revert minor changes to s3 tests --- tests/test_s3/test_s3_storageclass.py | 3 +++ tests/test_s3/test_s3_utils.py | 1 + 2 files changed, 4 insertions(+) diff --git a/tests/test_s3/test_s3_storageclass.py b/tests/test_s3/test_s3_storageclass.py index 2ed966022..99908c501 100644 --- a/tests/test_s3/test_s3_storageclass.py +++ b/tests/test_s3/test_s3_storageclass.py @@ -101,3 +101,6 @@ def test_s3_default_storage_class(): # tests that the default storage class is still STANDARD list_of_objects["Contents"][0]["StorageClass"].should.equal("STANDARD") + + + diff --git a/tests/test_s3/test_s3_utils.py b/tests/test_s3/test_s3_utils.py index d874a0f1e..ce9f54c75 100644 --- a/tests/test_s3/test_s3_utils.py +++ b/tests/test_s3/test_s3_utils.py @@ -23,6 +23,7 @@ def test_force_ignore_subdomain_for_bucketnames(): del(os.environ['S3_IGNORE_SUBDOMAIN_BUCKETNAME']) + def test_versioned_key_store(): d = _VersionedKeyStore() From c5c57efbb5d08828dea08822c90b292d0d183cdb Mon Sep 17 00:00:00 2001 From: TheDooner64 Date: Wed, 11 Jul 2018 11:39:40 -0400 Subject: [PATCH 3/6] Creating a database that already exists in the glue data catalog raises an exception --- moto/glue/exceptions.py | 16 +++++++++++----- moto/glue/models.py | 4 ++++ tests/test_glue/test_datacatalog.py | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/moto/glue/exceptions.py b/moto/glue/exceptions.py index 0c8760f18..2f9d16d26 100644 --- a/moto/glue/exceptions.py +++ b/moto/glue/exceptions.py @@ -1,9 +1,15 @@ from __future__ import unicode_literals -from moto.core.exceptions import RESTError +from moto.core.exceptions import JsonRESTError -class GlueClientError(RESTError): +class GlueClientError(JsonRESTError): + code = 400 - def __init__(self, *args, **kwargs): - kwargs.setdefault('template', 'single_error') - super(GlueClientError, self).__init__(*args, **kwargs) + +class DatabaseAlreadyExistsException(GlueClientError): + def __init__(self): + self.code = 400 + super(DatabaseAlreadyExistsException, self).__init__( + 'DatabaseAlreadyExistsException', + 'Database already exists.' + ) diff --git a/moto/glue/models.py b/moto/glue/models.py index 55cd46bcb..357a2a52d 100644 --- a/moto/glue/models.py +++ b/moto/glue/models.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from moto.core import BaseBackend, BaseModel from moto.compat import OrderedDict +from.exceptions import DatabaseAlreadyExistsException class GlueBackend(BaseBackend): @@ -10,6 +11,9 @@ class GlueBackend(BaseBackend): self.databases = OrderedDict() def create_database(self, database_name): + if database_name in self.databases: + raise DatabaseAlreadyExistsException() + database = FakeDatabase(database_name) self.databases[database_name] = database return database diff --git a/tests/test_glue/test_datacatalog.py b/tests/test_glue/test_datacatalog.py index 77ad1c013..c7cdb1a7c 100644 --- a/tests/test_glue/test_datacatalog.py +++ b/tests/test_glue/test_datacatalog.py @@ -1,7 +1,9 @@ from __future__ import unicode_literals import sure # noqa +from nose.tools import assert_raises import boto3 +from botocore.client import ClientError from moto import mock_glue @@ -28,3 +30,15 @@ def test_create_database(): database = response['Database'] database.should.equal({'Name': database_name}) + + +@mock_glue +def test_create_database_already_exists(): + client = boto3.client('glue', region_name='us-east-1') + database_name = 'anewdatabase' + create_database(client, database_name) + + with assert_raises(ClientError) as exc: + create_database(client, database_name) + + exc.exception.response['Error']['Code'].should.equal('DatabaseAlreadyExistsException') From d988ee15fe9d587c8cf0b62e1eaf2c277b7a58d3 Mon Sep 17 00:00:00 2001 From: TheDooner64 Date: Thu, 26 Jul 2018 17:05:09 -0400 Subject: [PATCH 4/6] Add create_table, get_table, and get_tables for the Glue Data Catalog --- moto/glue/exceptions.py | 9 +++ moto/glue/models.py | 31 +++++++- moto/glue/responses.py | 36 +++++++++ tests/test_glue/__init__.py | 1 + tests/test_glue/fixtures/__init__.py | 1 + tests/test_glue/fixtures/datacatalog.py | 31 ++++++++ tests/test_glue/helpers.py | 46 ++++++++++++ tests/test_glue/test_datacatalog.py | 98 ++++++++++++++++++++----- 8 files changed, 235 insertions(+), 18 deletions(-) create mode 100644 tests/test_glue/__init__.py create mode 100644 tests/test_glue/fixtures/__init__.py create mode 100644 tests/test_glue/fixtures/datacatalog.py create mode 100644 tests/test_glue/helpers.py diff --git a/moto/glue/exceptions.py b/moto/glue/exceptions.py index 2f9d16d26..62ea1525c 100644 --- a/moto/glue/exceptions.py +++ b/moto/glue/exceptions.py @@ -13,3 +13,12 @@ class DatabaseAlreadyExistsException(GlueClientError): 'DatabaseAlreadyExistsException', 'Database already exists.' ) + + +class TableAlreadyExistsException(GlueClientError): + def __init__(self): + self.code = 400 + super(TableAlreadyExistsException, self).__init__( + 'TableAlreadyExistsException', + 'Table already exists.' + ) diff --git a/moto/glue/models.py b/moto/glue/models.py index 357a2a52d..9f7e7657d 100644 --- a/moto/glue/models.py +++ b/moto/glue/models.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from moto.core import BaseBackend, BaseModel from moto.compat import OrderedDict -from.exceptions import DatabaseAlreadyExistsException +from.exceptions import DatabaseAlreadyExistsException, TableAlreadyExistsException class GlueBackend(BaseBackend): @@ -21,11 +21,40 @@ class GlueBackend(BaseBackend): def get_database(self, database_name): return self.databases[database_name] + def create_table(self, database_name, table_name, table_input): + database = self.get_database(database_name) + + if table_name in database.tables: + raise TableAlreadyExistsException() + + table = FakeTable(database_name, table_name, table_input) + database.tables[table_name] = table + return table + + def get_table(self, database_name, table_name): + database = self.get_database(database_name) + return database.tables[table_name] + + def get_tables(self, database_name): + database = self.get_database(database_name) + return [table for table_name, table in database.tables.iteritems()] + class FakeDatabase(BaseModel): def __init__(self, database_name): self.name = database_name + self.tables = OrderedDict() + + +class FakeTable(BaseModel): + + def __init__(self, database_name, table_name, table_input): + self.database_name = database_name + self.name = table_name + self.table_input = table_input + self.storage_descriptor = self.table_input.get('StorageDescriptor', {}) + self.partition_keys = self.table_input.get('PartitionKeys', []) glue_backend = GlueBackend() diff --git a/moto/glue/responses.py b/moto/glue/responses.py index f3ef6eb4d..bb64c40d4 100644 --- a/moto/glue/responses.py +++ b/moto/glue/responses.py @@ -25,3 +25,39 @@ class GlueResponse(BaseResponse): database_name = self.parameters.get('Name') database = self.glue_backend.get_database(database_name) return json.dumps({'Database': {'Name': database.name}}) + + def create_table(self): + database_name = self.parameters.get('DatabaseName') + table_input = self.parameters.get('TableInput') + table_name = table_input.get('Name') + self.glue_backend.create_table(database_name, table_name, table_input) + return "" + + def get_table(self): + database_name = self.parameters.get('DatabaseName') + table_name = self.parameters.get('Name') + table = self.glue_backend.get_table(database_name, table_name) + return json.dumps({ + 'Table': { + 'DatabaseName': table.database_name, + 'Name': table.name, + 'PartitionKeys': table.partition_keys, + 'StorageDescriptor': table.storage_descriptor + } + }) + + def get_tables(self): + database_name = self.parameters.get('DatabaseName') + tables = self.glue_backend.get_tables(database_name) + return json.dumps( + { + 'TableList': [ + { + 'DatabaseName': table.database_name, + 'Name': table.name, + 'PartitionKeys': table.partition_keys, + 'StorageDescriptor': table.storage_descriptor + } for table in tables + ] + } + ) diff --git a/tests/test_glue/__init__.py b/tests/test_glue/__init__.py new file mode 100644 index 000000000..baffc4882 --- /dev/null +++ b/tests/test_glue/__init__.py @@ -0,0 +1 @@ +from __future__ import unicode_literals diff --git a/tests/test_glue/fixtures/__init__.py b/tests/test_glue/fixtures/__init__.py new file mode 100644 index 000000000..baffc4882 --- /dev/null +++ b/tests/test_glue/fixtures/__init__.py @@ -0,0 +1 @@ +from __future__ import unicode_literals diff --git a/tests/test_glue/fixtures/datacatalog.py b/tests/test_glue/fixtures/datacatalog.py new file mode 100644 index 000000000..b2efe4154 --- /dev/null +++ b/tests/test_glue/fixtures/datacatalog.py @@ -0,0 +1,31 @@ +from __future__ import unicode_literals + +TABLE_INPUT = { + 'Owner': 'a_fake_owner', + 'Parameters': { + 'EXTERNAL': 'TRUE', + }, + 'Retention': 0, + 'StorageDescriptor': { + 'BucketColumns': [], + 'Compressed': False, + 'InputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat', + 'NumberOfBuckets': -1, + 'OutputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat', + 'Parameters': {}, + 'SerdeInfo': { + 'Parameters': { + 'serialization.format': '1' + }, + 'SerializationLibrary': 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' + }, + 'SkewedInfo': { + 'SkewedColumnNames': [], + 'SkewedColumnValueLocationMaps': {}, + 'SkewedColumnValues': [] + }, + 'SortColumns': [], + 'StoredAsSubDirectories': False + }, + 'TableType': 'EXTERNAL_TABLE', +} diff --git a/tests/test_glue/helpers.py b/tests/test_glue/helpers.py new file mode 100644 index 000000000..4a51f9117 --- /dev/null +++ b/tests/test_glue/helpers.py @@ -0,0 +1,46 @@ +from __future__ import unicode_literals + +import copy + +from .fixtures.datacatalog import TABLE_INPUT + + +def create_database(client, database_name): + return client.create_database( + DatabaseInput={ + 'Name': database_name + } + ) + + +def get_database(client, database_name): + return client.get_database(Name=database_name) + + +def create_table_input(table_name, s3_location, columns=[], partition_keys=[]): + table_input = copy.deepcopy(TABLE_INPUT) + table_input['Name'] = table_name + table_input['PartitionKeys'] = partition_keys + table_input['StorageDescriptor']['Columns'] = columns + table_input['StorageDescriptor']['Location'] = s3_location + return table_input + + +def create_table(client, database_name, table_name, table_input): + return client.create_table( + DatabaseName=database_name, + TableInput=table_input + ) + + +def get_table(client, database_name, table_name): + return client.get_table( + DatabaseName=database_name, + Name=table_name + ) + + +def get_tables(client, database_name): + return client.get_tables( + DatabaseName=database_name + ) diff --git a/tests/test_glue/test_datacatalog.py b/tests/test_glue/test_datacatalog.py index c7cdb1a7c..7dabeb1f3 100644 --- a/tests/test_glue/test_datacatalog.py +++ b/tests/test_glue/test_datacatalog.py @@ -6,27 +6,16 @@ import boto3 from botocore.client import ClientError from moto import mock_glue - - -def create_database(client, database_name): - return client.create_database( - DatabaseInput={ - 'Name': database_name - } - ) - - -def get_database(client, database_name): - return client.get_database(Name=database_name) +from . import helpers @mock_glue def test_create_database(): client = boto3.client('glue', region_name='us-east-1') database_name = 'myspecialdatabase' - create_database(client, database_name) + helpers.create_database(client, database_name) - response = get_database(client, database_name) + response = helpers.get_database(client, database_name) database = response['Database'] database.should.equal({'Name': database_name}) @@ -35,10 +24,85 @@ def test_create_database(): @mock_glue def test_create_database_already_exists(): client = boto3.client('glue', region_name='us-east-1') - database_name = 'anewdatabase' - create_database(client, database_name) + database_name = 'cantcreatethisdatabasetwice' + helpers.create_database(client, database_name) with assert_raises(ClientError) as exc: - create_database(client, database_name) + helpers.create_database(client, database_name) exc.exception.response['Error']['Code'].should.equal('DatabaseAlreadyExistsException') + + +@mock_glue +def test_create_table(): + client = boto3.client('glue', region_name='us-east-1') + database_name = 'myspecialdatabase' + helpers.create_database(client, database_name) + + table_name = 'myspecialtable' + s3_location = 's3://my-bucket/{database_name}/{table_name}'.format( + database_name=database_name, + table_name=table_name + ) + + table_input = helpers.create_table_input(table_name, s3_location) + helpers.create_table(client, database_name, table_name, table_input) + + response = helpers.get_table(client, database_name, table_name) + table = response['Table'] + + table['Name'].should.equal(table_input['Name']) + table['StorageDescriptor'].should.equal(table_input['StorageDescriptor']) + table['PartitionKeys'].should.equal(table_input['PartitionKeys']) + + +@mock_glue +def test_create_table_already_exists(): + client = boto3.client('glue', region_name='us-east-1') + database_name = 'myspecialdatabase' + helpers.create_database(client, database_name) + + table_name = 'cantcreatethistabletwice' + s3_location = 's3://my-bucket/{database_name}/{table_name}'.format( + database_name=database_name, + table_name=table_name + ) + + table_input = helpers.create_table_input(table_name, s3_location) + helpers.create_table(client, database_name, table_name, table_input) + + with assert_raises(ClientError) as exc: + helpers.create_table(client, database_name, table_name, table_input) + + exc.exception.response['Error']['Code'].should.equal('TableAlreadyExistsException') + + +@mock_glue +def test_get_tables(): + client = boto3.client('glue', region_name='us-east-1') + database_name = 'myspecialdatabase' + helpers.create_database(client, database_name) + + table_names = ['myfirsttable', 'mysecondtable', 'mythirdtable'] + table_inputs = {} + + for table_name in table_names: + s3_location = 's3://my-bucket/{database_name}/{table_name}'.format( + database_name=database_name, + table_name=table_name + ) + table_input = helpers.create_table_input(table_name, s3_location) + table_inputs[table_name] = table_input + helpers.create_table(client, database_name, table_name, table_input) + + response = helpers.get_tables(client, database_name) + + tables = response['TableList'] + + assert len(tables) == 3 + + for table in tables: + table_name = table['Name'] + table_name.should.equal(table_inputs[table_name]['Name']) + table['StorageDescriptor'].should.equal(table_inputs[table_name]['StorageDescriptor']) + table['PartitionKeys'].should.equal(table_inputs[table_name]['PartitionKeys']) From 9339a476d2b2c7d23d254c11fa474f6e69426611 Mon Sep 17 00:00:00 2001 From: TheDooner64 Date: Sun, 5 Aug 2018 19:46:40 -0400 Subject: [PATCH 5/6] Adjust glue get_tables method to use items instead of iteritems --- moto/glue/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moto/glue/models.py b/moto/glue/models.py index 9f7e7657d..09b7d60ed 100644 --- a/moto/glue/models.py +++ b/moto/glue/models.py @@ -37,7 +37,7 @@ class GlueBackend(BaseBackend): def get_tables(self, database_name): database = self.get_database(database_name) - return [table for table_name, table in database.tables.iteritems()] + return [table for table_name, table in database.tables.items()] class FakeDatabase(BaseModel): From 3830757ec64152e92aac4bc96b837e2108c0563f Mon Sep 17 00:00:00 2001 From: TheDooner64 Date: Tue, 7 Aug 2018 16:57:20 -0400 Subject: [PATCH 6/6] Add glue to backends to support server mode --- moto/backends.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/moto/backends.py b/moto/backends.py index cd8fe174f..8d707373f 100644 --- a/moto/backends.py +++ b/moto/backends.py @@ -20,6 +20,7 @@ from moto.elbv2 import elbv2_backends from moto.emr import emr_backends from moto.events import events_backends from moto.glacier import glacier_backends +from moto.glue import glue_backends from moto.iam import iam_backends from moto.instance_metadata import instance_metadata_backends from moto.kinesis import kinesis_backends @@ -65,6 +66,7 @@ BACKENDS = { 'events': events_backends, 'emr': emr_backends, 'glacier': glacier_backends, + 'glue': glue_backends, 'iam': iam_backends, 'moto_api': moto_api_backends, 'instance_metadata': instance_metadata_backends,