From d988ee15fe9d587c8cf0b62e1eaf2c277b7a58d3 Mon Sep 17 00:00:00 2001 From: TheDooner64 Date: Thu, 26 Jul 2018 17:05:09 -0400 Subject: [PATCH] Add create_table, get_table, and get_tables for the Glue Data Catalog --- moto/glue/exceptions.py | 9 +++ moto/glue/models.py | 31 +++++++- moto/glue/responses.py | 36 +++++++++ tests/test_glue/__init__.py | 1 + tests/test_glue/fixtures/__init__.py | 1 + tests/test_glue/fixtures/datacatalog.py | 31 ++++++++ tests/test_glue/helpers.py | 46 ++++++++++++ tests/test_glue/test_datacatalog.py | 98 ++++++++++++++++++++----- 8 files changed, 235 insertions(+), 18 deletions(-) create mode 100644 tests/test_glue/__init__.py create mode 100644 tests/test_glue/fixtures/__init__.py create mode 100644 tests/test_glue/fixtures/datacatalog.py create mode 100644 tests/test_glue/helpers.py diff --git a/moto/glue/exceptions.py b/moto/glue/exceptions.py index 2f9d16d26..62ea1525c 100644 --- a/moto/glue/exceptions.py +++ b/moto/glue/exceptions.py @@ -13,3 +13,12 @@ class DatabaseAlreadyExistsException(GlueClientError): 'DatabaseAlreadyExistsException', 'Database already exists.' ) + + +class TableAlreadyExistsException(GlueClientError): + def __init__(self): + self.code = 400 + super(TableAlreadyExistsException, self).__init__( + 'TableAlreadyExistsException', + 'Table already exists.' + ) diff --git a/moto/glue/models.py b/moto/glue/models.py index 357a2a52d..9f7e7657d 100644 --- a/moto/glue/models.py +++ b/moto/glue/models.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from moto.core import BaseBackend, BaseModel from moto.compat import OrderedDict -from.exceptions import DatabaseAlreadyExistsException +from.exceptions import DatabaseAlreadyExistsException, TableAlreadyExistsException class GlueBackend(BaseBackend): @@ -21,11 +21,40 @@ class GlueBackend(BaseBackend): def get_database(self, database_name): return self.databases[database_name] + def create_table(self, database_name, table_name, table_input): + database = self.get_database(database_name) + + if table_name in database.tables: + raise TableAlreadyExistsException() + + table = FakeTable(database_name, table_name, table_input) + database.tables[table_name] = table + return table + + def get_table(self, database_name, table_name): + database = self.get_database(database_name) + return database.tables[table_name] + + def get_tables(self, database_name): + database = self.get_database(database_name) + return [table for table_name, table in database.tables.iteritems()] + class FakeDatabase(BaseModel): def __init__(self, database_name): self.name = database_name + self.tables = OrderedDict() + + +class FakeTable(BaseModel): + + def __init__(self, database_name, table_name, table_input): + self.database_name = database_name + self.name = table_name + self.table_input = table_input + self.storage_descriptor = self.table_input.get('StorageDescriptor', {}) + self.partition_keys = self.table_input.get('PartitionKeys', []) glue_backend = GlueBackend() diff --git a/moto/glue/responses.py b/moto/glue/responses.py index f3ef6eb4d..bb64c40d4 100644 --- a/moto/glue/responses.py +++ b/moto/glue/responses.py @@ -25,3 +25,39 @@ class GlueResponse(BaseResponse): database_name = self.parameters.get('Name') database = self.glue_backend.get_database(database_name) return json.dumps({'Database': {'Name': database.name}}) + + def create_table(self): + database_name = self.parameters.get('DatabaseName') + table_input = self.parameters.get('TableInput') + table_name = table_input.get('Name') + self.glue_backend.create_table(database_name, table_name, table_input) + return "" + + def get_table(self): + database_name = self.parameters.get('DatabaseName') + table_name = self.parameters.get('Name') + table = self.glue_backend.get_table(database_name, table_name) + return json.dumps({ + 'Table': { + 'DatabaseName': table.database_name, + 'Name': table.name, + 'PartitionKeys': table.partition_keys, + 'StorageDescriptor': table.storage_descriptor + } + }) + + def get_tables(self): + database_name = self.parameters.get('DatabaseName') + tables = self.glue_backend.get_tables(database_name) + return json.dumps( + { + 'TableList': [ + { + 'DatabaseName': table.database_name, + 'Name': table.name, + 'PartitionKeys': table.partition_keys, + 'StorageDescriptor': table.storage_descriptor + } for table in tables + ] + } + ) diff --git a/tests/test_glue/__init__.py b/tests/test_glue/__init__.py new file mode 100644 index 000000000..baffc4882 --- /dev/null +++ b/tests/test_glue/__init__.py @@ -0,0 +1 @@ +from __future__ import unicode_literals diff --git a/tests/test_glue/fixtures/__init__.py b/tests/test_glue/fixtures/__init__.py new file mode 100644 index 000000000..baffc4882 --- /dev/null +++ b/tests/test_glue/fixtures/__init__.py @@ -0,0 +1 @@ +from __future__ import unicode_literals diff --git a/tests/test_glue/fixtures/datacatalog.py b/tests/test_glue/fixtures/datacatalog.py new file mode 100644 index 000000000..b2efe4154 --- /dev/null +++ b/tests/test_glue/fixtures/datacatalog.py @@ -0,0 +1,31 @@ +from __future__ import unicode_literals + +TABLE_INPUT = { + 'Owner': 'a_fake_owner', + 'Parameters': { + 'EXTERNAL': 'TRUE', + }, + 'Retention': 0, + 'StorageDescriptor': { + 'BucketColumns': [], + 'Compressed': False, + 'InputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat', + 'NumberOfBuckets': -1, + 'OutputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat', + 'Parameters': {}, + 'SerdeInfo': { + 'Parameters': { + 'serialization.format': '1' + }, + 'SerializationLibrary': 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' + }, + 'SkewedInfo': { + 'SkewedColumnNames': [], + 'SkewedColumnValueLocationMaps': {}, + 'SkewedColumnValues': [] + }, + 'SortColumns': [], + 'StoredAsSubDirectories': False + }, + 'TableType': 'EXTERNAL_TABLE', +} diff --git a/tests/test_glue/helpers.py b/tests/test_glue/helpers.py new file mode 100644 index 000000000..4a51f9117 --- /dev/null +++ b/tests/test_glue/helpers.py @@ -0,0 +1,46 @@ +from __future__ import unicode_literals + +import copy + +from .fixtures.datacatalog import TABLE_INPUT + + +def create_database(client, database_name): + return client.create_database( + DatabaseInput={ + 'Name': database_name + } + ) + + +def get_database(client, database_name): + return client.get_database(Name=database_name) + + +def create_table_input(table_name, s3_location, columns=[], partition_keys=[]): + table_input = copy.deepcopy(TABLE_INPUT) + table_input['Name'] = table_name + table_input['PartitionKeys'] = partition_keys + table_input['StorageDescriptor']['Columns'] = columns + table_input['StorageDescriptor']['Location'] = s3_location + return table_input + + +def create_table(client, database_name, table_name, table_input): + return client.create_table( + DatabaseName=database_name, + TableInput=table_input + ) + + +def get_table(client, database_name, table_name): + return client.get_table( + DatabaseName=database_name, + Name=table_name + ) + + +def get_tables(client, database_name): + return client.get_tables( + DatabaseName=database_name + ) diff --git a/tests/test_glue/test_datacatalog.py b/tests/test_glue/test_datacatalog.py index c7cdb1a7c..7dabeb1f3 100644 --- a/tests/test_glue/test_datacatalog.py +++ b/tests/test_glue/test_datacatalog.py @@ -6,27 +6,16 @@ import boto3 from botocore.client import ClientError from moto import mock_glue - - -def create_database(client, database_name): - return client.create_database( - DatabaseInput={ - 'Name': database_name - } - ) - - -def get_database(client, database_name): - return client.get_database(Name=database_name) +from . import helpers @mock_glue def test_create_database(): client = boto3.client('glue', region_name='us-east-1') database_name = 'myspecialdatabase' - create_database(client, database_name) + helpers.create_database(client, database_name) - response = get_database(client, database_name) + response = helpers.get_database(client, database_name) database = response['Database'] database.should.equal({'Name': database_name}) @@ -35,10 +24,85 @@ def test_create_database(): @mock_glue def test_create_database_already_exists(): client = boto3.client('glue', region_name='us-east-1') - database_name = 'anewdatabase' - create_database(client, database_name) + database_name = 'cantcreatethisdatabasetwice' + helpers.create_database(client, database_name) with assert_raises(ClientError) as exc: - create_database(client, database_name) + helpers.create_database(client, database_name) exc.exception.response['Error']['Code'].should.equal('DatabaseAlreadyExistsException') + + +@mock_glue +def test_create_table(): + client = boto3.client('glue', region_name='us-east-1') + database_name = 'myspecialdatabase' + helpers.create_database(client, database_name) + + table_name = 'myspecialtable' + s3_location = 's3://my-bucket/{database_name}/{table_name}'.format( + database_name=database_name, + table_name=table_name + ) + + table_input = helpers.create_table_input(table_name, s3_location) + helpers.create_table(client, database_name, table_name, table_input) + + response = helpers.get_table(client, database_name, table_name) + table = response['Table'] + + table['Name'].should.equal(table_input['Name']) + table['StorageDescriptor'].should.equal(table_input['StorageDescriptor']) + table['PartitionKeys'].should.equal(table_input['PartitionKeys']) + + +@mock_glue +def test_create_table_already_exists(): + client = boto3.client('glue', region_name='us-east-1') + database_name = 'myspecialdatabase' + helpers.create_database(client, database_name) + + table_name = 'cantcreatethistabletwice' + s3_location = 's3://my-bucket/{database_name}/{table_name}'.format( + database_name=database_name, + table_name=table_name + ) + + table_input = helpers.create_table_input(table_name, s3_location) + helpers.create_table(client, database_name, table_name, table_input) + + with assert_raises(ClientError) as exc: + helpers.create_table(client, database_name, table_name, table_input) + + exc.exception.response['Error']['Code'].should.equal('TableAlreadyExistsException') + + +@mock_glue +def test_get_tables(): + client = boto3.client('glue', region_name='us-east-1') + database_name = 'myspecialdatabase' + helpers.create_database(client, database_name) + + table_names = ['myfirsttable', 'mysecondtable', 'mythirdtable'] + table_inputs = {} + + for table_name in table_names: + s3_location = 's3://my-bucket/{database_name}/{table_name}'.format( + database_name=database_name, + table_name=table_name + ) + table_input = helpers.create_table_input(table_name, s3_location) + table_inputs[table_name] = table_input + helpers.create_table(client, database_name, table_name, table_input) + + response = helpers.get_tables(client, database_name) + + tables = response['TableList'] + + assert len(tables) == 3 + + for table in tables: + table_name = table['Name'] + table_name.should.equal(table_inputs[table_name]['Name']) + table['StorageDescriptor'].should.equal(table_inputs[table_name]['StorageDescriptor']) + table['PartitionKeys'].should.equal(table_inputs[table_name]['PartitionKeys'])