Add create_table, get_table, and get_tables for the Glue Data Catalog
This commit is contained in:
parent
c5c57efbb5
commit
d988ee15fe
@ -13,3 +13,12 @@ class DatabaseAlreadyExistsException(GlueClientError):
|
|||||||
'DatabaseAlreadyExistsException',
|
'DatabaseAlreadyExistsException',
|
||||||
'Database already exists.'
|
'Database already exists.'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TableAlreadyExistsException(GlueClientError):
|
||||||
|
def __init__(self):
|
||||||
|
self.code = 400
|
||||||
|
super(TableAlreadyExistsException, self).__init__(
|
||||||
|
'TableAlreadyExistsException',
|
||||||
|
'Table already exists.'
|
||||||
|
)
|
||||||
|
@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from moto.core import BaseBackend, BaseModel
|
from moto.core import BaseBackend, BaseModel
|
||||||
from moto.compat import OrderedDict
|
from moto.compat import OrderedDict
|
||||||
from.exceptions import DatabaseAlreadyExistsException
|
from.exceptions import DatabaseAlreadyExistsException, TableAlreadyExistsException
|
||||||
|
|
||||||
|
|
||||||
class GlueBackend(BaseBackend):
|
class GlueBackend(BaseBackend):
|
||||||
@ -21,11 +21,40 @@ class GlueBackend(BaseBackend):
|
|||||||
def get_database(self, database_name):
|
def get_database(self, database_name):
|
||||||
return self.databases[database_name]
|
return self.databases[database_name]
|
||||||
|
|
||||||
|
def create_table(self, database_name, table_name, table_input):
|
||||||
|
database = self.get_database(database_name)
|
||||||
|
|
||||||
|
if table_name in database.tables:
|
||||||
|
raise TableAlreadyExistsException()
|
||||||
|
|
||||||
|
table = FakeTable(database_name, table_name, table_input)
|
||||||
|
database.tables[table_name] = table
|
||||||
|
return table
|
||||||
|
|
||||||
|
def get_table(self, database_name, table_name):
|
||||||
|
database = self.get_database(database_name)
|
||||||
|
return database.tables[table_name]
|
||||||
|
|
||||||
|
def get_tables(self, database_name):
|
||||||
|
database = self.get_database(database_name)
|
||||||
|
return [table for table_name, table in database.tables.iteritems()]
|
||||||
|
|
||||||
|
|
||||||
class FakeDatabase(BaseModel):
|
class FakeDatabase(BaseModel):
|
||||||
|
|
||||||
def __init__(self, database_name):
|
def __init__(self, database_name):
|
||||||
self.name = database_name
|
self.name = database_name
|
||||||
|
self.tables = OrderedDict()
|
||||||
|
|
||||||
|
|
||||||
|
class FakeTable(BaseModel):
|
||||||
|
|
||||||
|
def __init__(self, database_name, table_name, table_input):
|
||||||
|
self.database_name = database_name
|
||||||
|
self.name = table_name
|
||||||
|
self.table_input = table_input
|
||||||
|
self.storage_descriptor = self.table_input.get('StorageDescriptor', {})
|
||||||
|
self.partition_keys = self.table_input.get('PartitionKeys', [])
|
||||||
|
|
||||||
|
|
||||||
glue_backend = GlueBackend()
|
glue_backend = GlueBackend()
|
||||||
|
@ -25,3 +25,39 @@ class GlueResponse(BaseResponse):
|
|||||||
database_name = self.parameters.get('Name')
|
database_name = self.parameters.get('Name')
|
||||||
database = self.glue_backend.get_database(database_name)
|
database = self.glue_backend.get_database(database_name)
|
||||||
return json.dumps({'Database': {'Name': database.name}})
|
return json.dumps({'Database': {'Name': database.name}})
|
||||||
|
|
||||||
|
def create_table(self):
|
||||||
|
database_name = self.parameters.get('DatabaseName')
|
||||||
|
table_input = self.parameters.get('TableInput')
|
||||||
|
table_name = table_input.get('Name')
|
||||||
|
self.glue_backend.create_table(database_name, table_name, table_input)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def get_table(self):
|
||||||
|
database_name = self.parameters.get('DatabaseName')
|
||||||
|
table_name = self.parameters.get('Name')
|
||||||
|
table = self.glue_backend.get_table(database_name, table_name)
|
||||||
|
return json.dumps({
|
||||||
|
'Table': {
|
||||||
|
'DatabaseName': table.database_name,
|
||||||
|
'Name': table.name,
|
||||||
|
'PartitionKeys': table.partition_keys,
|
||||||
|
'StorageDescriptor': table.storage_descriptor
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
def get_tables(self):
|
||||||
|
database_name = self.parameters.get('DatabaseName')
|
||||||
|
tables = self.glue_backend.get_tables(database_name)
|
||||||
|
return json.dumps(
|
||||||
|
{
|
||||||
|
'TableList': [
|
||||||
|
{
|
||||||
|
'DatabaseName': table.database_name,
|
||||||
|
'Name': table.name,
|
||||||
|
'PartitionKeys': table.partition_keys,
|
||||||
|
'StorageDescriptor': table.storage_descriptor
|
||||||
|
} for table in tables
|
||||||
|
]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
1
tests/test_glue/__init__.py
Normal file
1
tests/test_glue/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from __future__ import unicode_literals
|
1
tests/test_glue/fixtures/__init__.py
Normal file
1
tests/test_glue/fixtures/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from __future__ import unicode_literals
|
31
tests/test_glue/fixtures/datacatalog.py
Normal file
31
tests/test_glue/fixtures/datacatalog.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
TABLE_INPUT = {
|
||||||
|
'Owner': 'a_fake_owner',
|
||||||
|
'Parameters': {
|
||||||
|
'EXTERNAL': 'TRUE',
|
||||||
|
},
|
||||||
|
'Retention': 0,
|
||||||
|
'StorageDescriptor': {
|
||||||
|
'BucketColumns': [],
|
||||||
|
'Compressed': False,
|
||||||
|
'InputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',
|
||||||
|
'NumberOfBuckets': -1,
|
||||||
|
'OutputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat',
|
||||||
|
'Parameters': {},
|
||||||
|
'SerdeInfo': {
|
||||||
|
'Parameters': {
|
||||||
|
'serialization.format': '1'
|
||||||
|
},
|
||||||
|
'SerializationLibrary': 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
|
||||||
|
},
|
||||||
|
'SkewedInfo': {
|
||||||
|
'SkewedColumnNames': [],
|
||||||
|
'SkewedColumnValueLocationMaps': {},
|
||||||
|
'SkewedColumnValues': []
|
||||||
|
},
|
||||||
|
'SortColumns': [],
|
||||||
|
'StoredAsSubDirectories': False
|
||||||
|
},
|
||||||
|
'TableType': 'EXTERNAL_TABLE',
|
||||||
|
}
|
46
tests/test_glue/helpers.py
Normal file
46
tests/test_glue/helpers.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import copy
|
||||||
|
|
||||||
|
from .fixtures.datacatalog import TABLE_INPUT
|
||||||
|
|
||||||
|
|
||||||
|
def create_database(client, database_name):
|
||||||
|
return client.create_database(
|
||||||
|
DatabaseInput={
|
||||||
|
'Name': database_name
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_database(client, database_name):
|
||||||
|
return client.get_database(Name=database_name)
|
||||||
|
|
||||||
|
|
||||||
|
def create_table_input(table_name, s3_location, columns=[], partition_keys=[]):
|
||||||
|
table_input = copy.deepcopy(TABLE_INPUT)
|
||||||
|
table_input['Name'] = table_name
|
||||||
|
table_input['PartitionKeys'] = partition_keys
|
||||||
|
table_input['StorageDescriptor']['Columns'] = columns
|
||||||
|
table_input['StorageDescriptor']['Location'] = s3_location
|
||||||
|
return table_input
|
||||||
|
|
||||||
|
|
||||||
|
def create_table(client, database_name, table_name, table_input):
|
||||||
|
return client.create_table(
|
||||||
|
DatabaseName=database_name,
|
||||||
|
TableInput=table_input
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_table(client, database_name, table_name):
|
||||||
|
return client.get_table(
|
||||||
|
DatabaseName=database_name,
|
||||||
|
Name=table_name
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_tables(client, database_name):
|
||||||
|
return client.get_tables(
|
||||||
|
DatabaseName=database_name
|
||||||
|
)
|
@ -6,27 +6,16 @@ import boto3
|
|||||||
from botocore.client import ClientError
|
from botocore.client import ClientError
|
||||||
|
|
||||||
from moto import mock_glue
|
from moto import mock_glue
|
||||||
|
from . import helpers
|
||||||
|
|
||||||
def create_database(client, database_name):
|
|
||||||
return client.create_database(
|
|
||||||
DatabaseInput={
|
|
||||||
'Name': database_name
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_database(client, database_name):
|
|
||||||
return client.get_database(Name=database_name)
|
|
||||||
|
|
||||||
|
|
||||||
@mock_glue
|
@mock_glue
|
||||||
def test_create_database():
|
def test_create_database():
|
||||||
client = boto3.client('glue', region_name='us-east-1')
|
client = boto3.client('glue', region_name='us-east-1')
|
||||||
database_name = 'myspecialdatabase'
|
database_name = 'myspecialdatabase'
|
||||||
create_database(client, database_name)
|
helpers.create_database(client, database_name)
|
||||||
|
|
||||||
response = get_database(client, database_name)
|
response = helpers.get_database(client, database_name)
|
||||||
database = response['Database']
|
database = response['Database']
|
||||||
|
|
||||||
database.should.equal({'Name': database_name})
|
database.should.equal({'Name': database_name})
|
||||||
@ -35,10 +24,85 @@ def test_create_database():
|
|||||||
@mock_glue
|
@mock_glue
|
||||||
def test_create_database_already_exists():
|
def test_create_database_already_exists():
|
||||||
client = boto3.client('glue', region_name='us-east-1')
|
client = boto3.client('glue', region_name='us-east-1')
|
||||||
database_name = 'anewdatabase'
|
database_name = 'cantcreatethisdatabasetwice'
|
||||||
create_database(client, database_name)
|
helpers.create_database(client, database_name)
|
||||||
|
|
||||||
with assert_raises(ClientError) as exc:
|
with assert_raises(ClientError) as exc:
|
||||||
create_database(client, database_name)
|
helpers.create_database(client, database_name)
|
||||||
|
|
||||||
exc.exception.response['Error']['Code'].should.equal('DatabaseAlreadyExistsException')
|
exc.exception.response['Error']['Code'].should.equal('DatabaseAlreadyExistsException')
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
|
||||||
|
def test_create_table():
|
||||||
|
client = boto3.client('glue', region_name='us-east-1')
|
||||||
|
database_name = 'myspecialdatabase'
|
||||||
|
helpers.create_database(client, database_name)
|
||||||
|
|
||||||
|
table_name = 'myspecialtable'
|
||||||
|
s3_location = 's3://my-bucket/{database_name}/{table_name}'.format(
|
||||||
|
database_name=database_name,
|
||||||
|
table_name=table_name
|
||||||
|
)
|
||||||
|
|
||||||
|
table_input = helpers.create_table_input(table_name, s3_location)
|
||||||
|
helpers.create_table(client, database_name, table_name, table_input)
|
||||||
|
|
||||||
|
response = helpers.get_table(client, database_name, table_name)
|
||||||
|
table = response['Table']
|
||||||
|
|
||||||
|
table['Name'].should.equal(table_input['Name'])
|
||||||
|
table['StorageDescriptor'].should.equal(table_input['StorageDescriptor'])
|
||||||
|
table['PartitionKeys'].should.equal(table_input['PartitionKeys'])
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
|
||||||
|
def test_create_table_already_exists():
|
||||||
|
client = boto3.client('glue', region_name='us-east-1')
|
||||||
|
database_name = 'myspecialdatabase'
|
||||||
|
helpers.create_database(client, database_name)
|
||||||
|
|
||||||
|
table_name = 'cantcreatethistabletwice'
|
||||||
|
s3_location = 's3://my-bucket/{database_name}/{table_name}'.format(
|
||||||
|
database_name=database_name,
|
||||||
|
table_name=table_name
|
||||||
|
)
|
||||||
|
|
||||||
|
table_input = helpers.create_table_input(table_name, s3_location)
|
||||||
|
helpers.create_table(client, database_name, table_name, table_input)
|
||||||
|
|
||||||
|
with assert_raises(ClientError) as exc:
|
||||||
|
helpers.create_table(client, database_name, table_name, table_input)
|
||||||
|
|
||||||
|
exc.exception.response['Error']['Code'].should.equal('TableAlreadyExistsException')
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
|
||||||
|
def test_get_tables():
|
||||||
|
client = boto3.client('glue', region_name='us-east-1')
|
||||||
|
database_name = 'myspecialdatabase'
|
||||||
|
helpers.create_database(client, database_name)
|
||||||
|
|
||||||
|
table_names = ['myfirsttable', 'mysecondtable', 'mythirdtable']
|
||||||
|
table_inputs = {}
|
||||||
|
|
||||||
|
for table_name in table_names:
|
||||||
|
s3_location = 's3://my-bucket/{database_name}/{table_name}'.format(
|
||||||
|
database_name=database_name,
|
||||||
|
table_name=table_name
|
||||||
|
)
|
||||||
|
table_input = helpers.create_table_input(table_name, s3_location)
|
||||||
|
table_inputs[table_name] = table_input
|
||||||
|
helpers.create_table(client, database_name, table_name, table_input)
|
||||||
|
|
||||||
|
response = helpers.get_tables(client, database_name)
|
||||||
|
|
||||||
|
tables = response['TableList']
|
||||||
|
|
||||||
|
assert len(tables) == 3
|
||||||
|
|
||||||
|
for table in tables:
|
||||||
|
table_name = table['Name']
|
||||||
|
table_name.should.equal(table_inputs[table_name]['Name'])
|
||||||
|
table['StorageDescriptor'].should.equal(table_inputs[table_name]['StorageDescriptor'])
|
||||||
|
table['PartitionKeys'].should.equal(table_inputs[table_name]['PartitionKeys'])
|
||||||
|
Loading…
Reference in New Issue
Block a user