Merge pull request #1750 from TheDooner64/glue-data-catalog
Scaffolding for AWS Glue Data Catalog
commit 42d486f9b0
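
For orientation, a minimal sketch of how the new decorator is meant to be used (this is not part of the diff; it mirrors the tests added below, and the database name is made up):

import boto3
from moto import mock_glue


@mock_glue
def create_and_fetch_database():
    # Every Glue call inside the decorated function hits the in-memory backend, not AWS.
    client = boto3.client('glue', region_name='us-east-1')
    client.create_database(DatabaseInput={'Name': 'flights_db'})  # 'flights_db' is illustrative
    return client.get_database(Name='flights_db')['Database']['Name']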

moto/__init__.py
@@ -24,6 +24,7 @@ from .elbv2 import mock_elbv2 # flake8: noqa
 from .emr import mock_emr, mock_emr_deprecated # flake8: noqa
 from .events import mock_events # flake8: noqa
 from .glacier import mock_glacier, mock_glacier_deprecated # flake8: noqa
+from .glue import mock_glue # flake8: noqa
 from .iam import mock_iam, mock_iam_deprecated # flake8: noqa
 from .kinesis import mock_kinesis, mock_kinesis_deprecated # flake8: noqa
 from .kms import mock_kms, mock_kms_deprecated # flake8: noqa

moto/backends.py
@@ -20,6 +20,7 @@ from moto.elbv2 import elbv2_backends
 from moto.emr import emr_backends
 from moto.events import events_backends
 from moto.glacier import glacier_backends
+from moto.glue import glue_backends
 from moto.iam import iam_backends
 from moto.instance_metadata import instance_metadata_backends
 from moto.kinesis import kinesis_backends
@@ -65,6 +66,7 @@ BACKENDS = {
     'events': events_backends,
     'emr': emr_backends,
     'glacier': glacier_backends,
+    'glue': glue_backends,
     'iam': iam_backends,
     'moto_api': moto_api_backends,
     'instance_metadata': instance_metadata_backends,

moto/glue/__init__.py (new file, 5 lines)
from __future__ import unicode_literals
from .models import glue_backend

glue_backends = {"global": glue_backend}
mock_glue = glue_backend.decorator

moto/glue/exceptions.py (new file, 24 lines)
from __future__ import unicode_literals
from moto.core.exceptions import JsonRESTError


class GlueClientError(JsonRESTError):
    code = 400


class DatabaseAlreadyExistsException(GlueClientError):
    def __init__(self):
        self.code = 400
        super(DatabaseAlreadyExistsException, self).__init__(
            'DatabaseAlreadyExistsException',
            'Database already exists.'
        )


class TableAlreadyExistsException(GlueClientError):
    def __init__(self):
        self.code = 400
        super(TableAlreadyExistsException, self).__init__(
            'TableAlreadyExistsException',
            'Table already exists.'
        )

moto/glue/models.py (new file, 60 lines)
from __future__ import unicode_literals

from moto.core import BaseBackend, BaseModel
from moto.compat import OrderedDict
from .exceptions import DatabaseAlreadyExistsException, TableAlreadyExistsException


class GlueBackend(BaseBackend):

    def __init__(self):
        self.databases = OrderedDict()

    def create_database(self, database_name):
        if database_name in self.databases:
            raise DatabaseAlreadyExistsException()

        database = FakeDatabase(database_name)
        self.databases[database_name] = database
        return database

    def get_database(self, database_name):
        return self.databases[database_name]

    def create_table(self, database_name, table_name, table_input):
        database = self.get_database(database_name)

        if table_name in database.tables:
            raise TableAlreadyExistsException()

        table = FakeTable(database_name, table_name, table_input)
        database.tables[table_name] = table
        return table

    def get_table(self, database_name, table_name):
        database = self.get_database(database_name)
        return database.tables[table_name]

    def get_tables(self, database_name):
        database = self.get_database(database_name)
        return [table for table_name, table in database.tables.items()]


class FakeDatabase(BaseModel):

    def __init__(self, database_name):
        self.name = database_name
        self.tables = OrderedDict()


class FakeTable(BaseModel):

    def __init__(self, database_name, table_name, table_input):
        self.database_name = database_name
        self.name = table_name
        self.table_input = table_input
        self.storage_descriptor = self.table_input.get('StorageDescriptor', {})
        self.partition_keys = self.table_input.get('PartitionKeys', [])


glue_backend = GlueBackend()
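
As a quick illustration (not part of the diff), the backend above can also be exercised directly; the database and table names here are invented:

from moto.glue.models import glue_backend

# glue_backend is the module-level singleton created at the bottom of models.py.
glue_backend.create_database('flights_db')
table = glue_backend.create_table('flights_db', 'trips', {'StorageDescriptor': {}, 'PartitionKeys': []})
assert glue_backend.get_table('flights_db', 'trips') is table
assert [t.name for t in glue_backend.get_tables('flights_db')] == ['trips']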

moto/glue/responses.py (new file, 63 lines)
from __future__ import unicode_literals

import json

from moto.core.responses import BaseResponse
from .models import glue_backend


class GlueResponse(BaseResponse):

    @property
    def glue_backend(self):
        return glue_backend

    @property
    def parameters(self):
        return json.loads(self.body)

    def create_database(self):
        database_name = self.parameters['DatabaseInput']['Name']
        self.glue_backend.create_database(database_name)
        return ""

    def get_database(self):
        database_name = self.parameters.get('Name')
        database = self.glue_backend.get_database(database_name)
        return json.dumps({'Database': {'Name': database.name}})

    def create_table(self):
        database_name = self.parameters.get('DatabaseName')
        table_input = self.parameters.get('TableInput')
        table_name = table_input.get('Name')
        self.glue_backend.create_table(database_name, table_name, table_input)
        return ""

    def get_table(self):
        database_name = self.parameters.get('DatabaseName')
        table_name = self.parameters.get('Name')
        table = self.glue_backend.get_table(database_name, table_name)
        return json.dumps({
            'Table': {
                'DatabaseName': table.database_name,
                'Name': table.name,
                'PartitionKeys': table.partition_keys,
                'StorageDescriptor': table.storage_descriptor
            }
        })

    def get_tables(self):
        database_name = self.parameters.get('DatabaseName')
        tables = self.glue_backend.get_tables(database_name)
        return json.dumps(
            {
                'TableList': [
                    {
                        'DatabaseName': table.database_name,
                        'Name': table.name,
                        'PartitionKeys': table.partition_keys,
                        'StorageDescriptor': table.storage_descriptor
                    } for table in tables
                ]
            }
        )
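
Because parameters is simply the parsed JSON request body, each handler above expects the payload shape boto3 sends for that operation. For example (values illustrative, not part of the diff):

# Body consumed by GlueResponse.create_database
{'DatabaseInput': {'Name': 'flights_db'}}

# Body consumed by GlueResponse.get_table
{'DatabaseName': 'flights_db', 'Name': 'trips'}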

moto/glue/urls.py (new file, 11 lines)
from __future__ import unicode_literals

from .responses import GlueResponse

url_bases = [
    "https?://glue(.*).amazonaws.com"
]

url_paths = {
    '{0}/$': GlueResponse.dispatch
}

moto/glue/utils.py (new file, 1 line)
from __future__ import unicode_literals

tests/test_glue/__init__.py (new file, 1 line)
from __future__ import unicode_literals

tests/test_glue/fixtures/__init__.py (new file, 1 line)
from __future__ import unicode_literals

tests/test_glue/fixtures/datacatalog.py (new file, 31 lines)
from __future__ import unicode_literals

TABLE_INPUT = {
    'Owner': 'a_fake_owner',
    'Parameters': {
        'EXTERNAL': 'TRUE',
    },
    'Retention': 0,
    'StorageDescriptor': {
        'BucketColumns': [],
        'Compressed': False,
        'InputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',
        'NumberOfBuckets': -1,
        'OutputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat',
        'Parameters': {},
        'SerdeInfo': {
            'Parameters': {
                'serialization.format': '1'
            },
            'SerializationLibrary': 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
        },
        'SkewedInfo': {
            'SkewedColumnNames': [],
            'SkewedColumnValueLocationMaps': {},
            'SkewedColumnValues': []
        },
        'SortColumns': [],
        'StoredAsSubDirectories': False
    },
    'TableType': 'EXTERNAL_TABLE',
}

tests/test_glue/helpers.py (new file, 46 lines)
from __future__ import unicode_literals

import copy

from .fixtures.datacatalog import TABLE_INPUT


def create_database(client, database_name):
    return client.create_database(
        DatabaseInput={
            'Name': database_name
        }
    )


def get_database(client, database_name):
    return client.get_database(Name=database_name)


def create_table_input(table_name, s3_location, columns=[], partition_keys=[]):
    table_input = copy.deepcopy(TABLE_INPUT)
    table_input['Name'] = table_name
    table_input['PartitionKeys'] = partition_keys
    table_input['StorageDescriptor']['Columns'] = columns
    table_input['StorageDescriptor']['Location'] = s3_location
    return table_input


def create_table(client, database_name, table_name, table_input):
    return client.create_table(
        DatabaseName=database_name,
        TableInput=table_input
    )


def get_table(client, database_name, table_name):
    return client.get_table(
        DatabaseName=database_name,
        Name=table_name
    )


def get_tables(client, database_name):
    return client.get_tables(
        DatabaseName=database_name
    )
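
The tests below only build table inputs without columns or partition keys; a hypothetical call exercising those arguments of create_table_input could look like this (not part of the diff; the Name/Type column shape follows the Glue API):

# Hypothetical values; the helper just deep-copies TABLE_INPUT and overrides these fields.
columns = [{'Name': 'trip_id', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}]
partition_keys = [{'Name': 'dt', 'Type': 'string'}]
table_input = create_table_input('trips', 's3://my-bucket/flights_db/trips',
                                 columns=columns, partition_keys=partition_keys)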

tests/test_glue/test_datacatalog.py (new file, 108 lines)
from __future__ import unicode_literals

import sure # noqa
from nose.tools import assert_raises
import boto3
from botocore.client import ClientError

from moto import mock_glue
from . import helpers


@mock_glue
def test_create_database():
    client = boto3.client('glue', region_name='us-east-1')
    database_name = 'myspecialdatabase'
    helpers.create_database(client, database_name)

    response = helpers.get_database(client, database_name)
    database = response['Database']

    database.should.equal({'Name': database_name})


@mock_glue
def test_create_database_already_exists():
    client = boto3.client('glue', region_name='us-east-1')
    database_name = 'cantcreatethisdatabasetwice'
    helpers.create_database(client, database_name)

    with assert_raises(ClientError) as exc:
        helpers.create_database(client, database_name)

    exc.exception.response['Error']['Code'].should.equal('DatabaseAlreadyExistsException')


@mock_glue
def test_create_table():
    client = boto3.client('glue', region_name='us-east-1')
    database_name = 'myspecialdatabase'
    helpers.create_database(client, database_name)

    table_name = 'myspecialtable'
    s3_location = 's3://my-bucket/{database_name}/{table_name}'.format(
        database_name=database_name,
        table_name=table_name
    )

    table_input = helpers.create_table_input(table_name, s3_location)
    helpers.create_table(client, database_name, table_name, table_input)

    response = helpers.get_table(client, database_name, table_name)
    table = response['Table']

    table['Name'].should.equal(table_input['Name'])
    table['StorageDescriptor'].should.equal(table_input['StorageDescriptor'])
    table['PartitionKeys'].should.equal(table_input['PartitionKeys'])


@mock_glue
def test_create_table_already_exists():
    client = boto3.client('glue', region_name='us-east-1')
    database_name = 'myspecialdatabase'
    helpers.create_database(client, database_name)

    table_name = 'cantcreatethistabletwice'
    s3_location = 's3://my-bucket/{database_name}/{table_name}'.format(
        database_name=database_name,
        table_name=table_name
    )

    table_input = helpers.create_table_input(table_name, s3_location)
    helpers.create_table(client, database_name, table_name, table_input)

    with assert_raises(ClientError) as exc:
        helpers.create_table(client, database_name, table_name, table_input)

    exc.exception.response['Error']['Code'].should.equal('TableAlreadyExistsException')


@mock_glue
def test_get_tables():
    client = boto3.client('glue', region_name='us-east-1')
    database_name = 'myspecialdatabase'
    helpers.create_database(client, database_name)

    table_names = ['myfirsttable', 'mysecondtable', 'mythirdtable']
    table_inputs = {}

    for table_name in table_names:
        s3_location = 's3://my-bucket/{database_name}/{table_name}'.format(
            database_name=database_name,
            table_name=table_name
        )
        table_input = helpers.create_table_input(table_name, s3_location)
        table_inputs[table_name] = table_input
        helpers.create_table(client, database_name, table_name, table_input)

    response = helpers.get_tables(client, database_name)

    tables = response['TableList']

    assert len(tables) == 3

    for table in tables:
        table_name = table['Name']
        table_name.should.equal(table_inputs[table_name]['Name'])
        table['StorageDescriptor'].should.equal(table_inputs[table_name]['StorageDescriptor'])
        table['PartitionKeys'].should.equal(table_inputs[table_name]['PartitionKeys'])

@@ -101,6 +101,6 @@ def test_s3_default_storage_class():

    # tests that the default storage class is still STANDARD
    list_of_objects["Contents"][0]["StorageClass"].should.equal("STANDARD")

@@ -21,7 +21,7 @@ def test_force_ignore_subdomain_for_bucketnames():
    os.environ['S3_IGNORE_SUBDOMAIN_BUCKETNAME'] = '1'
    expect(bucket_name_from_url('https://subdomain.localhost:5000/abc/resource')).should.equal(None)
    del(os.environ['S3_IGNORE_SUBDOMAIN_BUCKETNAME'])


def test_versioned_key_store():