Glue: support nanosecond-precision timestamp partition filtering (#5915)
This commit is contained in:
parent
c6c0e50ee9
commit
c9fe32520d
@ -173,7 +173,6 @@ class GlueBackend(BaseBackend):
|
||||
Expression caveats:
|
||||
|
||||
- Column names must consist of uppercase and lowercase letters, dots and underscores only.
|
||||
- Nanosecond expressions on timestamp columns are rounded to microseconds.
|
||||
- Literal dates and timestamps must be valid, i.e. no support for February 31st.
|
||||
- LIKE expressions are converted to Python regexes, escaping special characters.
|
||||
Only % and _ wildcards are supported, and SQL escaping using [] does not work.
|
||||
|
@ -2,7 +2,7 @@ import abc
|
||||
import operator
|
||||
import re
|
||||
import warnings
|
||||
from datetime import date, datetime, timedelta
|
||||
from datetime import date, datetime
|
||||
from itertools import repeat
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
@ -74,15 +74,17 @@ def _cast(type_: str, value: Any) -> Union[date, datetime, float, int, str]:
|
||||
f" {value} is not a timestamp."
|
||||
)
|
||||
|
||||
# use nanosecond representation for timestamps
|
||||
posix_nanoseconds = int(timestamp.timestamp() * 1_000_000_000)
|
||||
|
||||
nanos = match.group("nanos")
|
||||
if nanos is not None:
|
||||
# strip the leading dot, reverse the digits and left-pad with zeros to nine digits, so that the digit at index i counts 10**i nanoseconds
|
||||
nanos = "".join(reversed(nanos[1:])).zfill(9)
|
||||
for i, nanoseconds in enumerate(nanos):
|
||||
microseconds = (int(nanoseconds) * 10**i) / 1000
|
||||
timestamp += timedelta(microseconds=round(microseconds))
|
||||
posix_nanoseconds += int(nanoseconds) * 10**i
|
||||
|
||||
return timestamp
|
||||
return posix_nanoseconds
|
||||
|
||||
raise InvalidInputException("GetPartitions", f"Unknown type : '{type_}'")
|
||||
|
||||
|
@ -288,11 +288,10 @@ def test_get_partitions_expression_timestamp_column():
|
||||
"timestamp_col between '2022-01-15 00:00:00' AND '2022-02-15 00:00:00'",
|
||||
"timestamp_col > '2022-01-15 00:00:00' AND "
|
||||
"timestamp_col < '2022-02-15 00:00:00'",
|
||||
# these expressions only work because of rounding to microseconds
|
||||
"timestamp_col = '2022-01-31 23:59:59.999999999'",
|
||||
"timestamp_col = '2022-02-01 00:00:00.00000001'",
|
||||
"timestamp_col > '2022-01-31 23:59:59.999999499' AND"
|
||||
" timestamp_col < '2022-02-01 00:00:00.0000009'",
|
||||
"timestamp_col > '2022-01-31 23:59:59.999999999' AND"
|
||||
" timestamp_col < '2022-02-01 00:00:00.000000001'",
|
||||
)
|
||||
|
||||
for expression in timestamp_col_is_february_expressions:
|
||||
|
Loading…
Reference in New Issue
Block a user