diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index 3441166bb33f..d0da0c87dcd9 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -96,7 +96,10 @@ impl<'a> AnyValueBuffer<'a> { #[cfg(feature = "dtype-datetime")] (Datetime(builder, _, _), AnyValue::Null) => builder.append_null(), #[cfg(feature = "dtype-datetime")] - (Datetime(builder, tu_l, _), AnyValue::Datetime(v, tu_r, _)) => { + ( + Datetime(builder, tu_l, _), + AnyValue::Datetime(v, tu_r, _) | AnyValue::DatetimeOwned(v, tu_r, _), + ) => { // we convert right tu to left tu // so we swap. let v = convert_time_units(v, tu_r, *tu_l); diff --git a/crates/polars-plan/src/plans/lit.rs b/crates/polars-plan/src/plans/lit.rs index f26e8bc318d0..095eb9e11b48 100644 --- a/crates/polars-plan/src/plans/lit.rs +++ b/crates/polars-plan/src/plans/lit.rs @@ -310,6 +310,10 @@ impl From> for LiteralValue { AnyValue::Date(v) => LiteralValue::Date(v), #[cfg(feature = "dtype-datetime")] AnyValue::Datetime(value, tu, tz) => LiteralValue::DateTime(value, tu, tz.cloned()), + #[cfg(feature = "dtype-datetime")] + AnyValue::DatetimeOwned(value, tu, tz) => { + LiteralValue::DateTime(value, tu, tz.as_ref().map(AsRef::as_ref).cloned()) + }, #[cfg(feature = "dtype-duration")] AnyValue::Duration(value, tu) => LiteralValue::Duration(value, tu), #[cfg(feature = "dtype-time")] diff --git a/crates/polars-python/src/conversion/any_value.rs b/crates/polars-python/src/conversion/any_value.rs index 4307801133b0..41bfeae1a521 100644 --- a/crates/polars-python/src/conversion/any_value.rs +++ b/crates/polars-python/src/conversion/any_value.rs @@ -1,4 +1,5 @@ use std::borrow::{Borrow, Cow}; +use std::sync::Arc; use chrono::{ DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike, @@ -253,31 +254,42 @@ pub(crate) fn py_object_to_any_value<'py>( let py = ob.py(); let tzinfo = ob.getattr(intern!(py, "tzinfo"))?; - let timestamp = if tzinfo.is_none() { + if tzinfo.is_none() { let datetime = ob.extract::()?; let delta = datetime - NaiveDateTime::UNIX_EPOCH; - delta.num_microseconds().unwrap() - } else if tzinfo.hasattr(intern!(py, "key"))? { + let timestamp = delta.num_microseconds().unwrap(); + return Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None)); + } + + let (timestamp, tz) = if tzinfo.hasattr(intern!(py, "key"))? { let datetime = ob.extract::>()?; + let tz = datetime.timezone().name().into(); if datetime.year() >= 2100 { // chrono-tz does not support dates after 2100 // https://github.com/chronotope/chrono-tz/issues/135 - pl_utils(py) - .bind(py) - .getattr(intern!(py, "datetime_to_int"))? - .call1((ob, intern!(py, "us")))? - .extract::()? + ( + pl_utils(py) + .bind(py) + .getattr(intern!(py, "datetime_to_int"))? + .call1((ob, intern!(py, "us")))? + .extract::()?, + tz, + ) } else { let delta = datetime.to_utc() - DateTime::UNIX_EPOCH; - delta.num_microseconds().unwrap() + (delta.num_microseconds().unwrap(), tz) } } else { let datetime = ob.extract::>()?; let delta = datetime.to_utc() - DateTime::UNIX_EPOCH; - delta.num_microseconds().unwrap() + (delta.num_microseconds().unwrap(), "UTC".into()) }; - Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None)) + Ok(AnyValue::DatetimeOwned( + timestamp, + TimeUnit::Microseconds, + Some(Arc::new(tz)), + )) } fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { diff --git a/crates/polars-python/src/conversion/datetime.rs b/crates/polars-python/src/conversion/datetime.rs index 1d6b36d87e98..2c7aea0306df 100644 --- a/crates/polars-python/src/conversion/datetime.rs +++ b/crates/polars-python/src/conversion/datetime.rs @@ -2,13 +2,15 @@ use std::str::FromStr; -use chrono::{DateTime, FixedOffset, NaiveDateTime, NaiveTime, TimeDelta, TimeZone as _}; +use chrono::{DateTime, Datelike, FixedOffset, NaiveDateTime, NaiveTime, TimeDelta, TimeZone as _}; use chrono_tz::Tz; use polars::datatypes::TimeUnit; use polars_core::datatypes::TimeZone; -use pyo3::{Bound, IntoPyObject, PyAny, PyResult, Python}; +use pyo3::types::PyAnyMethods; +use pyo3::{intern, Bound, IntoPyObject, PyAny, PyResult, Python}; use crate::error::PyPolarsErr; +use crate::py_modules::pl_utils; pub fn elapsed_offset_to_timedelta(elapsed: i64, time_unit: TimeUnit) -> TimeDelta { let (in_second, nano_multiplier) = match time_unit { @@ -46,8 +48,17 @@ pub fn datetime_to_py_object<'py>( if let Some(time_zone) = tz { if let Ok(tz) = Tz::from_str(time_zone) { let utc_datetime = DateTime::UNIX_EPOCH + elapsed_offset_to_timedelta(v, tu); - let datetime = utc_datetime.with_timezone(&tz); - datetime.into_pyobject(py) + if utc_datetime.year() >= 2100 { + // chrono-tz does not support dates after 2100 + // https://github.com/chronotope/chrono-tz/issues/135 + pl_utils(py) + .bind(py) + .getattr(intern!(py, "to_py_datetime"))? + .call1((v, tu.to_ascii(), time_zone.as_str())) + } else { + let datetime = utc_datetime.with_timezone(&tz); + datetime.into_pyobject(py) + } } else if let Ok(tz) = FixedOffset::from_str(time_zone) { let naive_datetime = timestamp_to_naive_datetime(v, tu); let datetime = tz.from_utc_datetime(&naive_datetime); diff --git a/py-polars/polars/_utils/construction/dataframe.py b/py-polars/polars/_utils/construction/dataframe.py index 1e7667fcef58..d7dacb01cc7f 100644 --- a/py-polars/polars/_utils/construction/dataframe.py +++ b/py-polars/polars/_utils/construction/dataframe.py @@ -34,7 +34,6 @@ from polars.datatypes import ( N_INFER_DEFAULT, Categorical, - Datetime, Enum, String, Struct, @@ -701,19 +700,6 @@ def _sequence_of_dict_to_pydf( if column_names else None ) - tz_overrides = { - column_name: Datetime("us", time_zone="UTC") - for column_name, first_value in first_element.items() - if ( - isinstance(first_value, datetime) - and hasattr(first_value, "tzinfo") - and first_value.tzinfo is not None - and column_name not in schema_overrides - and (schema is None or column_name not in schema) - ) - } - if tz_overrides: - schema_overrides = {**schema_overrides, **tz_overrides} pydf = PyDataFrame.from_dicts( data, diff --git a/py-polars/polars/_utils/construction/series.py b/py-polars/polars/_utils/construction/series.py index 010829b8a6a1..72419b139522 100644 --- a/py-polars/polars/_utils/construction/series.py +++ b/py-polars/polars/_utils/construction/series.py @@ -217,13 +217,10 @@ def sequence_to_pyseries( s = wrap_s(py_series).dt.cast_time_unit(time_unit) if (values_dtype == Date) & (dtype == Datetime): - result = s.cast(Datetime(time_unit or "us")) - if time_zone is not None: - result = result.dt.convert_time_zone(time_zone) - return result._s + s = s.cast(Datetime(time_unit or "us")) - if (dtype == Datetime) and (value.tzinfo is not None or time_zone is not None): - return s.dt.convert_time_zone(time_zone or "UTC")._s + if dtype == Datetime and time_zone is not None: + return s.dt.convert_time_zone(time_zone)._s return s._s elif ( diff --git a/py-polars/polars/_utils/construction/utils.py b/py-polars/polars/_utils/construction/utils.py index 417c7588e848..90c9c4248e9c 100644 --- a/py-polars/polars/_utils/construction/utils.py +++ b/py-polars/polars/_utils/construction/utils.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from functools import lru_cache from typing import TYPE_CHECKING, Any, Callable, get_type_hints @@ -36,20 +35,15 @@ def _get_annotations(obj: type) -> dict[str, Any]: return getattr(obj, "__annotations__", {}) -if sys.version_info >= (3, 10): - - def try_get_type_hints(obj: type) -> dict[str, Any]: - try: - # often the same as obj.__annotations__, but handles forward references - # encoded as string literals, adds Optional[t] if a default value equal - # to None is set and recursively replaces 'Annotated[T, ...]' with 'T'. - return get_type_hints(obj) - except TypeError: - # fallback on edge-cases (eg: InitVar inference on python 3.10). - return _get_annotations(obj) - -else: - try_get_type_hints = _get_annotations +def try_get_type_hints(obj: type) -> dict[str, Any]: + try: + # often the same as obj.__annotations__, but handles forward references + # encoded as string literals, adds Optional[t] if a default value equal + # to None is set and recursively replaces 'Annotated[T, ...]' with 'T'. + return get_type_hints(obj) + except TypeError: + # fallback on edge-cases (eg: InitVar inference on python 3.10). + return _get_annotations(obj) @lru_cache(64) diff --git a/py-polars/polars/datatypes/classes.py b/py-polars/polars/datatypes/classes.py index f33e00d2a494..fd4603b0e53f 100644 --- a/py-polars/polars/datatypes/classes.py +++ b/py-polars/polars/datatypes/classes.py @@ -4,7 +4,7 @@ import enum from collections import OrderedDict from collections.abc import Mapping -from datetime import timezone +from datetime import tzinfo from inspect import isclass from typing import TYPE_CHECKING, Any @@ -475,7 +475,7 @@ class Datetime(TemporalType): time_zone: str | None def __init__( - self, time_unit: TimeUnit = "us", time_zone: str | timezone | None = None + self, time_unit: TimeUnit = "us", time_zone: str | tzinfo | None = None ) -> None: if time_unit not in ("ms", "us", "ns"): msg = ( @@ -484,7 +484,7 @@ def __init__( ) raise ValueError(msg) - if isinstance(time_zone, timezone): + if isinstance(time_zone, tzinfo): time_zone = str(time_zone) self.time_unit = time_unit diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index c562167e8829..ce68349cfc61 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -936,7 +936,7 @@ def test_init_1d_sequence() -> None: [datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))], schema={"ts": pl.Datetime("ms")}, ) - assert df.schema == {"ts": pl.Datetime("ms", "UTC")} + assert df.schema == {"ts": pl.Datetime("ms", "Asia/Kathmandu")} def test_init_pandas(monkeypatch: Any) -> None: @@ -1812,7 +1812,28 @@ def test_init_list_of_dicts_with_timezone(tz: Any) -> None: expected = pl.DataFrame({"dt": [dt, dt]}) assert_frame_equal(df, expected) - assert df.schema == {"dt": pl.Datetime("us", time_zone=tz and "UTC")} + assert df.schema == {"dt": pl.Datetime("us", time_zone=tz)} + + +@pytest.mark.parametrize( + "tz", + [ + None, + ZoneInfo("Asia/Tokyo"), + ZoneInfo("Europe/Amsterdam"), + ZoneInfo("UTC"), + timezone.utc, + ], +) +def test_init_list_of_nested_dicts_with_timezone(tz: Any) -> None: + dt = datetime(2021, 1, 1, 0, 0, 0, 0, tzinfo=tz) + data = [{"timestamp": {"content": datetime(2021, 1, 1, 0, 0, tzinfo=tz)}}] + + df = pl.DataFrame(data).unnest("timestamp") + expected = pl.DataFrame({"content": [dt]}) + assert_frame_equal(df, expected) + + assert df.schema == {"content": pl.Datetime("us", time_zone=tz)} def test_init_from_subclassed_types() -> None: diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 1c0245ac1e10..2036c89094ab 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -1366,7 +1366,7 @@ def test_tz_datetime_duration_arithm_5221() -> None: def test_auto_infer_time_zone() -> None: dt = datetime(2022, 10, 17, 10, tzinfo=ZoneInfo("Asia/Shanghai")) s = pl.Series([dt]) - assert s.dtype == pl.Datetime("us", "UTC") + assert s.dtype == pl.Datetime("us", "Asia/Shanghai") assert s[0] == dt diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index 72c20fd1ee46..8bbf5ac917d9 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -120,8 +120,12 @@ def test_datetime_range_lazy_time_zones() -> None: ) expected = pl.DataFrame( { - "start": [datetime(2019, 12, 31, 18, 15, tzinfo=ZoneInfo(key="UTC"))], - "stop": [datetime(2020, 1, 1, 18, 15, tzinfo=ZoneInfo(key="UTC"))], + "start": [ + datetime(2020, 1, 1, 00, 00, tzinfo=ZoneInfo(key="Asia/Kathmandu")) + ], + "stop": [ + datetime(2020, 1, 2, 00, 00, tzinfo=ZoneInfo(key="Asia/Kathmandu")) + ], "literal": [ datetime(2020, 1, 1, 6, 15, tzinfo=ZoneInfo(key="Pacific/Tarawa")) ], @@ -598,7 +602,7 @@ def test_datetime_range_fast_slow_paths( unit: str, start: datetime, ) -> None: - end = pl.select(pl.lit(start).dt.offset_by(f"{n*size}{unit}")).item() + end = pl.select(pl.lit(start).dt.offset_by(f"{n * size}{unit}")).item() result_slow = pl.datetime_range( start, end,