diff --git a/changelog/169.bugfix.rst b/changelog/169.bugfix.rst new file mode 100644 index 0000000..745f98e --- /dev/null +++ b/changelog/169.bugfix.rst @@ -0,0 +1 @@ +Fixed a bug in parsing of hexadecimal columns. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index c4bed38..201426e 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -91,7 +91,7 @@ JSOC time strings can be converted to a naive `~datetime.datetime` representatio 1 2016-04-01 06:00:00 2 2016-04-01 12:00:00 3 2016-04-01 18:00:00 - Name: T_REC, dtype: datetime64[ns] + Name: T_REC, dtype: datetime64[us] For most of the HMI and MDI data sets, the `TAI `__ time standard is used which, in contrast to `UTC `__, does not make use of any leap seconds. The TAI standard is currently not supported by the Python standard libraries. @@ -210,7 +210,7 @@ Note that :meth:`drms.client.Client.export` performs an ``url_quick`` / ``as-is` 1 /SUM41/D803708361/S00008/Dopplergram.fits 2 /SUM71/D803720859/S00008/Dopplergram.fits 3 /SUM70/D803730119/S00008/Dopplergram.fits - Name: filename, dtype: object + Name: filename, dtype: str Download URLs can now be generated using the :attr:`drms.client.ExportRequest.urls` attribute: @@ -225,7 +225,7 @@ The following, for example, only downloads the first file of the request: .. code-block:: python - >>> export_request.download(out_dir, index=0) # doctest: +REMOTE_DATA + >>> res = export_request.download(out_dir, index=0) # doctest: +REMOTE_DATA Being a direct ``as-is`` export, there are no keyword data written to any FITS headers. If you need keyword data added to the headers, you have to use the ``fits`` export protocol instead, which is described below. diff --git a/drms/client.py b/drms/client.py index 24e29b7..0ddd44a 100644 --- a/drms/client.py +++ b/drms/client.py @@ -11,6 +11,7 @@ import numpy as np import pandas as pd +from pandas.api.types import is_object_dtype, is_string_dtype from drms import logger from drms.utils import create_request_with_header @@ -614,10 +615,9 @@ def __init__(self, server="jsoc", *, email=None): def __repr__(self): return f"" - def _convert_numeric_keywords(self, ds, kdf, *, skip_conversion=None): - si = self.info(ds) - int_keys = list(si.keywords[si.keywords.is_integer].index) - num_keys = list(si.keywords[si.keywords.is_numeric].index) + def _convert_numeric_keywords(self, keywords, kdf, *, skip_conversion=None): + int_keys = list(keywords[keywords.is_integer].index) + num_keys = list(keywords[keywords.is_numeric].index) num_keys += ["*recnum*", "*sunum*", "*size*"] if skip_conversion is None: skip_conversion = [] @@ -630,11 +630,12 @@ def _convert_numeric_keywords(self, ds, kdf, *, skip_conversion=None): # we need a special treatment for integer strings that start # with '0x', like QUALITY. The following to_numeric call is # still necessary as the results are still Python objects. - if k in int_keys and kdf[k].dtype is np.dtype(object): - idx = kdf[k].str.startswith("0x") + if k in int_keys and (is_object_dtype(kdf[k]) or is_string_dtype(kdf[k])): + values = kdf[k].astype(str) + idx = values.str.startswith(("0x", "0X")) if idx.any(): - k_idx = kdf.columns.get_loc(k) - kdf.loc[idx, kdf.columns[k_idx]] = kdf.loc[idx, kdf.columns[k_idx]].apply(int, base=16) + kdf[k] = kdf[k].astype(object) + kdf.loc[idx, k] = values[idx].apply(int, base=16) if k in num_keys: kdf[k] = _pd_to_numeric_coerce(kdf[k]) @@ -1029,7 +1030,7 @@ def query( else: res_key = pd.DataFrame() if convert_numeric: - self._convert_numeric_keywords(ds, res_key, skip_conversion=skip_conversion) + self._convert_numeric_keywords(self.info(ds).keywords, res_key, skip_conversion=skip_conversion) res.append(res_key) if seg is not None: diff --git a/drms/tests/test_jsoc_query.py b/drms/tests/test_jsoc_query.py index 659cb25..e1463f0 100644 --- a/drms/tests/test_jsoc_query.py +++ b/drms/tests/test_jsoc_query.py @@ -1,3 +1,4 @@ +import pandas as pd import pytest import drms @@ -100,9 +101,20 @@ def test_query_invalid_series(jsoc_client): [ "hmi.v_45s[2014.01.01_00:00:35_TAI-2014.01.01_01:00:35_TAI]", "hmi.M_720s[2011.04.14_00:30:00_TAI/6h@2h]", + "aia.lev1_euv_12s[2014-01-01T00:00:01Z/365d@1d][335]", ], ) def test_query_hexadecimal_strings(query): # Exercise the part of client.py that deals with hexadecimal strings c = drms.Client() - c.query(query, key="**ALL**") + result = c.query(query, key=["T_REC", "QUALITY", "CRPIX1", "CRVAL1", "BUNIT"]) + assert pd.api.types.is_integer_dtype(result["QUALITY"]) + + +def test_query_quality_hex_decimal_conversion(): + c = drms.Client() + keywords = pd.DataFrame({"is_integer": [True], "is_numeric": [True]}, index=["QUALITY"]) + df = pd.DataFrame({"QUALITY": pd.Series(["0x00000000", "0x0000000A", "0X000000FF"], dtype="string")}) + c._convert_numeric_keywords(keywords, df) + assert df["QUALITY"].tolist() == [0, 10, 255] + assert pd.api.types.is_integer_dtype(df["QUALITY"]) diff --git a/drms/tests/test_to_datetime.py b/drms/tests/test_to_datetime.py index 5deea8f..7863f8a 100644 --- a/drms/tests/test_to_datetime.py +++ b/drms/tests/test_to_datetime.py @@ -82,7 +82,6 @@ def test_time_series(time_series, expected): ("2010.05.01_TAI", False), ("2010.05.01_00:00_TAI", False), ("", True), - ("1600", True), ("foo", True), ("2013.12.21_23:32:34_TAI", False), ] diff --git a/examples/plot_aia_lightcurve.py b/examples/plot_aia_lightcurve.py index dc23cf6..9505085 100644 --- a/examples/plot_aia_lightcurve.py +++ b/examples/plot_aia_lightcurve.py @@ -40,9 +40,10 @@ print("Querying series info...") series_info = client.info("aia.lev1_euv_12s") series_info_lev1 = client.info("aia.lev1") +linkinfo = series_info.keywords.linkinfo +missing_linkinfo = linkinfo.isna() for key in keys: - linkinfo = series_info.keywords.loc[key].linkinfo - if linkinfo is not None and linkinfo.startswith("lev1->"): + if not missing_linkinfo.loc[key] and linkinfo.loc[key].startswith("lev1->"): note_str = series_info_lev1.keywords.loc[key].note else: note_str = series_info.keywords.loc[key].note