Skip to content
Open
Show file tree
Hide file tree
Changes from 86 commits
Commits
Show all changes
119 commits
Select commit Hold shift + click to select a range
be22083
added objid functions
jreadey Feb 4, 2025
28dcfc6
fix flake8 errors
jreadey Feb 4, 2025
54c83d5
merge hsds hdf5dtype changes
jreadey Feb 8, 2025
3a2b084
patch flake8 error
jreadey Feb 8, 2025
133e962
patch flake8 error
jreadey Feb 8, 2025
856ee65
keep backward compatibility for enum members key
jreadey Feb 9, 2025
eec4efc
first pass at abstract db class
jreadey Feb 12, 2025
2f546b9
first pass at h5py reader
jreadey Feb 18, 2025
4b9cb68
added h5json_writer
jreadey Feb 23, 2025
bad4012
create reader and writer packages
jreadey Feb 23, 2025
c5c28a4
basic dataset read/write methods added
jreadey Feb 26, 2025
c0a6cc3
update h5tojson script
jreadey Feb 26, 2025
48d43e4
added h5json read
jreadey Feb 27, 2025
06b5a6f
added h5py writer
jreadey Feb 27, 2025
8fceb5f
added filters.py
jreadey Feb 27, 2025
af4d46a
updates for h5py_writer to write dataset values
jreadey Mar 4, 2025
7c393b6
revert to using members for dtype enums
jreadey Mar 5, 2025
825fc89
add support for reference types
jreadey Mar 7, 2025
88fa1eb
support for h5py and json readers and writers
jreadey Mar 25, 2025
541b966
fix for vlen encoding
jreadey Mar 28, 2025
398e2d3
fix for reference types
jreadey Apr 2, 2025
9978c45
fix flake8 errors
jreadey Apr 2, 2025
436d921
fix flake8 error
jreadey Apr 2, 2025
51063f6
update testall script
jreadey Apr 2, 2025
d14599a
fix flake8 error
jreadey Apr 2, 2025
e4be33c
make tmp dir in testall
jreadey Apr 2, 2025
8af6508
fix for h5json writer on windows
jreadey Apr 2, 2025
d519d8b
require python >= 3.9
jreadey Apr 2, 2025
4169d5c
remove redundant stripId function
jreadey Apr 3, 2025
7840ca4
add test for incremental updates
jreadey Apr 3, 2025
deb501f
fix flake8 errors
jreadey Apr 3, 2025
1bf10b1
added dset writes to h5py_writer test
jreadey Apr 3, 2025
bfd6cdd
fix for array types
jreadey Apr 10, 2025
d1e2b39
fix for scalar json to arr conversion
jreadey Apr 16, 2025
c6d77f8
support jsontoarray for all byte strings
jreadey Apr 17, 2025
cb3419a
fix errors in jsonToArray function
jreadey Apr 18, 2025
1513334
added extra jsonToArray test
jreadey Apr 21, 2025
289bacb
support setting single element compounds with a list
jreadey Apr 21, 2025
135d88f
handle assigning sequence to multi-dim array
jreadey Apr 21, 2025
13ea473
clean up debug print messages
jreadey Apr 21, 2025
3b87203
fix jsonToArray for single element compound values
jreadey May 9, 2025
ef390ec
restructure source tree
jreadey May 30, 2025
8b42625
added prototype hsdsreader
jreadey May 30, 2025
638ab00
fix flake8 error
jreadey Jun 2, 2025
7e17e7b
added missing hsds_reader files
jreadey Jun 3, 2025
e07bd81
fix flake8 error
jreadey Jun 3, 2025
69baad8
fix import paths
jreadey Jun 3, 2025
66b5b15
use binary for dataset reads
jreadey Jun 3, 2025
0090d56
add hsds_writer class
jreadey Jun 10, 2025
9d59f8c
fix db re-open logic
jreadey Jun 11, 2025
4894e6d
support for h5py_writer
jreadey Jun 16, 2025
8324a46
fix jsontoh5 and h5tojson for new db interface
jreadey Jun 18, 2025
5c82129
update of hsds_writer
jreadey Jun 19, 2025
286f239
multi-update for hsds-writer
jreadey Jun 26, 2025
09c017a
reorg executables to apps dir
jreadey Jun 26, 2025
9773e2c
added h5tohs util
jreadey Jul 1, 2025
3d9003c
hsds writer updates
jreadey Jul 11, 2025
74d3a62
update datasetvalues for in init
jreadey Jul 14, 2025
985a842
set dataset values in create if possible
jreadey Jul 14, 2025
9d78d0c
hsdsreader test
jreadey Jul 26, 2025
4413e9b
added reader, writer stat method
jreadey Jul 31, 2025
5ee8b3e
fix for reopen db
jreadey Aug 3, 2025
67bf8e1
add more debug log messages
jreadey Aug 4, 2025
0a2c0af
move null reader, writer classes
jreadey Aug 5, 2025
0542e7d
fix for remove links
jreadey Aug 11, 2025
5bbb0f3
use DELETED to indicate link deletions
jreadey Aug 17, 2025
b05941f
persist attr deletion
jreadey Aug 17, 2025
e4e0105
fix key name for creationPropertyList
jreadey Sep 5, 2025
3260929
use client create time for new link if provided
jreadey Sep 9, 2025
1ea0bff
make reference type simple wrapper for str uuid
jreadey Sep 9, 2025
46ff5fa
fix syntax for ci yaml
jreadey Sep 9, 2025
e88f85f
remove python 3.9 support
jreadey Sep 9, 2025
51f2a9b
revert h5py_util.py
jreadey Sep 9, 2025
e7452ca
use uuid as representation of Reference type
jreadey Sep 9, 2025
5b6f33d
fix len ref in hsds_reader
jreadey Sep 11, 2025
8e6d14a
fix for reading unpersisted dataset values
jreadey Sep 11, 2025
5561767
fix for created and lastModified keys
jreadey Sep 11, 2025
924ee00
fix for scalar datasets
jreadey Sep 11, 2025
1f90429
move hsds plugins to h5pyd
jreadey Sep 12, 2025
c60e1c9
moved hsds reader/writer tests to h5pyd
jreadey Sep 12, 2025
29ae237
fix for getDatasetValues
jreadey Sep 16, 2025
b904ea5
fix for datasets with fillvalue
jreadey Sep 16, 2025
65b94c1
added dset_util functions
jreadey Sep 17, 2025
eb138bc
added filter functions
jreadey Sep 18, 2025
fec0a43
added more dset utility functions
jreadey Sep 18, 2025
5ab9b65
added shape_util.py
jreadey Oct 3, 2025
dcaf2fb
consolidate duplicate dsetutil funcs
jreadey Oct 17, 2025
e6357ff
for non chunked datasets return chunk dims as dset shape
jreadey Oct 17, 2025
b8c474f
add more tests for dset_util
jreadey Oct 17, 2025
0be82f2
add min/max param for guessChunk
jreadey Oct 23, 2025
fdb9ffa
added constant for valid layout classes
jreadey Oct 29, 2025
e84a072
update for create time
jreadey Dec 9, 2025
1f53fe0
add time_util.py
jreadey Dec 9, 2025
6a6f385
fix flake8 errors
jreadey Dec 9, 2025
632260b
revert getStats change
jreadey Dec 9, 2025
d957669
check for chunked for resizable dsets
jreadey Dec 11, 2025
978a548
added validateDatasetCreationProps
jreadey Dec 11, 2025
6cb136e
updated dset_util_test
jreadey Dec 11, 2025
678025b
added filter validation
jreadey Dec 15, 2025
b68e967
fix for getFilters
jreadey Dec 17, 2025
7ad35b7
updates for dataset reads/writes
jreadey Dec 22, 2025
8bb734e
update filter methods
jreadey Dec 24, 2025
e4aafaf
fix for lz4 filter opts
jreadey Dec 26, 2025
43133da
added filter test to testall
jreadey Dec 26, 2025
df8ce23
updates for resizable datasets
jreadey Dec 30, 2025
6f94e07
adjust dataset updates for resize
jreadey Jan 2, 2026
b4485eb
test broadcasting
jreadey Jan 2, 2026
bb4d148
added data limit option to json writer
jreadey Jan 2, 2026
db47efa
fix for H5S_UNLIMITED
jreadey Jan 6, 2026
3a2e6b2
added link_util file
jreadey Jan 8, 2026
c8f2aa3
fix circular import
jreadey Jan 14, 2026
40c4705
fix for vlen types
jreadey Jan 28, 2026
7295f6a
fix for str encoding
jreadey Feb 3, 2026
3a9e573
update for vlen dsets
jreadey Feb 3, 2026
6201152
log warning on link replacement
jreadey Feb 6, 2026
4aecf51
fix filtertest
jreadey Mar 13, 2026
4138276
vlen array fix
jreadey Mar 16, 2026
adafa90
check that db.setvalue has same rank as dataset
jreadey Mar 17, 2026
a7b1613
raise error when attempting to serialize object arrays
jreadey Mar 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12"]
runs-on: ${{ matrix.os }}

steps:
Expand All @@ -25,23 +25,67 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
shell: bash
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest

- name: Lint with flake8
shell: bash
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# full lint pass; --exit-zero is not passed, so any reported error fails the build. The GitHub editor is 127 chars wide
flake8 . --count --ignore=F401,W503,E203 --max-complexity=99 --max-line-length=127 --statistics

- name: Install h5json
shell: bash
run: |
pip install -e .

- name: Checkout HSDS
uses: actions/checkout@v4
with:
repository: HDFGroup/hsds
path: ${{github.workspace}}/hsds

- name: Install HSDS
working-directory: ${{github.workspace}}/hsds
shell: bash
run: |
pip install -e .

- name: Start HSDS
shell: bash
working-directory: ${{github.workspace}}/hsds
run: |
mkdir hsds_root
mkdir hsds_root/hsds_bucket
cp admin/config/groups.default admin/config/groups.txt
cp admin/config/passwd.default admin/config/passwd.txt
hsds --root_dir hsds_root --host localhost --port 5101 --password_file admin/config/passwd.txt --logfile hs.log --loglevel DEBUG --config_dir=admin/config --count=4 &

- name: Wait for node startup
shell: bash
run: |
sleep 30

- name: HSDS Setup
shell: bash
env:
ADMIN_PASSWORD: admin
ADMIN_USERNAME: admin
working-directory: ${{github.workspace}}/hsds
run: |
python tests/integ/setup_test.py

- name: Run tests
shell: bash
env:
HS_ENDPOINT: http://localhost:5101
HS_USERNAME: test_user1
HS_PASSWORD: test
run: |
python testall.py
Binary file modified data/hdf5/dset_creationprop.h5
Binary file not shown.
34 changes: 0 additions & 34 deletions data/json/nullspace_dset.json

This file was deleted.

23 changes: 11 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,35 @@ classifiers = [
"Topic :: Software Development :: Build Tools",
"License :: OSI Approved :: BSD License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
authors = [{ "name" = "The HDF Group", "email" = "help@hdfgroup.org" }]
keywords = ["json", "hdf5", "multidimensional array", "data", "datacube"]
requires-python = ">=3.8"
requires-python = ">=3.9"
dependencies = [
"h5py >=3.10",
"h5py >= 3.10",
"numpy >= 2.0; python_version>='3.9'",
"jsonschema >=4.4.0",
"tomli; python_version<'3.11'",
"numpy >=1.20,<2.0.0; python_version=='3.8'",
]

dynamic = ["version"]

[project.urls]
Homepage = "https://hdf5-json.readthedocs.io"
Documentation = "https://hdf5-json.readthedocs.io"
Homepage = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
Documentation = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
Source = "https://github.com/HDFGroup/hdf5-json"
"Bug Reports" = "https://github.com/HDFGroup/hdf5-json/issues"
Social = "https://twitter.com/hdf5"
Discussion = "https://forum.hdfgroup.org"

[project.scripts]
h5tojson = "h5json.h5tojson.h5tojson:main"
jsontoh5 = "h5json.jsontoh5.jsontoh5:main"
h5jvalidate = "h5json.validator.validator:main"
h5tojson = "h5json.apps.h5tojson:main"
jsontoh5 = "h5json.apps.jsontoh5:main"
h5jvalidate = "h5json.apps.validator:main"

[project.optional-dependencies]
dev = ["check-manifest"]
Expand All @@ -52,10 +51,10 @@ build-backend = "setuptools.build_meta"
package-dir = { "" = "src" }
packages = [
"h5json",
"h5json.h5tojson",
"h5json.jsontoh5",
"h5json.jsonstore",
"h5json.h5pystore",
"h5json.schema",
"h5json.validator",
"h5json.apps",
]
package-data = { "h5json.schema" = ["*.schema.json"] }
platforms = ["any"]
Expand Down
6 changes: 6 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[flake8]
max-line-length = 120
# E402: module level import not at top of file
# C901: too complex
# F401: unused exports are necessary in __init__.py
ignore = E402, C901, F401
8 changes: 8 additions & 0 deletions src/h5json/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
from .hdf5dtype import getTypeResponse
from .hdf5dtype import getItemSize
from .hdf5dtype import createDataType
from .objid import createObjId
from .objid import getCollectionForId
from .objid import isObjId
from .objid import isS3ObjKey
from .objid import getS3Key
from .objid import getObjId
from .objid import isSchema2Id
from .objid import isRootObjId
from .hdf5db import Hdf5db
from . import _version

Expand Down
File renamed without changes.
66 changes: 66 additions & 0 deletions src/h5json/apps/h5tohs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
##############################################################################
# Copyright by The HDF Group. #
# All rights reserved. #
# #
# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and #
# Utilities. The full HDF5 REST Server copyright notice, including #
# terms governing use, modification, and redistribution, is contained in #
# the file COPYING, which can be found at the root of the source code #
# distribution tree. If you do not have access to this file, you may #
# request a copy from help@hdfgroup.org. #
##############################################################################
import sys
import os.path as op
import logging

from h5json import Hdf5db
from h5json.hsdsstore.hsds_writer import HSDSWriter
from h5json.h5pystore.h5py_reader import H5pyReader


def usage():
    """Print the h5tohs command-line synopsis and exit with status 0."""
    synopsis = f"usage: {sys.argv[0]} [-h] [--nodata] <hdf5_file> <hsds_domain>"
    print(synopsis)
    raise SystemExit(0)


def main():
    """Load an HDF5 file and write its contents to an HSDS domain.

    Command line: ``[-h] [--nodata] <hdf5_file> <hsds_domain>``.
    ``-h``/``--help`` anywhere, a third positional argument, or fewer than
    two positional arguments all end in ``usage()``.  Log records go to
    ``h5tohs.log`` at DEBUG level.
    """
    no_data = False
    positionals = []
    for arg in sys.argv[1:]:
        if arg in ("-h", "--help"):
            usage()
        if arg == "--nodata":
            no_data = True
            continue
        if len(positionals) == 2:
            # both the file and the domain are already set
            usage()
        positionals.append(arg)

    if len(positionals) < 2:
        # the domain (and possibly the file) was not supplied
        usage()
    filename, domain = positionals

    # create logger
    logging.basicConfig(
        filename="h5tohs.log",
        format='%(levelname)s %(asctime)s %(message)s',
        level=logging.DEBUG,
    )
    log = logging.getLogger()

    # the input file must exist before anything is opened
    if not op.isfile(filename):
        sys.exit(f"Cannot find file: {filename}")

    log.info(f"h5tohs {filename}")

    db = Hdf5db(app_logger=log)
    db.writer = HSDSWriter(domain, no_data=no_data, app_logger=log)
    db.reader = H5pyReader(filename, app_logger=log)
    db.open()  # read HDF5 data into db

    db.close()  # close will trigger write to HSDS

# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
main()
54 changes: 54 additions & 0 deletions src/h5json/apps/h5tojson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
##############################################################################
# Copyright by The HDF Group. #
# All rights reserved. #
# #
# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and #
# Utilities. The full HDF5 REST Server copyright notice, including #
# terms governing use, modification, and redistribution, is contained in #
# the file COPYING, which can be found at the root of the source code #
# distribution tree. If you do not have access to this file, you may #
# request a copy from help@hdfgroup.org. #
##############################################################################
import sys
import os.path as op
import logging

from h5json import Hdf5db
from h5json.jsonstore.h5json_writer import H5JsonWriter
from h5json.h5pystore.h5py_reader import H5pyReader


def main():
    """Convert an HDF5 file to its JSON representation.

    Command line: ``[-h] [--nodata] <hdf5_file>``.

    * No arguments, or ``-h``/``--help`` anywhere on the command line:
      print the usage line and exit with status 0.
    * Only option flags and no file name: exit with the usage line as the
      error message (non-zero status).  Previously this case crashed with
      a ``TypeError`` from ``op.isfile(None)``.
    * If several file names are given, the last one is used.

    Log records go to ``h5tojson.log`` at DEBUG level.  The file is read
    through ``H5pyReader``, and the JSON is produced by ``H5JsonWriter``
    when the db is closed.
    """
    usage_msg = f"usage: {sys.argv[0]} [-h] [--nodata] <hdf5_file>"
    if len(sys.argv) < 2:
        print(usage_msg)
        sys.exit(0)

    no_data = False
    filename = None
    for arg in sys.argv[1:]:
        if arg in ("-h", "--help"):
            # honour the help flag in any position, not just the first
            print(usage_msg)
            sys.exit(0)
        if arg == "--nodata":
            no_data = True
        else:
            filename = arg

    if filename is None:
        # validate before logging is configured so a bad invocation
        # does not leave a stray log file behind
        sys.exit(usage_msg)

    # create logger
    logfname = "h5tojson.log"
    loglevel = logging.DEBUG
    logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
    log = logging.getLogger()

    # check that the input file exists
    if not op.isfile(filename):
        sys.exit(f"Cannot find file: {filename}")

    log.info(f"h5tojson {filename}")

    db = Hdf5db(app_logger=log)
    db.reader = H5pyReader(filename, app_logger=log)
    # NOTE(review): the writer gets None as its target - presumably stdout; confirm
    db.writer = H5JsonWriter(None, no_data=no_data, app_logger=log)
    db.open()  # read HDF5 data into db
    db.close()  # close will trigger write to json file


# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
main()
58 changes: 58 additions & 0 deletions src/h5json/apps/jsontoh5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
##############################################################################
# Copyright by The HDF Group. #
# All rights reserved. #
# #
# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and #
# Utilities. The full HDF5 REST Server copyright notice, including #
# terms governing use, modification, and redistribution, is contained in #
# the file COPYING, which can be found at the root of the source code #
# distribution tree. If you do not have access to this file, you may #
# request a copy from help@hdfgroup.org. #
##############################################################################
import sys
import os.path as op
import logging

from h5json import Hdf5db
from h5json.h5pystore.h5py_writer import H5pyWriter
from h5json.jsonstore.h5json_reader import H5JsonReader


def main():
    """Convert an HDF5/JSON file into an HDF5 file.

    Command line: ``[-h] [--nodata] <json_file> <h5_file>``.

    * Fewer than two arguments, or ``-h``/``--help`` anywhere on the
      command line: print the usage line and exit with status 0.
    * Input or output file name missing after option parsing (for example
      ``--nodata in.json``): exit with the usage line as the error message
      (non-zero status).  Previously ``None`` was passed on to
      ``op.isfile`` or to the writer.
    * The first positional argument is the JSON input.  If more than one
      further positional is given, the last one is the HDF5 output.

    Log records go to ``jsontoh5.log`` at DEBUG level.  The JSON is read
    through ``H5JsonReader``, and the HDF5 file is produced by
    ``H5pyWriter`` when the db is closed.
    """
    usage_msg = f"usage: {sys.argv[0]} [-h] [--nodata] <json_file> <h5_file>"
    if len(sys.argv) < 3:
        print(usage_msg)
        sys.exit(0)

    no_data = False
    json_filename = None
    hdf5_filename = None
    for arg in sys.argv[1:]:
        if arg in ("-h", "--help"):
            # honour the help flag in any position, not just the first
            print(usage_msg)
            sys.exit(0)
        if arg == "--nodata":
            no_data = True
        elif not json_filename:
            json_filename = arg
        else:
            hdf5_filename = arg

    if not json_filename or not hdf5_filename:
        # the argument-count check above cannot tell flags from file names,
        # so verify that both files were really supplied
        sys.exit(usage_msg)

    # create logger
    logfname = "jsontoh5.log"
    loglevel = logging.DEBUG
    logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
    log = logging.getLogger()

    # check that the input file exists
    if not op.isfile(json_filename):
        sys.exit(f"Cannot find file: {json_filename}")

    log.info(f"jsontoh5 {json_filename} to {hdf5_filename}")

    db = Hdf5db(app_logger=log)
    db.reader = H5JsonReader(json_filename, app_logger=log)
    db.writer = H5pyWriter(hdf5_filename, no_data=no_data, app_logger=log)
    db.open()  # read json data
    # closing the db writes everything the json reader loaded to the output file
    db.close()


# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
main()
File renamed without changes.
Loading
Loading