Skip to content
Draft
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
978e672
add h5json package
jreadey Apr 14, 2025
abb5d0c
temp use of github branch for h5json ref
jreadey Apr 14, 2025
ed44afa
remove array_util test
jreadey Apr 14, 2025
bdff6e4
use h5json for ndarray_compare function
jreadey Apr 14, 2025
3904cf9
use h5json objid funcs
jreadey Apr 23, 2025
e1926c0
add nodeUtil.py
jreadey Apr 23, 2025
ae4579f
fix parameter for createObjId call
jreadey Apr 23, 2025
d6cad74
fix collection name for use with h5json
jreadey Apr 23, 2025
6add48a
use connsistent collection name for isValidUuid
jreadey Apr 23, 2025
b13321c
fix flake8 format errors
jreadey Apr 23, 2025
fee9390
fix flake8 error in testall
jreadey Apr 23, 2025
f1b1cab
use h5json for unit test id
jreadey Apr 23, 2025
5dc3f76
restrict version on numcodecs
jreadey Apr 24, 2025
fb17e10
allow client to generate obj ids
jreadey Apr 30, 2025
3be18a0
enable attributes to be included with POST req
jreadey May 7, 2025
00d7c96
add create timestamps for attributes in obj create
jreadey May 7, 2025
47b9a6e
enable links to be initialized in post groups
jreadey May 7, 2025
d9c3e87
support dataset value init in post request
jreadey May 8, 2025
4ab24fc
add compound init value test
jreadey May 9, 2025
fc3ad68
added post data with compound data initializer
jreadey May 9, 2025
8a18945
add post_crawler class
jreadey May 15, 2025
a8ec66d
avoid exception for mkdir race condition
jreadey May 15, 2025
41e23e9
use domain crawler to create links for post group multi
jreadey May 15, 2025
7cfa3d6
added multi create for datatype objs
jreadey May 16, 2025
ef746d0
added datatype test with no type in body
jreadey May 18, 2025
b1af9bc
modularize dataset creation args processing
jreadey May 20, 2025
52f42f3
refacotr post dataset args to service_lib.py
jreadey May 21, 2025
ce45804
add multi-dataset test with init data
jreadey May 21, 2025
88e0691
allow client group id for PUT domain
jreadey Jun 6, 2025
7561534
fix np.frombuffer error
jreadey Jun 8, 2025
25c4cf3
fix dsetUtil flake errors
jreadey Jun 8, 2025
5cc77e7
expanded link test
jreadey Jul 14, 2025
45f3aa5
added config to test high latency storage
jreadey Jul 16, 2025
ff1c043
added put_data action for DomainCrawler
jreadey Jul 22, 2025
cda56cf
fix for hang in DomainCrawler put_data handler
jreadey Jul 23, 2025
5a2d4d6
reduce log verbosity
jreadey Jul 23, 2025
053395c
fix for regression with h5pyd master branch
jreadey Jul 29, 2025
78127f1
enable client-based timestamps for attribute and link creation
jreadey Sep 8, 2025
f96b34c
remove python 3.9 from .git workflow
jreadey Sep 9, 2025
03e413f
adjust min time for time skew test
jreadey Sep 9, 2025
b6016e0
use hdf5-json util classes
jreadey Oct 29, 2025
61d38fd
update requirement.txt
jreadey Nov 13, 2025
73d8223
updates to support h5json latest
joshStillerman Dec 16, 2025
a2ca1ee
updated for new hdf5-json methods
jreadey Dec 26, 2025
55c8598
update for h5json changes
jreadey Jan 4, 2026
77042d8
added consolidated metadata support
jreadey Jan 6, 2026
23bb24b
fix for use of H5S_UNLIMITED in maxdims
jreadey Jan 6, 2026
c66d632
fix for domain_test
jreadey Jan 6, 2026
6917c5d
refactor linkUtil with h5json
jreadey Jan 8, 2026
99fda2d
fix for attr uninit values
jreadey Feb 3, 2026
2bafb51
check for zero-length domain name
jreadey Feb 11, 2026
b4f4658
tmp ref to github for h5json
jreadey Mar 25, 2026
179ee10
fixed ref to timeUtil
jreadey Mar 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions admin/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ allow_any_bucket_read: true # enable reads to buckets other than default bucket
allow_any_bucket_write: true # enable writes to buckets other than default bucket
bit_shuffle_default_blocksize: 2048 # default blocksize for bitshuffle filter
max_rangeget_gap: 1024 # max gap in bytes for intelligent range get requests
predate_maxtime: 10.0 # max delta between object created timestamp in request and actual time
# DEPRECATED - the remaining config values are not currently used but are kept for backward compatibility with older container images
aws_lambda_chunkread_function: null # name of aws lambda function for chunk reading
aws_lambda_threshold: 4 # number of chunks per node per request to reach before using lambda
Expand Down
10 changes: 6 additions & 4 deletions hsds/async_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@
from aiohttp.client_exceptions import ClientError
from aiohttp.web_exceptions import HTTPNotFound, HTTPInternalServerError
from aiohttp.web_exceptions import HTTPForbidden
from .util.idUtil import isValidUuid, isSchema2Id, getS3Key, isS3ObjKey
from .util.idUtil import getObjId, isValidChunkId, getCollectionForId
from h5json.hdf5dtype import getItemSize
from h5json.hdf5dtype import createDataType
from h5json.array_util import getNumElements, bytesToArray
from h5json.objid import isValidUuid, isSchema2Id, getS3Key, isS3ObjKey
from h5json.objid import getObjId, isValidChunkId, getCollectionForId

from .util.chunkUtil import getDatasetId, getNumChunks, ChunkIterator
from .util.hdf5dtype import getItemSize, createDataType
from .util.arrayUtil import getNumElements, bytesToArray
from .util.dsetUtil import getHyperslabSelection, getFilterOps, getChunkDims, getFilters
from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout, getShapeDims
from .util.storUtil import getStorKeys, putStorJSONObj, getStorJSONObj
Expand Down
7 changes: 4 additions & 3 deletions hsds/attr_dn.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
from aiohttp.web_exceptions import HTTPInternalServerError
from aiohttp.web import json_response

from h5json.hdf5dtype import getItemSize, createDataType
from h5json.array_util import arrayToBytes, jsonToArray, decodeData
from h5json.array_util import bytesToArray, bytesArrayToList, getNumElements

from .util.attrUtil import validateAttributeName, isEqualAttr
from .util.hdf5dtype import getItemSize, createDataType
from .util.globparser import globmatch
from .util.dsetUtil import getShapeDims
from .util.arrayUtil import arrayToBytes, jsonToArray, decodeData
from .util.arrayUtil import bytesToArray, bytesArrayToList, getNumElements
from .util.domainUtil import isValidBucketName
from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj
from . import hsds_logger as log
Expand Down
267 changes: 15 additions & 252 deletions hsds/attr_sn.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@
from aiohttp.web import StreamResponse
from json import JSONDecodeError

from h5json.hdf5dtype import createDataType, getItemSize
from h5json.array_util import jsonToArray, getNumElements
from h5json.array_util import bytesToArray, arrayToBytes, decodeData, encodeData
from h5json.objid import isValidUuid

from .util.httpUtil import getAcceptType, jsonResponse, getHref, getBooleanParam
from .util.globparser import globmatch
from .util.idUtil import isValidUuid, getRootObjId
from .util.authUtil import getUserPasswordFromRequest, validateUserPassword
from .util.domainUtil import getDomainFromRequest, isValidDomain
from .util.domainUtil import getBucketForDomain, verifyRoot
from .util.attrUtil import validateAttributeName, getRequestCollectionName
from .util.hdf5dtype import validateTypeItem, getBaseTypeJson
from .util.hdf5dtype import createDataType, getItemSize
from .util.arrayUtil import jsonToArray, getNumElements, bytesArrayToList
from .util.arrayUtil import bytesToArray, arrayToBytes, decodeData, encodeData
from .util.dsetUtil import getShapeDims

from .servicenode_lib import getDomainJson, getObjectJson, validateAction
from .servicenode_lib import getAttributes, putAttributes, deleteAttributes
from .servicenode_lib import getDomainJson, getAttributeFromRequest, getAttributesFromRequest
from .servicenode_lib import getAttributes, putAttributes, deleteAttributes, validateAction
from .domain_crawl import DomainCrawler
from . import hsds_logger as log
from . import config
Expand Down Expand Up @@ -295,244 +295,6 @@ async def GET_Attribute(request):
return resp


async def _getTypeFromRequest(app, body, obj_id=None, bucket=None):
    """Resolve the "type" entry of a request body to a type JSON item.

    The "type" value is handled in one of three ways:
      * a string starting with "t-" is treated as a committed datatype id;
        its JSON is fetched with getObjectJson and the id is recorded under
        the "id" key of the returned type
      * any other string is converted with getBaseTypeJson
      * anything else is used as given

    In every case the result is passed to validateTypeItem before being
    returned.

    Raises HTTPBadRequest when "type" is missing, when the committed type's
    "root" differs from getRootObjId(obj_id), when getBaseTypeJson raises
    TypeError, or when validateTypeItem raises KeyError, TypeError or
    ValueError.
    """

    def _reject(msg):
        # log the problem and hand back the exception for the caller to raise
        log.warn(msg)
        return HTTPBadRequest(reason=msg)

    if "type" not in body:
        raise _reject("PUT attribute with no type in body")

    type_item = body["type"]

    if isinstance(type_item, str):
        if type_item.startswith("t-"):
            # committed type - the type json lives on the DN
            ctype_id = type_item
            log.debug(f"got ctypeid: {ctype_id}")
            ctype_json = await getObjectJson(app, ctype_id, bucket=bucket)
            log.debug(f"ctype {ctype_id}: {ctype_json}")
            if ctype_json["root"] != getRootObjId(obj_id):
                raise _reject("Referenced committed datatype must belong in same domain")
            type_item = ctype_json["type"]
            # remember which committed type this came from
            type_item["id"] = ctype_id
        else:
            # predefined type string (e.g. "H5T_STD_I32LE") - swap it for
            # the corresponding json representation
            try:
                type_item = getBaseTypeJson(type_item)
            except TypeError:
                raise _reject("PUT attribute with invalid predefined type")

    handled = (KeyError, TypeError, ValueError)
    try:
        validateTypeItem(type_item)
    except handled as err:
        # label with the handled base class so the text does not depend on
        # which subclass was actually raised
        label = next(cls.__name__ for cls in handled if isinstance(err, cls))
        raise _reject(f"{label} creating type: {err}")

    return type_item


def _getShapeFromRequest(body):
    """Build the canonical shape JSON for an attribute request body.

    The "shape" entry of body may be:
      * absent                     -> {"class": "H5S_SCALAR"}
      * a dict with a "class" key  -> handled according to that class
      * a string                   -> taken as the class name
      * an empty list              -> {"class": "H5S_SCALAR"}
      * anything else              -> {"class": "H5S_SIMPLE"} with "dims"
                                      taken from getShapeDims(shape)

    Returns a dict with a "class" key and, for H5S_SIMPLE, a "dims" key.

    Raises HTTPBadRequest when a null shape is combined with dims or with a
    "value" in the body, when the dims reported for a scalar shape are not
    [1], or when the class name is not one of H5S_NULL, H5S_SCALAR or
    H5S_SIMPLE.
    """

    def _reject(msg):
        # log the problem and hand back the exception for the caller to raise
        log.warn(msg)
        return HTTPBadRequest(reason=msg)

    if "shape" not in body:
        return {"class": "H5S_SCALAR"}

    shape_body = body["shape"]
    if isinstance(shape_body, dict) and "class" in shape_body:
        shape_class = shape_body["class"]
    elif isinstance(shape_body, str):
        shape_class = shape_body
    else:
        shape_class = None

    if not shape_class:
        # no class, interpret shape value as dimensions and
        # use H5S_SIMPLE as class
        if isinstance(shape_body, list) and len(shape_body) == 0:
            return {"class": "H5S_SCALAR"}
        return {"class": "H5S_SIMPLE", "dims": getShapeDims(shape_body)}

    if shape_class == "H5S_NULL":
        if isinstance(shape_body, dict) and "dims" in shape_body:
            raise _reject("can't include dims with null shape")
        # "value" is a key of the request body, not of the shape, so this
        # check applies whether the shape was given as a dict or a string
        if "value" in body:
            raise _reject("can't have H5S_NULL shape with value")
        return {"class": "H5S_NULL"}

    if shape_class == "H5S_SCALAR":
        dims = getShapeDims(shape_body)
        if len(dims) != 1 or dims[0] != 1:
            raise _reject("dimensions aren't valid for scalar attribute")
        return {"class": "H5S_SCALAR"}

    if shape_class == "H5S_SIMPLE":
        return {"class": "H5S_SIMPLE", "dims": getShapeDims(shape_body)}

    raise _reject(f"Unknown shape class: {shape_class}")


def _getValueFromRequest(body, data_type, data_shape):
    """Return the attribute value carried by the request JSON, if any.

    Returns None when body has no "value" key.  Otherwise the value is
    checked against the given type and shape:
      * without a truthy "encoding" entry, the value is run through
        jsonToArray as a check and returned unchanged
      * with an "encoding" entry, the value is decoded with decodeData and
        converted with bytesToArray; if bytesArrayToList then yields a
        truthy result that JSON form is returned (its first element when
        the shape class is H5S_SCALAR), otherwise the decoded bytes are
        returned to signal that the caller needs to encode them

    Raises HTTPBadRequest when a value is supplied although getShapeDims
    returned None, when an encoded value has a variable-length type, cannot
    be decoded or has an unexpected byte count, or when the data does not
    fit the shape and type.
    """

    def _reject(msg):
        # log the problem and hand back the exception for the caller to raise
        log.warn(msg)
        return HTTPBadRequest(reason=msg)

    dims = getShapeDims(data_shape)
    if "value" not in body:
        return None
    if dims is None:
        raise _reject("Bad Request: data can not be included with H5S_NULL space")

    value = body["value"]
    arr_dtype = createDataType(data_type)  # np datatype
    # an empty dims list is checked as a one-element array
    np_dims = dims if len(dims) != 0 else [1, ]

    if not body.get("encoding"):
        # plain JSON - only verify that it matches the array shape and type
        try:
            jsonToArray(np_dims, arr_dtype, value)
        except ValueError as e:
            raise _reject(f"Bad Request: input data doesn't match selection: {e}")
        return value

    if getItemSize(data_type) == "H5T_VARIABLE":
        raise _reject("base64 encoding is not support for variable length attributes")

    try:
        data = decodeData(value)
    except ValueError:
        raise _reject("unable to decode data")

    expected_numbytes = arr_dtype.itemsize * np.prod(dims)
    if len(data) != expected_numbytes:
        raise _reject(f"expected: {expected_numbytes} but got: {len(data)}")

    # make sure the bytes really fit the requested shape and type
    try:
        arr = bytesToArray(data, arr_dtype, np_dims)
    except ValueError as e:
        log.debug(f"data: {data}")
        log.debug(f"type: {arr_dtype}")
        log.debug(f"np_dims: {np_dims}")
        msg = f"Bad Request: encoded input data doesn't match shape and type: {e}"
        raise _reject(msg)

    # prefer a JSON representation of the decoded data when one exists
    list_data = arr.tolist()
    value_json = None
    try:
        value_json = bytesArrayToList(list_data)
    except ValueError as err:
        log.warn(f"Cannot decode bytes to list: {err}, will store as encoded bytes")

    if not value_json:
        # bytes tell the caller that the value still has to be encoded
        return data

    log.debug("will store base64 input as json")
    if data_shape["class"] == "H5S_SCALAR":
        # just use the scalar value
        return value_json[0]
    return value_json


async def _getAttributeFromRequest(app, req_json, obj_id=None, bucket=None):
    """Build an attribute item from the given request JSON.

    The type, shape and value are obtained from _getTypeFromRequest,
    _getShapeFromRequest and _getValueFromRequest respectively; any
    exception they raise propagates to the caller.

    Returns a dict with "type", "shape" and "value" keys.  "value" is None
    when the request carried no value.  When the value comes back as bytes
    it is stored base64 encoded and an "encoding" key set to "base64" is
    added.
    """
    attr_type = await _getTypeFromRequest(app, req_json, obj_id=obj_id, bucket=bucket)
    attr_shape = _getShapeFromRequest(req_json)
    attr_value = _getValueFromRequest(req_json, attr_type, attr_shape)

    attr_item = {"type": attr_type, "shape": attr_shape}
    if isinstance(attr_value, bytes):
        # store as base64 - previously the encoded data was computed but
        # never put into the item, leaving "encoding" without a "value"
        encoded = encodeData(attr_value)
        if isinstance(encoded, bytes):
            # NOTE(review): encodeData's return type is not visible here;
            # decode bytes so the item stays JSON serializable - confirm
            encoded = encoded.decode("ascii")
        attr_item["encoding"] = "base64"
        attr_item["value"] = encoded
    else:
        # a JSON dict, a primitive value, or None when no value was sent
        attr_item["value"] = attr_value

    return attr_item


async def _getAttributesFromRequest(request, req_json, obj_id=None, bucket=None):
    """Read the given JSON dictionary and return a dict of attribute JSON.

    Two request layouts are recognized:
      * an "attributes" key holding a dict that maps attribute names to
        attribute descriptions; each one is converted with
        _getAttributeFromRequest
      * a "type" key, describing a single attribute; its name is taken from
        the "name" key of req_json, or else from the "name" entry of
        request.match_info

    Returns a dict mapping attribute name to attribute item; the dict is
    empty when req_json defines no attribute.

    Raises HTTPBadRequest when "attributes" is not a dict or when no
    attribute name can be determined.
    """
    app = request.app
    attr_items = {}
    kwargs = {"obj_id": obj_id}
    if bucket:
        kwargs["bucket"] = bucket

    if "attributes" in req_json:
        attributes = req_json["attributes"]
        if not isinstance(attributes, dict):
            # the check is for a dict, so report that as the expected type
            msg = f"expected dict for attributes but got: {type(attributes)}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        # read each attr_item and canonicalize the shape, type, verify value
        for attr_name, attr_json in attributes.items():
            attr_items[attr_name] = await _getAttributeFromRequest(app, attr_json, **kwargs)
    elif "type" in req_json:
        # single attribute create - fake an item list
        attr_item = await _getAttributeFromRequest(app, req_json, **kwargs)
        if "name" in req_json:
            attr_name = req_json["name"]
        else:
            attr_name = request.match_info.get("name")
            validateAttributeName(attr_name)
        if not attr_name:
            msg = "Missing attribute name"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        attr_items[attr_name] = attr_item
    else:
        log.debug(f"_getAttributes from request - no attribute defined in {req_json}")

    return attr_items


async def PUT_Attribute(request):
"""HTTP method to create a new attribute"""
log.request(request)
Expand All @@ -555,7 +317,7 @@ async def PUT_Attribute(request):
log.debug(f"Attribute name: [{attr_name}]")
validateAttributeName(attr_name)

log.info(f"PUT Attributes id: {req_obj_id} name: {attr_name}")
log.info(f"PUT Attribute id: {req_obj_id} name: {attr_name}")
username, pswd = getUserPasswordFromRequest(request)
# write actions need auth
await validateUserPassword(app, username, pswd)
Expand Down Expand Up @@ -587,7 +349,7 @@ async def PUT_Attribute(request):

# get attribute from request body
kwargs = {"bucket": bucket, "obj_id": req_obj_id}
attr_body = await _getAttributeFromRequest(app, body, **kwargs)
attr_body = await getAttributeFromRequest(app, body, **kwargs)

# write attribute to DN
attr_json = {attr_name: attr_body}
Expand Down Expand Up @@ -624,7 +386,7 @@ async def PUT_Attributes(request):
await validateUserPassword(app, username, pswd)

if not request.has_body:
msg = "PUT Attribute with no body"
msg = "PUT Attributes with no body"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
try:
Expand Down Expand Up @@ -654,10 +416,10 @@ async def PUT_Attributes(request):
if not req_obj_id:
req_obj_id = domain_json["root"]
kwargs = {"obj_id": req_obj_id, "bucket": bucket}
attr_items = await _getAttributesFromRequest(request, body, **kwargs)
attr_items = await getAttributesFromRequest(app, body, **kwargs)

if attr_items:
log.debug(f"PUT Attribute {len(attr_items)} attibutes to add")
log.debug(f"PUT Attribute {len(attr_items)} attributes to add")
else:
log.debug("no attributes defined yet")

Expand All @@ -666,6 +428,7 @@ async def PUT_Attributes(request):
obj_ids = {}
if "obj_ids" in body:
body_ids = body["obj_ids"]

if isinstance(body_ids, list):
# multi cast the attributes - each attribute in attr-items
# will be written to each of the objects identified by obj_id
Expand All @@ -685,7 +448,7 @@ async def PUT_Attributes(request):
msg += f"{len(obj_ids)} objects"
log.info(msg)
elif isinstance(body_ids, dict):
# each value is body_ids is a set of attriutes to write to the object
# each value is body_ids is a set of attributes to write to the object
# unlike the above case, different attributes can be written to
# different objects
if attr_items:
Expand All @@ -701,7 +464,7 @@ async def PUT_Attributes(request):
id_json = body_ids[obj_id]

kwargs = {"obj_id": obj_id, "bucket": bucket}
obj_items = await _getAttributesFromRequest(request, id_json, **kwargs)
obj_items = await getAttributesFromRequest(app, id_json, **kwargs)
if obj_items:
obj_ids[obj_id] = obj_items

Expand Down
Loading
Loading