mirror of
https://github.com/HDFGroup/hdf5.git
synced 2024-12-03 02:32:04 +08:00
Fix metadata cache bug when resizing a pinned/protected entry (v2) (#1463)
This commit is contained in:
parent
80954e8272
commit
8e0b427bf2
@ -922,15 +922,6 @@ New Features
|
||||
|
||||
* Improved coverage of regression testing for the feature
|
||||
|
||||
NOTE: we are aware of and working on a fix for an assertion failure that
|
||||
occurs during regression testing of the feature. It looks similar to:
|
||||
|
||||
hdf5/src/H5ACmpio.c:1089: H5AC__log_moved_entry: Assertion `!entry_dirty' failed.
|
||||
|
||||
so far, this assertion failure has only occurred during testing and has
|
||||
not been seen in any production uses, but until this bug is fixed the
|
||||
feature should be considered unstable.
|
||||
|
||||
(JTH - 2022/2/23)
|
||||
|
||||
Fortran Library:
|
||||
@ -1138,6 +1129,23 @@ Bug Fixes since HDF5-1.12.0 release
|
||||
===================================
|
||||
Library
|
||||
-------
|
||||
- Fixed a metadata cache bug when resizing a pinned/protected cache entry
|
||||
|
||||
When resizing a pinned/protected cache entry, the metadata
|
||||
cache code previously would wait until after resizing the
|
||||
entry to attempt to log the newly-dirtied entry. This would
|
||||
cause H5C_resize_entry to mark the entry as dirty and make
|
||||
H5AC_resize_entry think that it doesn't need to add the
|
||||
newly-dirtied entry to the dirty entries skiplist.
|
||||
|
||||
Thus, a subsequent H5AC__log_moved_entry would think it
|
||||
needs to allocate a new entry for insertion into the dirty
|
||||
entry skip list, since the entry doesn't exist on that list.
|
||||
This causes an assertion failure, as the code to allocate a
|
||||
new entry assumes that the entry is not dirty.
|
||||
|
||||
(JRM - 2022/02/28)
|
||||
|
||||
- Issue #1436 identified a problem with the H5_VERS_RELEASE check in the
|
||||
H5check_version function.
|
||||
|
||||
|
73
src/H5AC.c
73
src/H5AC.c
@ -1440,21 +1440,82 @@ H5AC_resize_entry(void *thing, size_t new_size)
|
||||
cache_ptr = entry_ptr->cache_ptr;
|
||||
HDassert(cache_ptr);
|
||||
|
||||
/* Resize the entry */
|
||||
if (H5C_resize_entry(thing, new_size) < 0)
|
||||
HGOTO_ERROR(H5E_CACHE, H5E_CANTRESIZE, FAIL, "can't resize entry")
|
||||
|
||||
#ifdef H5_HAVE_PARALLEL
|
||||
{
|
||||
/* Log the generation of dirty bytes of metadata iff:
|
||||
*
|
||||
* 1) The entry is clean on entry, and this resize will dirty it
|
||||
* (i.e. the current and new sizes are different), and
|
||||
*
|
||||
* 2) This is a parallel computation -- which it is if the aux_ptr
|
||||
* is non-null.
|
||||
*
|
||||
* A few points to note about this section of the code:
|
||||
*
|
||||
* 1) This call must occur before the call to H5C_resize_entry() since
|
||||
* H5AC__log_dirtied_entry() expects the target entry to be clean
|
||||
* on entry.
|
||||
*
|
||||
* 2) This code has some basic issues in terms of the number of bytes
|
||||
* added to the dirty bytes count.
|
||||
*
|
||||
* First, it adds the initial entry size to aux_ptr->dirty_bytes,
|
||||
* not the final size. Note that this code used to use the final
|
||||
* size, but code to support this has been removed from
|
||||
* H5AC__log_dirtied_entry() for reasons unknown since I wrote this
|
||||
* code.
|
||||
*
|
||||
* As long as all ranks do the same thing here, this probably doesn't
|
||||
* matter much, although it will delay initiation of sync points.
|
||||
*
|
||||
* A more interesting point is that this code will not increment
|
||||
* aux_ptr->dirty_bytes if a dirty entry is resized. At first glance
|
||||
* this seems major, as particularly with the older file formats,
|
||||
* resizes can be quite large. However, this is probably not an
|
||||
* issue either, since such resizes will be accompanied by large
|
||||
* amounts of dirty metadata creation in other areas -- which will
|
||||
* cause aux_ptr->dirty_bytes to be incremented.
|
||||
*
|
||||
* The bottom line is that this code is probably OK, but the above
|
||||
* points should be kept in mind.
|
||||
*
|
||||
* One final observation: This comment is occasioned by a bug caused
|
||||
* by moving the call to H5AC__log_dirtied_entry() after the call to
|
||||
* H5C_resize_entry(), and then only calling H5AC__log_dirtied_entry()
|
||||
* if entry_ptr->is_dirty was false.
|
||||
*
|
||||
* Since H5C_resize_entry() marks the target entry dirty unless there
|
||||
* is not change in size, this had the effect of not calling
|
||||
* H5AC__log_dirtied_entry() when it should be, and corrupting
|
||||
* the cleaned and dirtied lists used by rank 0 in the parallel
|
||||
* version of the metadata cache.
|
||||
*
|
||||
* The point here is that you should be very careful when working with
|
||||
* this code, and not modify it unless you fully understand it.
|
||||
*
|
||||
* JRM -- 2/28/22
|
||||
*/
|
||||
|
||||
if ((!entry_ptr->is_dirty) && (entry_ptr->size != new_size)) {
|
||||
|
||||
/* the entry is clean, and will be marked dirty in the resize
|
||||
* operation.
|
||||
*/
|
||||
H5AC_aux_t *aux_ptr;
|
||||
|
||||
aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
|
||||
if ((!entry_ptr->is_dirty) && (NULL != aux_ptr))
|
||||
|
||||
if (NULL != aux_ptr) {
|
||||
|
||||
if (H5AC__log_dirtied_entry(entry_ptr) < 0)
|
||||
HGOTO_ERROR(H5E_CACHE, H5E_CANTMARKDIRTY, FAIL, "can't log dirtied entry")
|
||||
}
|
||||
}
|
||||
#endif /* H5_HAVE_PARALLEL */
|
||||
|
||||
/* Resize the entry */
|
||||
if (H5C_resize_entry(thing, new_size) < 0)
|
||||
HGOTO_ERROR(H5E_CACHE, H5E_CANTRESIZE, FAIL, "can't resize entry")
|
||||
|
||||
done:
|
||||
/* If currently logging, generate a message */
|
||||
if (cache_ptr != NULL && cache_ptr->log_info != NULL)
|
||||
|
Loading…
Reference in New Issue
Block a user