mirror of
https://github.com/godotengine/godot.git
synced 2024-11-21 03:18:37 +08:00
Rewrite index optimization code for maximum efficiency
While all the previous fixes to the optimizeVertexCache invocation fixed the vertex transform efficiency, the import code was still missing two crucial recommendations from the meshoptimizer documentation:

- All meshes should be optimized for vertex cache (this reorders vertices for maximum fetch efficiency).
- When LODs are used with a shared vertex buffer, the vertex order should be generated by doing a vertex fetch optimization on the concatenated index buffer from coarse to fine LODs; this maximizes fetch efficiency for coarse LODs.

The last point is especially crucial for Mali GPUs; unlike other GPUs, where vertex order affects fetch efficiency but not shading, these GPUs have various shading quirks (depending on the GPU generation) that really require consecutive index ranges for each LOD, which requires the second optimization mentioned above. However, all of these optimizations help desktop GPUs and other mobile GPUs as well.

Because this optimization is "global" in the sense that it affects all LODs and all vertex arrays in concert, I've taken this opportunity to isolate all optimization code in a single function, optimize_indices, and pull it out of generate_lods and create_shadow_mesh; this doesn't change the vertex cache efficiency, but makes the code cleaner. Consequently, optimize_indices should be called after other functions like create_shadow_mesh / generate_lods.

This required exposing meshopt_optimizeVertexFetchRemap; as a drive-by, meshopt_simplifySloppy was never used, so it is no longer exposed - this will simplify future meshopt upgrades if they end up changing the function's interface.
This commit is contained in:
parent
1bffd6c73b
commit
260287b3a1
@ -535,8 +535,6 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes,
|
||||
}
|
||||
}
|
||||
|
||||
mesh->optimize_indices_for_cache();
|
||||
|
||||
if (p_generate_lods) {
|
||||
// Use normal merge/split angles that match the defaults used for 3D scene importing.
|
||||
mesh->generate_lods(60.0f, {});
|
||||
@ -546,6 +544,8 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes,
|
||||
mesh->create_shadow_mesh();
|
||||
}
|
||||
|
||||
mesh->optimize_indices();
|
||||
|
||||
if (p_single_mesh && mesh->get_surface_count() > 0) {
|
||||
r_meshes.push_back(mesh);
|
||||
}
|
||||
|
@ -2567,8 +2567,6 @@ Node *ResourceImporterScene::_generate_meshes(Node *p_node, const Dictionary &p_
|
||||
}
|
||||
}
|
||||
|
||||
src_mesh_node->get_mesh()->optimize_indices_for_cache();
|
||||
|
||||
if (generate_lods) {
|
||||
Array skin_pose_transform_array = _get_skinned_pose_transforms(src_mesh_node);
|
||||
src_mesh_node->get_mesh()->generate_lods(merge_angle, skin_pose_transform_array);
|
||||
@ -2578,6 +2576,8 @@ Node *ResourceImporterScene::_generate_meshes(Node *p_node, const Dictionary &p_
|
||||
src_mesh_node->get_mesh()->create_shadow_mesh();
|
||||
}
|
||||
|
||||
src_mesh_node->get_mesh()->optimize_indices();
|
||||
|
||||
if (!save_to_file.is_empty()) {
|
||||
Ref<Mesh> existing = ResourceCache::get_ref(save_to_file);
|
||||
if (existing.is_valid()) {
|
||||
|
@ -40,10 +40,10 @@ void initialize_meshoptimizer_module(ModuleInitializationLevel p_level) {
|
||||
}
|
||||
|
||||
SurfaceTool::optimize_vertex_cache_func = meshopt_optimizeVertexCache;
|
||||
SurfaceTool::optimize_vertex_fetch_remap_func = meshopt_optimizeVertexFetchRemap;
|
||||
SurfaceTool::simplify_func = meshopt_simplify;
|
||||
SurfaceTool::simplify_with_attrib_func = meshopt_simplifyWithAttributes;
|
||||
SurfaceTool::simplify_scale_func = meshopt_simplifyScale;
|
||||
SurfaceTool::simplify_sloppy_func = meshopt_simplifySloppy;
|
||||
SurfaceTool::generate_remap_func = meshopt_generateVertexRemap;
|
||||
SurfaceTool::remap_vertex_func = meshopt_remapVertexBuffer;
|
||||
SurfaceTool::remap_index_func = meshopt_remapIndexBuffer;
|
||||
@ -55,9 +55,9 @@ void uninitialize_meshoptimizer_module(ModuleInitializationLevel p_level) {
|
||||
}
|
||||
|
||||
SurfaceTool::optimize_vertex_cache_func = nullptr;
|
||||
SurfaceTool::optimize_vertex_fetch_remap_func = nullptr;
|
||||
SurfaceTool::simplify_func = nullptr;
|
||||
SurfaceTool::simplify_scale_func = nullptr;
|
||||
SurfaceTool::simplify_sloppy_func = nullptr;
|
||||
SurfaceTool::generate_remap_func = nullptr;
|
||||
SurfaceTool::remap_vertex_func = nullptr;
|
||||
SurfaceTool::remap_index_func = nullptr;
|
||||
|
@ -168,10 +168,56 @@ void ImporterMesh::set_surface_material(int p_surface, const Ref<Material> &p_ma
|
||||
mesh.unref();
|
||||
}
|
||||
|
||||
void ImporterMesh::optimize_indices_for_cache() {
|
||||
template <typename T>
|
||||
static Vector<T> _remap_array(Vector<T> p_array, const Vector<uint32_t> &p_remap, uint32_t p_vertex_count) {
|
||||
ERR_FAIL_COND_V(p_array.size() % p_remap.size() != 0, p_array);
|
||||
int num_elements = p_array.size() / p_remap.size();
|
||||
T *data = p_array.ptrw();
|
||||
SurfaceTool::remap_vertex_func(data, data, p_remap.size(), sizeof(T) * num_elements, p_remap.ptr());
|
||||
p_array.resize(p_vertex_count * num_elements);
|
||||
return p_array;
|
||||
}
|
||||
|
||||
// Applies `p_remap` to every per-vertex array stored in `r_arrays`, replacing
// each entry with its remapped version sized for `p_vertex_count` vertices.
//
// The RS::ARRAY_INDEX slot and NIL (unused) slots are left untouched.
// Encountering an array of an unsupported type reports an error and stops
// processing the remaining slots.
static void _remap_arrays(Array &r_arrays, const Vector<uint32_t> &p_remap, uint32_t p_vertex_count) {
	const int slot_count = r_arrays.size();

	for (int slot = 0; slot < slot_count; ++slot) {
		// The index buffer is not per-vertex data; it is not handled here.
		if (slot == RS::ARRAY_INDEX) {
			continue;
		}

		const Variant::Type slot_type = r_arrays[slot].get_type();

		// Unused slot.
		if (slot_type == Variant::NIL) {
			continue;
		}

		// Dispatch on the packed array type so the element size used for the remap is correct.
		if (slot_type == Variant::PACKED_VECTOR3_ARRAY) {
			r_arrays[slot] = _remap_array<Vector3>(r_arrays[slot], p_remap, p_vertex_count);
		} else if (slot_type == Variant::PACKED_VECTOR2_ARRAY) {
			r_arrays[slot] = _remap_array<Vector2>(r_arrays[slot], p_remap, p_vertex_count);
		} else if (slot_type == Variant::PACKED_FLOAT32_ARRAY) {
			r_arrays[slot] = _remap_array<float>(r_arrays[slot], p_remap, p_vertex_count);
		} else if (slot_type == Variant::PACKED_INT32_ARRAY) {
			r_arrays[slot] = _remap_array<int32_t>(r_arrays[slot], p_remap, p_vertex_count);
		} else if (slot_type == Variant::PACKED_BYTE_ARRAY) {
			r_arrays[slot] = _remap_array<uint8_t>(r_arrays[slot], p_remap, p_vertex_count);
		} else if (slot_type == Variant::PACKED_COLOR_ARRAY) {
			r_arrays[slot] = _remap_array<Color>(r_arrays[slot], p_remap, p_vertex_count);
		} else {
			ERR_FAIL_MSG("Unhandled array type.");
		}
	}
}
|
||||
|
||||
void ImporterMesh::optimize_indices() {
|
||||
if (!SurfaceTool::optimize_vertex_cache_func) {
|
||||
return;
|
||||
}
|
||||
if (!SurfaceTool::optimize_vertex_fetch_remap_func || !SurfaceTool::remap_vertex_func || !SurfaceTool::remap_index_func) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < surfaces.size(); i++) {
|
||||
if (surfaces[i].primitive != Mesh::PRIMITIVE_TRIANGLES) {
|
||||
@ -188,10 +234,48 @@ void ImporterMesh::optimize_indices_for_cache() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Optimize indices for vertex cache to establish final triangle order.
|
||||
int *indices_ptr = indices.ptrw();
|
||||
SurfaceTool::optimize_vertex_cache_func((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, vertex_count);
|
||||
|
||||
surfaces.write[i].arrays[RS::ARRAY_INDEX] = indices;
|
||||
|
||||
for (int j = 0; j < surfaces[i].lods.size(); ++j) {
|
||||
Surface::LOD &lod = surfaces.write[i].lods.write[j];
|
||||
int *lod_indices_ptr = lod.indices.ptrw();
|
||||
SurfaceTool::optimize_vertex_cache_func((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices.size(), vertex_count);
|
||||
}
|
||||
|
||||
// Concatenate indices for all LODs in the order of coarse->fine; this establishes the effective order of vertices,
|
||||
// and is important to optimize for vertex fetch (all GPUs) and shading (Mali GPUs)
|
||||
PackedInt32Array merged_indices;
|
||||
for (int j = surfaces[i].lods.size() - 1; j >= 0; --j) {
|
||||
merged_indices.append_array(surfaces[i].lods[j].indices);
|
||||
}
|
||||
merged_indices.append_array(indices);
|
||||
|
||||
// Generate remap array that establishes optimal vertex order according to the order of indices above.
|
||||
Vector<uint32_t> remap;
|
||||
remap.resize(vertex_count);
|
||||
unsigned int new_vertex_count = SurfaceTool::optimize_vertex_fetch_remap_func(remap.ptrw(), (const unsigned int *)merged_indices.ptr(), merged_indices.size(), vertex_count);
|
||||
|
||||
// We need to remap all vertex and index arrays in lockstep according to the remap.
|
||||
SurfaceTool::remap_index_func((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, remap.ptr());
|
||||
surfaces.write[i].arrays[RS::ARRAY_INDEX] = indices;
|
||||
|
||||
for (int j = 0; j < surfaces[i].lods.size(); ++j) {
|
||||
Surface::LOD &lod = surfaces.write[i].lods.write[j];
|
||||
int *lod_indices_ptr = lod.indices.ptrw();
|
||||
SurfaceTool::remap_index_func((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices.size(), remap.ptr());
|
||||
}
|
||||
|
||||
_remap_arrays(surfaces.write[i].arrays, remap, new_vertex_count);
|
||||
for (int j = 0; j < surfaces[i].blend_shape_data.size(); j++) {
|
||||
_remap_arrays(surfaces.write[i].blend_shape_data.write[j].arrays, remap, new_vertex_count);
|
||||
}
|
||||
}
|
||||
|
||||
if (shadow_mesh.is_valid()) {
|
||||
shadow_mesh->optimize_indices();
|
||||
}
|
||||
}
|
||||
|
||||
@ -215,9 +299,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf
|
||||
if (!SurfaceTool::simplify_with_attrib_func) {
|
||||
return;
|
||||
}
|
||||
if (!SurfaceTool::optimize_vertex_cache_func) {
|
||||
return;
|
||||
}
|
||||
|
||||
LocalVector<Transform3D> bone_transform_vector;
|
||||
for (int i = 0; i < p_bone_transform_array.size(); i++) {
|
||||
@ -431,12 +512,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf
|
||||
}
|
||||
|
||||
surfaces.write[i].lods.sort_custom<Surface::LODComparator>();
|
||||
|
||||
for (int j = 0; j < surfaces.write[i].lods.size(); j++) {
|
||||
Surface::LOD &lod = surfaces.write[i].lods.write[j];
|
||||
unsigned int *lod_indices_ptr = (unsigned int *)lod.indices.ptrw();
|
||||
SurfaceTool::optimize_vertex_cache_func(lod_indices_ptr, lod_indices_ptr, lod.indices.size(), vertex_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -574,10 +649,6 @@ void ImporterMesh::create_shadow_mesh() {
|
||||
index_wptr[j] = vertex_remap[index];
|
||||
}
|
||||
|
||||
if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) {
|
||||
SurfaceTool::optimize_vertex_cache_func((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size());
|
||||
}
|
||||
|
||||
new_surface[RS::ARRAY_INDEX] = new_indices;
|
||||
|
||||
// Make sure the same LODs as the full version are used.
|
||||
@ -596,10 +667,6 @@ void ImporterMesh::create_shadow_mesh() {
|
||||
index_wptr[k] = vertex_remap[index];
|
||||
}
|
||||
|
||||
if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) {
|
||||
SurfaceTool::optimize_vertex_cache_func((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size());
|
||||
}
|
||||
|
||||
lods[surfaces[i].lods[j].distance] = new_indices;
|
||||
}
|
||||
}
|
||||
|
@ -113,7 +113,7 @@ public:
|
||||
|
||||
void set_surface_material(int p_surface, const Ref<Material> &p_material);
|
||||
|
||||
void optimize_indices_for_cache();
|
||||
void optimize_indices();
|
||||
|
||||
void generate_lods(float p_normal_merge_angle, Array p_skin_pose_transform_array);
|
||||
|
||||
|
@ -33,10 +33,10 @@
|
||||
#define EQ_VERTEX_DIST 0.00001
|
||||
|
||||
SurfaceTool::OptimizeVertexCacheFunc SurfaceTool::optimize_vertex_cache_func = nullptr;
|
||||
SurfaceTool::OptimizeVertexFetchRemapFunc SurfaceTool::optimize_vertex_fetch_remap_func = nullptr;
|
||||
SurfaceTool::SimplifyFunc SurfaceTool::simplify_func = nullptr;
|
||||
SurfaceTool::SimplifyWithAttribFunc SurfaceTool::simplify_with_attrib_func = nullptr;
|
||||
SurfaceTool::SimplifyScaleFunc SurfaceTool::simplify_scale_func = nullptr;
|
||||
SurfaceTool::SimplifySloppyFunc SurfaceTool::simplify_sloppy_func = nullptr;
|
||||
SurfaceTool::GenerateRemapFunc SurfaceTool::generate_remap_func = nullptr;
|
||||
SurfaceTool::RemapVertexFunc SurfaceTool::remap_vertex_func = nullptr;
|
||||
SurfaceTool::RemapIndexFunc SurfaceTool::remap_index_func = nullptr;
|
||||
|
@ -90,14 +90,14 @@ public:
|
||||
|
||||
typedef void (*OptimizeVertexCacheFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count);
|
||||
static OptimizeVertexCacheFunc optimize_vertex_cache_func;
|
||||
typedef size_t (*OptimizeVertexFetchRemapFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count);
|
||||
static OptimizeVertexFetchRemapFunc optimize_vertex_fetch_remap_func;
|
||||
typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float *r_error);
|
||||
static SimplifyFunc simplify_func;
|
||||
typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, const float *attributes, size_t attribute_stride, const float *attribute_weights, size_t attribute_count, const unsigned char *vertex_lock, size_t target_index_count, float target_error, unsigned int options, float *result_error);
|
||||
static SimplifyWithAttribFunc simplify_with_attrib_func;
|
||||
typedef float (*SimplifyScaleFunc)(const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
|
||||
static SimplifyScaleFunc simplify_scale_func;
|
||||
typedef size_t (*SimplifySloppyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *out_result_error);
|
||||
static SimplifySloppyFunc simplify_sloppy_func;
|
||||
typedef size_t (*GenerateRemapFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const void *vertices, size_t vertex_count, size_t vertex_size);
|
||||
static GenerateRemapFunc generate_remap_func;
|
||||
typedef void (*RemapVertexFunc)(void *destination, const void *vertices, size_t vertex_count, size_t vertex_size, const unsigned int *remap);
|
||||
@ -222,7 +222,9 @@ public:
|
||||
|
||||
void clear();
|
||||
|
||||
LocalVector<Vertex> &get_vertex_array() { return vertex_array; }
|
||||
LocalVector<Vertex> &get_vertex_array() {
|
||||
return vertex_array;
|
||||
}
|
||||
|
||||
void create_from_triangle_arrays(const Array &p_arrays);
|
||||
void create_from_arrays(const Array &p_arrays, Mesh::PrimitiveType p_primitive_type = Mesh::PRIMITIVE_TRIANGLES);
|
||||
|
Loading…
Reference in New Issue
Block a user