godot/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp
2021-04-23 15:57:28 +02:00

532 lines
20 KiB
C++

// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_statistics.h"
#include "bvh_rotate.h"
#include "../common/profile.h"
#include "../../common/algorithms/parallel_prefix_sum.h"
#include "../builders/primrefgen.h"
#include "../builders/bvh_builder_morton.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#if defined(__X86_64__) || defined(__aarch64__)
# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
#else
# define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
#endif
namespace embree
{
namespace isa
{
template<int N>
struct SetBVHNBounds
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
typedef typename BVH::AABBNode AABBNode;
BVH* bvh;
__forceinline SetBVHNBounds (BVH* bvh) : bvh(bvh) {}
__forceinline NodeRecord operator() (NodeRef ref, const NodeRecord* children, size_t num)
{
AABBNode* node = ref.getAABBNode();
BBox3fa res = empty;
for (size_t i=0; i<num; i++) {
const BBox3fa b = children[i].bounds;
res.extend(b);
node->setRef(i,children[i].ref);
node->setBounds(i,b);
}
BBox3fx result = (BBox3fx&)res;
#if ROTATE_TREE
if (N == 4)
{
size_t n = 0;
for (size_t i=0; i<num; i++)
n += children[i].bounds.lower.a;
if (n >= 4096) {
for (size_t i=0; i<num; i++) {
if (children[i].bounds.lower.a < 4096) {
for (int j=0; j<ROTATE_TREE; j++)
BVHNRotate<N>::rotate(node->child(i));
node->child(i).setBarrier();
}
}
}
result.lower.a = unsigned(n);
}
#endif
return NodeRecord(ref,result);
}
};
template<int N, typename Primitive>
struct CreateMortonLeaf;
template<int N>
struct CreateMortonLeaf<N,Triangle4>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);
/* allocate leaf node */
Triangle4* accel = (Triangle4*) alloc.malloc1(sizeof(Triangle4),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
const TriangleMesh* __restrict__ const mesh = this->mesh;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
}
Triangle4::store_nt(accel,Triangle4(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = unsigned(current.size());
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
template<int N>
struct CreateMortonLeaf<N,Triangle4v>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);
/* allocate leaf node */
Triangle4v* accel = (Triangle4v*) alloc.malloc1(sizeof(Triangle4v),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
const TriangleMesh* __restrict__ mesh = this->mesh;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
}
Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
template<int N>
struct CreateMortonLeaf<N,Triangle4i>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);
/* allocate leaf node */
Triangle4i* accel = (Triangle4i*) alloc.malloc1(sizeof(Triangle4i),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 v0 = zero, v1 = zero, v2 = zero;
vuint4 vgeomID = -1, vprimID = -1;
const TriangleMesh* __restrict__ const mesh = this->mesh;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID[i] = geomID_;
vprimID[i] = primID;
unsigned int int_stride = mesh->vertices0.getStride()/4;
v0[i] = tri.v[0] * int_stride;
v1[i] = tri.v[1] * int_stride;
v2[i] = tri.v[2] * int_stride;
}
for (size_t i=items; i<4; i++)
{
vgeomID[i] = vgeomID[0];
vprimID[i] = -1;
v0[i] = 0;
v1[i] = 0;
v2[i] = 0;
}
Triangle4i::store_nt(accel,Triangle4i(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
template<int N>
struct CreateMortonLeaf<N,Quad4v>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (QuadMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);
/* allocate leaf node */
Quad4v* accel = (Quad4v*) alloc.malloc1(sizeof(Quad4v),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero, v3 = zero;
const QuadMesh* __restrict__ mesh = this->mesh;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const QuadMesh::Quad& tri = mesh->quad(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
const Vec3fa& p3 = mesh->vertex(tri.v[3]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
}
Quad4v::store_nt(accel,Quad4v(v0,v1,v2,v3,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
QuadMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
template<int N>
struct CreateMortonLeaf<N,Object>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (UserGeometry* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
/* allocate leaf node */
Object* accel = (Object*) alloc.malloc1(items*sizeof(Object),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const UserGeometry* mesh = this->mesh;
BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int index = morton[start+i].index;
const unsigned int primID = index;
bounds.extend(mesh->bounds(primID));
new (&accel[i]) Object(geomID_,primID);
}
BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
UserGeometry* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
template<int N>
struct CreateMortonLeaf<N,InstancePrimitive>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (Instance* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items <= 1);
/* allocate leaf node */
InstancePrimitive* accel = (InstancePrimitive*) alloc.malloc1(items*sizeof(InstancePrimitive),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const Instance* instance = this->mesh;
BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
bounds.extend(instance->bounds(primID));
new (&accel[i]) InstancePrimitive(instance, geomID_);
}
BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
Instance* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
template<typename Mesh>
struct CalculateMeshBounds
{
__forceinline CalculateMeshBounds (Mesh* mesh)
: mesh(mesh) {}
__forceinline const BBox3fa operator() (const BVHBuilderMorton::BuildPrim& morton) {
return mesh->bounds(morton.index);
}
private:
Mesh* mesh;
};
template<int N, typename Mesh, typename Primitive>
class BVHNMeshBuilderMorton : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
public:
BVHNMeshBuilderMorton (BVH* bvh, Mesh* mesh, unsigned int geomID, const size_t minLeafSize, const size_t maxLeafSize, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD)
: bvh(bvh), mesh(mesh), morton(bvh->device,0), settings(N,BVH::maxBuildDepth,minLeafSize,min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks),singleThreadThreshold), geomID_(geomID) {}
/* build function */
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
morton.clear();
}
size_t numPrimitives = mesh->size();
numPreviousPrimitives = numPrimitives;
/* skip build for empty scene */
if (numPrimitives == 0) {
bvh->set(BVH::emptyNode,empty,0);
return;
}
/* preallocate arrays */
morton.resize(numPrimitives);
size_t bytesEstimated = numPrimitives*sizeof(AABBNode)/(4*N) + size_t(1.2f*Primitive::blocks(numPrimitives)*sizeof(Primitive));
size_t bytesMortonCodes = numPrimitives*sizeof(BVHBuilderMorton::BuildPrim);
bytesEstimated = max(bytesEstimated,bytesMortonCodes); // the first allocation block is reused to sort the morton codes
bvh->alloc.init(bytesMortonCodes,bytesMortonCodes,bytesEstimated);
/* create morton code array */
BVHBuilderMorton::BuildPrim* dest = (BVHBuilderMorton::BuildPrim*) bvh->alloc.specialAlloc(bytesMortonCodes);
size_t numPrimitivesGen = createMortonCodeArray<Mesh>(mesh,morton,bvh->scene->progressInterface);
/* create BVH */
SetBVHNBounds<N> setBounds(bvh);
CreateMortonLeaf<N,Primitive> createLeaf(mesh,geomID_,morton.data());
CalculateMeshBounds<Mesh> calculateBounds(mesh);
auto root = BVHBuilderMorton::build<NodeRecord>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create(),
setBounds,createLeaf,calculateBounds,bvh->scene->progressInterface,
morton.data(),dest,numPrimitivesGen,settings);
bvh->set(root.ref,LBBox3fa(root.bounds),numPrimitives);
#if ROTATE_TREE
if (N == 4)
{
for (int i=0; i<ROTATE_TREE; i++)
BVHNRotate<N>::rotate(bvh->root);
bvh->clearBarrier(bvh->root);
}
#endif
/* clear temporary data for static geometry */
if (bvh->scene->isStaticAccel()) {
morton.clear();
}
bvh->cleanup();
}
void clear() {
morton.clear();
}
private:
BVH* bvh;
Mesh* mesh;
mvector<BVHBuilderMorton::BuildPrim> morton;
BVHBuilderMorton::Settings settings;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
};
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4> ((BVH4*)bvh,mesh,geomID,4,4); }
Builder* BVH4Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4v>((BVH4*)bvh,mesh,geomID,4,4); }
Builder* BVH4Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4i>((BVH4*)bvh,mesh,geomID,4,4); }
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4> ((BVH8*)bvh,mesh,geomID,4,4); }
Builder* BVH8Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4v>((BVH8*)bvh,mesh,geomID,4,4); }
Builder* BVH8Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4i>((BVH8*)bvh,mesh,geomID,4,4); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,QuadMesh,Quad4v>((BVH4*)bvh,mesh,geomID,4,4); }
#if defined(__AVX__)
Builder* BVH8Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,QuadMesh,Quad4v>((BVH8*)bvh,mesh,geomID,4,4); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,UserGeometry,Object>((BVH4*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,UserGeometry,Object>((BVH8*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,Instance,InstancePrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif
}
}