mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-30 17:40:05 +08:00
* multi-threaded rendering
* increased the number of iterations, with more iterations done before testing for divergence. This results in a 2x speedup from vectorization.
This commit is contained in:
parent
cacf986a7f
commit
56d03f181e
@ -15,20 +15,30 @@ void MandelbrotWidget::resizeEvent(QResizeEvent *)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Real> int MandelbrotWidget::render(int max_iter, int img_width, int img_height)
|
template<typename T> struct iters_before_test { enum { ret = 8 }; };
|
||||||
|
template<> struct iters_before_test<double> { enum { ret = 16 }; };
|
||||||
|
|
||||||
|
template<typename Real> void MandelbrotThread::render(int img_width, int img_height)
|
||||||
{
|
{
|
||||||
enum { packetSize = Eigen::ei_packet_traits<Real>::size }; // number of reals in a Packet
|
enum { packetSize = Eigen::ei_packet_traits<Real>::size }; // number of reals in a Packet
|
||||||
typedef Eigen::Matrix<Real, packetSize, 1> Packet; // wrap a Packet as a vector
|
typedef Eigen::Matrix<Real, packetSize, 1> Packet; // wrap a Packet as a vector
|
||||||
|
|
||||||
int alignedWidth = (img_width/packetSize)*packetSize;
|
enum { iters_before_test = iters_before_test<Real>::ret };
|
||||||
double yradius = xradius * img_height / img_width;
|
max_iter = (max_iter / iters_before_test) * iters_before_test;
|
||||||
|
const int alignedWidth = (img_width/packetSize)*packetSize;
|
||||||
|
unsigned char *const buffer = widget->buffer;
|
||||||
|
const double xradius = widget->xradius;
|
||||||
|
const double yradius = xradius * img_height / img_width;
|
||||||
typedef Eigen::Matrix<Real, 2, 1> Vector2;
|
typedef Eigen::Matrix<Real, 2, 1> Vector2;
|
||||||
Vector2 start(center.x() - xradius, center.y() - yradius);
|
Vector2 start(widget->center.x() - widget->xradius, widget->center.y() - yradius);
|
||||||
Vector2 step(2*xradius/img_width, 2*yradius/img_height);
|
Vector2 step(2*widget->xradius/img_width, 2*yradius/img_height);
|
||||||
int pix = 0, total_iter = 0;
|
total_iter = 0;
|
||||||
|
int slice_height = img_height / widget->threadcount;
|
||||||
|
|
||||||
for(int y = 0; y < img_height; y++)
|
for(int y = id * slice_height; y < (id+1) * slice_height; y++)
|
||||||
{
|
{
|
||||||
|
int pix = y * img_width;
|
||||||
|
|
||||||
// for each pixel, we're going to do the iteration z := z^2 + c where z and c are complex numbers,
|
// for each pixel, we're going to do the iteration z := z^2 + c where z and c are complex numbers,
|
||||||
// starting with z = c = complex coord of the pixel. pzr and pzi denote the real and imaginary parts of z.
|
// starting with z = c = complex coord of the pixel. pzr and pzi denote the real and imaginary parts of z.
|
||||||
// pcr and pci denote the real and imaginary parts of c.
|
// pcr and pci denote the real and imaginary parts of c.
|
||||||
@ -47,27 +57,29 @@ template<typename Real> int MandelbrotWidget::render(int max_iter, int img_width
|
|||||||
Packeti pix_iter = Packeti::zero(), pix_dont_diverge;
|
Packeti pix_iter = Packeti::zero(), pix_dont_diverge;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
for(int i = 0; i < 4; i++)
|
for(int i = 0; i < iters_before_test/4; i++) // peel the inner loop by 4
|
||||||
{
|
{
|
||||||
pzr_buf = pzr;
|
# define ITERATE \
|
||||||
pzr = pzr.cwiseAbs2() - pzi.cwiseAbs2() + pcr;
|
pzr_buf = pzr; \
|
||||||
pzi = 2 * pzr_buf.cwiseProduct(pzi) + pci;
|
pzr = pzr.cwiseAbs2() - pzi.cwiseAbs2() + pcr; \
|
||||||
|
pzi = 2 * pzr_buf.cwiseProduct(pzi) + pci;
|
||||||
|
ITERATE ITERATE ITERATE ITERATE
|
||||||
}
|
}
|
||||||
pix_dont_diverge = (pzr.cwiseAbs2() + pzi.cwiseAbs2())
|
pix_dont_diverge = (pzr.cwiseAbs2() + pzi.cwiseAbs2())
|
||||||
.eval() // temporary fix for lack of vectorizability of what follows
|
.eval() // temporary fix as what follows is not yet vectorized by Eigen
|
||||||
.cwiseLessThan(Packet::constant(4))
|
.cwiseLessThan(Packet::constant(iters_before_test))
|
||||||
.template cast<int>();
|
.template cast<int>();
|
||||||
pix_iter += 4 * pix_dont_diverge;
|
pix_iter += iters_before_test * pix_dont_diverge;
|
||||||
j++;
|
j++;
|
||||||
total_iter += 4 * packetSize;
|
total_iter += iters_before_test * packetSize;
|
||||||
}
|
}
|
||||||
while(j < max_iter/4 && pix_dont_diverge.any());
|
while(j < max_iter/iters_before_test && pix_dont_diverge.any()); // any() is not yet vectorized by Eigen
|
||||||
|
|
||||||
// compute arbitrary pixel colors
|
// compute pixel colors
|
||||||
for(int i = 0; i < packetSize; i++)
|
for(int i = 0; i < packetSize; i++)
|
||||||
{
|
{
|
||||||
|
|
||||||
buffer[4*(pix+i)] = pix_iter[i]*255/max_iter;
|
buffer[4*(pix+i)] = 255*pix_iter[i]/max_iter;
|
||||||
buffer[4*(pix+i)+1] = 0;
|
buffer[4*(pix+i)+1] = 0;
|
||||||
buffer[4*(pix+i)+2] = 0;
|
buffer[4*(pix+i)+2] = 0;
|
||||||
}
|
}
|
||||||
@ -77,46 +89,60 @@ template<typename Real> int MandelbrotWidget::render(int max_iter, int img_width
|
|||||||
for(int x = alignedWidth; x < img_width; x++, pix++)
|
for(int x = alignedWidth; x < img_width; x++, pix++)
|
||||||
buffer[4*pix] = buffer[4*pix+1] = buffer[4*pix+2] = 0;
|
buffer[4*pix] = buffer[4*pix+1] = buffer[4*pix+2] = 0;
|
||||||
}
|
}
|
||||||
return total_iter;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotThread::run()
|
||||||
|
{
|
||||||
|
setTerminationEnabled(true);
|
||||||
|
double resolution = widget->xradius*2/widget->width();
|
||||||
|
max_iter = 128;
|
||||||
|
if(resolution < 1e-4f) max_iter += 128 * ( - 4 - std::log10(resolution));
|
||||||
|
int img_width = widget->width()/widget->draft;
|
||||||
|
int img_height = widget->height()/widget->draft;
|
||||||
|
single_precision = resolution > 1e-7f;
|
||||||
|
|
||||||
|
if(single_precision)
|
||||||
|
render<float>(img_width, img_height);
|
||||||
|
else
|
||||||
|
render<double>(img_width, img_height);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MandelbrotWidget::paintEvent(QPaintEvent *)
|
void MandelbrotWidget::paintEvent(QPaintEvent *)
|
||||||
{
|
{
|
||||||
double resolution = xradius*2/width();
|
|
||||||
int max_iter = 64;
|
|
||||||
if(resolution < 1e-4f) max_iter += 32 * ( - 4 - std::log10(resolution));
|
|
||||||
max_iter = (max_iter/4)*4;
|
|
||||||
int img_width = width()/draft;
|
|
||||||
int img_height = height()/draft;
|
|
||||||
static float max_speed = 0;
|
static float max_speed = 0;
|
||||||
int total_iter;
|
long long total_iter = 0;
|
||||||
bool single_precision = resolution > 1e-6f;
|
|
||||||
|
|
||||||
QTime time;
|
QTime time;
|
||||||
time.start();
|
time.start();
|
||||||
if(single_precision)
|
for(int th = 0; th < threadcount; th++)
|
||||||
total_iter = render<float>(max_iter, img_width, img_height);
|
threads[th]->start(QThread::LowPriority);
|
||||||
else
|
for(int th = 0; th < threadcount; th++)
|
||||||
total_iter = render<double>(max_iter, img_width, img_height);
|
{
|
||||||
|
threads[th]->wait();
|
||||||
|
total_iter += threads[th]->total_iter;
|
||||||
|
}
|
||||||
int elapsed = time.elapsed();
|
int elapsed = time.elapsed();
|
||||||
|
|
||||||
if(draft == 1)
|
if(draft == 1)
|
||||||
{
|
{
|
||||||
float speed = elapsed ? float(total_iter)*1000/elapsed : 0;
|
float speed = elapsed ? float(total_iter)*1000/elapsed : 0;
|
||||||
max_speed = std::max(max_speed, speed);
|
max_speed = std::max(max_speed, speed);
|
||||||
std::cout << elapsed << " ms elapsed, "
|
std::cout << threadcount << " threads, "
|
||||||
<< total_iter << " iters, "
|
<< elapsed << " ms, "
|
||||||
<< speed << " iters/s (max " << max_speed << ")" << std::endl;
|
<< speed << " iters/s (max " << max_speed << ")" << std::endl;
|
||||||
int packetSize = single_precision ? int(Eigen::ei_packet_traits<float>::size)
|
int packetSize = threads[0]->single_precision
|
||||||
: int(Eigen::ei_packet_traits<double>::size);
|
? int(Eigen::ei_packet_traits<float>::size)
|
||||||
|
: int(Eigen::ei_packet_traits<double>::size);
|
||||||
setWindowTitle(QString("resolution ")+QString::number(xradius*2/width(), 'e', 2)
|
setWindowTitle(QString("resolution ")+QString::number(xradius*2/width(), 'e', 2)
|
||||||
+(single_precision ? QString(", single ") : QString(", double "))
|
+QString(", up to %1 iterations").arg(threads[0]->max_iter)
|
||||||
|
+(threads[0]->single_precision ? QString(", single ") : QString(", double "))
|
||||||
+QString("precision, ")
|
+QString("precision, ")
|
||||||
+(packetSize==1 ? QString("no vectorization")
|
+(packetSize==1 ? QString("no vectorization")
|
||||||
: QString("vectorized (%1 per packet)").arg(packetSize)));
|
: QString("vectorized (%1 per packet)").arg(packetSize)));
|
||||||
}
|
}
|
||||||
|
|
||||||
QImage image(buffer, img_width, img_height, QImage::Format_RGB32);
|
QImage image(buffer, width()/draft, height()/draft, QImage::Format_RGB32);
|
||||||
QPainter painter(this);
|
QPainter painter(this);
|
||||||
painter.drawImage(QPoint(0, 0), image.scaled(width(), height()));
|
painter.drawImage(QPoint(0, 0), image.scaled(width(), height()));
|
||||||
|
|
||||||
@ -137,6 +163,8 @@ void MandelbrotWidget::mousePressEvent(QMouseEvent *event)
|
|||||||
center = Eigen::Vector2d(center.x() + (event->pos().x() - width()/2) * xradius * 2 / width(),
|
center = Eigen::Vector2d(center.x() + (event->pos().x() - width()/2) * xradius * 2 / width(),
|
||||||
center.y() + (event->pos().y() - height()/2) * yradius * 2 / height());
|
center.y() + (event->pos().y() - height()/2) * yradius * 2 / height());
|
||||||
draft = 16;
|
draft = 16;
|
||||||
|
for(int th = 0; th < threadcount; th++)
|
||||||
|
threads[th]->terminate();
|
||||||
update();
|
update();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -152,6 +180,8 @@ void MandelbrotWidget::mouseMoveEvent(QMouseEvent *event)
|
|||||||
if(t > 2) t = 2;
|
if(t > 2) t = 2;
|
||||||
xradius *= t;
|
xradius *= t;
|
||||||
draft = 16;
|
draft = 16;
|
||||||
|
for(int th = 0; th < threadcount; th++)
|
||||||
|
threads[th]->terminate();
|
||||||
update();
|
update();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,17 +4,37 @@
|
|||||||
#include <Eigen/Array>
|
#include <Eigen/Array>
|
||||||
#include <QtGui/QApplication>
|
#include <QtGui/QApplication>
|
||||||
#include <QtGui/QWidget>
|
#include <QtGui/QWidget>
|
||||||
|
#include <QtCore/QThread>
|
||||||
|
|
||||||
|
class MandelbrotWidget;
|
||||||
|
|
||||||
|
class MandelbrotThread : public QThread
|
||||||
|
{
|
||||||
|
friend class MandelbrotWidget;
|
||||||
|
MandelbrotWidget *widget;
|
||||||
|
long long total_iter;
|
||||||
|
int id, max_iter;
|
||||||
|
bool single_precision;
|
||||||
|
|
||||||
|
public:
|
||||||
|
MandelbrotThread(MandelbrotWidget *w, int i) : widget(w), id(i) {}
|
||||||
|
void run();
|
||||||
|
template<typename Real> void render(int img_width, int img_height);
|
||||||
|
};
|
||||||
|
|
||||||
class MandelbrotWidget : public QWidget
|
class MandelbrotWidget : public QWidget
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
|
|
||||||
|
friend class MandelbrotThread;
|
||||||
Eigen::Vector2d center;
|
Eigen::Vector2d center;
|
||||||
double xradius;
|
double xradius;
|
||||||
int size;
|
int size;
|
||||||
unsigned char *buffer;
|
unsigned char *buffer;
|
||||||
QPoint lastpos;
|
QPoint lastpos;
|
||||||
int draft;
|
int draft;
|
||||||
|
MandelbrotThread **threads;
|
||||||
|
int threadcount;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void resizeEvent(QResizeEvent *);
|
void resizeEvent(QResizeEvent *);
|
||||||
@ -28,8 +48,16 @@ class MandelbrotWidget : public QWidget
|
|||||||
size(0), buffer(0), draft(16)
|
size(0), buffer(0), draft(16)
|
||||||
{
|
{
|
||||||
setAutoFillBackground(false);
|
setAutoFillBackground(false);
|
||||||
|
threadcount = QThread::idealThreadCount();
|
||||||
|
threads = new MandelbrotThread*[threadcount];
|
||||||
|
for(int th = 0; th < threadcount; th++) threads[th] = new MandelbrotThread(this, th);
|
||||||
|
}
|
||||||
|
~MandelbrotWidget()
|
||||||
|
{
|
||||||
|
if(buffer) delete[]buffer;
|
||||||
|
for(int th = 0; th < threadcount; th++) delete threads[th];
|
||||||
|
delete[] threads;
|
||||||
}
|
}
|
||||||
~MandelbrotWidget() { if(buffer) delete[]buffer; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // MANDELBROT_H
|
#endif // MANDELBROT_H
|
||||||
|
Loading…
Reference in New Issue
Block a user