diff --git a/demos/mandelbrot/mandelbrot.cpp b/demos/mandelbrot/mandelbrot.cpp index f1be9e560..d73a9eb0b 100644 --- a/demos/mandelbrot/mandelbrot.cpp +++ b/demos/mandelbrot/mandelbrot.cpp @@ -15,20 +15,30 @@ void MandelbrotWidget::resizeEvent(QResizeEvent *) } } -template int MandelbrotWidget::render(int max_iter, int img_width, int img_height) +template struct iters_before_test { enum { ret = 8 }; }; +template<> struct iters_before_test { enum { ret = 16 }; }; + +template void MandelbrotThread::render(int img_width, int img_height) { enum { packetSize = Eigen::ei_packet_traits::size }; // number of reals in a Packet typedef Eigen::Matrix Packet; // wrap a Packet as a vector - int alignedWidth = (img_width/packetSize)*packetSize; - double yradius = xradius * img_height / img_width; + enum { iters_before_test = iters_before_test::ret }; + max_iter = (max_iter / iters_before_test) * iters_before_test; + const int alignedWidth = (img_width/packetSize)*packetSize; + unsigned char *const buffer = widget->buffer; + const double xradius = widget->xradius; + const double yradius = xradius * img_height / img_width; typedef Eigen::Matrix Vector2; - Vector2 start(center.x() - xradius, center.y() - yradius); - Vector2 step(2*xradius/img_width, 2*yradius/img_height); - int pix = 0, total_iter = 0; + Vector2 start(widget->center.x() - widget->xradius, widget->center.y() - yradius); + Vector2 step(2*widget->xradius/img_width, 2*yradius/img_height); + total_iter = 0; + int slice_height = img_height / widget->threadcount; - for(int y = 0; y < img_height; y++) + for(int y = id * slice_height; y < (id+1) * slice_height; y++) { + int pix = y * img_width; + // for each pixel, we're going to do the iteration z := z^2 + c where z and c are complex numbers, // starting with z = c = complex coord of the pixel. pzi and pzr denote the real and imaginary parts of z. // pci and pcr denote the real and imaginary parts of c. @@ -47,27 +57,29 @@ template int MandelbrotWidget::render(int max_iter, int img_width Packeti pix_iter = Packeti::zero(), pix_dont_diverge; do { - for(int i = 0; i < 4; i++) + for(int i = 0; i < iters_before_test/4; i++) // peel the inner loop by 4 { - pzr_buf = pzr; - pzr = pzr.cwiseAbs2() - pzi.cwiseAbs2() + pcr; - pzi = 2 * pzr_buf.cwiseProduct(pzi) + pci; +# define ITERATE \ + pzr_buf = pzr; \ + pzr = pzr.cwiseAbs2() - pzi.cwiseAbs2() + pcr; \ + pzi = 2 * pzr_buf.cwiseProduct(pzi) + pci; + ITERATE ITERATE ITERATE ITERATE } pix_dont_diverge = (pzr.cwiseAbs2() + pzi.cwiseAbs2()) - .eval() // temporary fix for lack of vectorizability of what follows - .cwiseLessThan(Packet::constant(4)) + .eval() // temporary fix as what follows is not yet vectorized by Eigen + .cwiseLessThan(Packet::constant(iters_before_test)) .template cast(); - pix_iter += 4 * pix_dont_diverge; + pix_iter += iters_before_test * pix_dont_diverge; j++; - total_iter += 4 * packetSize; + total_iter += iters_before_test * packetSize; } - while(j < max_iter/4 && pix_dont_diverge.any()); + while(j < max_iter/iters_before_test && pix_dont_diverge.any()); // any() is not yet vectorized by Eigen - // compute arbitrary pixel colors + // compute pixel colors for(int i = 0; i < packetSize; i++) { - buffer[4*(pix+i)] = pix_iter[i]*255/max_iter; + buffer[4*(pix+i)] = 255*pix_iter[i]/max_iter; buffer[4*(pix+i)+1] = 0; buffer[4*(pix+i)+2] = 0; } @@ -77,46 +89,60 @@ template int MandelbrotWidget::render(int max_iter, int img_width for(int x = alignedWidth; x < img_width; x++, pix++) buffer[4*pix] = buffer[4*pix+1] = buffer[4*pix+2] = 0; } - return total_iter; + return; +} + +void MandelbrotThread::run() +{ + setTerminationEnabled(true); + double resolution = widget->xradius*2/widget->width(); + max_iter = 128; + if(resolution < 1e-4f) max_iter += 128 * ( - 4 - std::log10(resolution)); + int img_width = widget->width()/widget->draft; + int img_height = widget->height()/widget->draft; + single_precision = resolution > 1e-7f; + + if(single_precision) + render(img_width, img_height); + else + render(img_width, img_height); } void MandelbrotWidget::paintEvent(QPaintEvent *) { - double resolution = xradius*2/width(); - int max_iter = 64; - if(resolution < 1e-4f) max_iter += 32 * ( - 4 - std::log10(resolution)); - max_iter = (max_iter/4)*4; - int img_width = width()/draft; - int img_height = height()/draft; static float max_speed = 0; - int total_iter; - bool single_precision = resolution > 1e-6f; + long long total_iter = 0; QTime time; time.start(); - if(single_precision) - total_iter = render(max_iter, img_width, img_height); - else - total_iter = render(max_iter, img_width, img_height); + for(int th = 0; th < threadcount; th++) + threads[th]->start(QThread::LowPriority); + for(int th = 0; th < threadcount; th++) + { + threads[th]->wait(); + total_iter += threads[th]->total_iter; + } int elapsed = time.elapsed(); if(draft == 1) { float speed = elapsed ? float(total_iter)*1000/elapsed : 0; max_speed = std::max(max_speed, speed); - std::cout << elapsed << " ms elapsed, " - << total_iter << " iters, " + std::cout << threadcount << " threads, " + << elapsed << " ms, " << speed << " iters/s (max " << max_speed << ")" << std::endl; - int packetSize = single_precision ? int(Eigen::ei_packet_traits::size) - : int(Eigen::ei_packet_traits::size); + int packetSize = threads[0]->single_precision + ? int(Eigen::ei_packet_traits::size) + : int(Eigen::ei_packet_traits::size); setWindowTitle(QString("resolution ")+QString::number(xradius*2/width(), 'e', 2) - +(single_precision ? QString(", single ") : QString(", double ")) + +QString(", up to %1 iterations").arg(threads[0]->max_iter) + +(threads[0]->single_precision ? QString(", single ") : QString(", double ")) +QString("precision, ") +(packetSize==1 ? QString("no vectorization") : QString("vectorized (%1 per packet)").arg(packetSize))); } - QImage image(buffer, img_width, img_height, QImage::Format_RGB32); + QImage image(buffer, width()/draft, height()/draft, QImage::Format_RGB32); QPainter painter(this); painter.drawImage(QPoint(0, 0), image.scaled(width(), height())); @@ -137,6 +163,8 @@ void MandelbrotWidget::mousePressEvent(QMouseEvent *event) center = Eigen::Vector2d(center.x() + (event->pos().x() - width()/2) * xradius * 2 / width(), center.y() + (event->pos().y() - height()/2) * yradius * 2 / height()); draft = 16; + for(int th = 0; th < threadcount; th++) + threads[th]->terminate(); update(); } } @@ -152,6 +180,8 @@ void MandelbrotWidget::mouseMoveEvent(QMouseEvent *event) if(t > 2) t = 2; xradius *= t; draft = 16; + for(int th = 0; th < threadcount; th++) + threads[th]->terminate(); update(); } } diff --git a/demos/mandelbrot/mandelbrot.h b/demos/mandelbrot/mandelbrot.h index 23df7c9dd..1468bdca9 100644 --- a/demos/mandelbrot/mandelbrot.h +++ b/demos/mandelbrot/mandelbrot.h @@ -4,17 +4,37 @@ #include #include #include +#include + +class MandelbrotWidget; + +class MandelbrotThread : public QThread +{ + friend class MandelbrotWidget; + MandelbrotWidget *widget; + long long total_iter; + int id, max_iter; + bool single_precision; + + public: + MandelbrotThread(MandelbrotWidget *w, int i) : widget(w), id(i) {} + void run(); + template void render(int img_width, int img_height); +}; class MandelbrotWidget : public QWidget { Q_OBJECT + friend class MandelbrotThread; Eigen::Vector2d center; double xradius; int size; unsigned char *buffer; QPoint lastpos; int draft; + MandelbrotThread **threads; + int threadcount; protected: void resizeEvent(QResizeEvent *); @@ -28,8 +48,16 @@ class MandelbrotWidget : public QWidget size(0), buffer(0), draft(16) { setAutoFillBackground(false); + threadcount = QThread::idealThreadCount(); + threads = new MandelbrotThread*[threadcount]; + for(int th = 0; th < threadcount; th++) threads[th] = new MandelbrotThread(this, th); + } + ~MandelbrotWidget() + { + if(buffer) delete[]buffer; + for(int th = 0; th < threadcount; th++) delete threads[th]; + delete[] threads; } - ~MandelbrotWidget() { if(buffer) delete[]buffer; } }; #endif // MANDELBROT_H