diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 3a25c2840..b6594983f 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -296,6 +296,44 @@ void householder_qr_inplace_unblocked(MatrixQR& mat, HCoeffs& hCoeffs, typename
   }
 }
 
+// TODO: add a corresponding public API for updating a QR factorization
+/** \internal
+ * Basically a modified copy of @c Eigen::internal::householder_qr_inplace_unblocked that
+ * performs a rank-1 update of the QR matrix in compact storage. This function assumes, that
+ * the first @c k-1 columns of the matrix @c mat contain the QR decomposition of \f$A^N\f$ up to
+ * column k-1. Then the QR decomposition of the k-th column (given by @c newColumn) is computed by
+ * applying the k-1 Householder projectors on it and finally compute the projector \f$H_k\f$ of
+ * it. On exit the matrix @c mat and the vector @c hCoeffs contain the QR decomposition of the
+ * first k columns of \f$A^N\f$. The \a tempData argument must point to at least mat.cols() scalars.  */
+template <typename MatrixQR, typename HCoeffs, typename VectorQR>
+void householder_qr_inplace_update(MatrixQR& mat, HCoeffs& hCoeffs, const VectorQR& newColumn,
+                                   typename MatrixQR::Index k, typename MatrixQR::Scalar* tempData) {
+  typedef typename MatrixQR::Index Index;
+  typedef typename MatrixQR::Scalar Scalar;
+  typedef typename MatrixQR::RealScalar RealScalar;
+  Index rows = mat.rows();
+
+  eigen_assert(k < mat.cols());
+  eigen_assert(k < rows);
+  eigen_assert(hCoeffs.size() == mat.cols());
+  eigen_assert(newColumn.size() == rows);
+  eigen_assert(tempData);
+
+  // Store new column in mat at column k
+  mat.col(k) = newColumn;
+  // Apply H = H_1...H_{k-1} on newColumn (skip if k=0)
+  for (Index i = 0; i < k; ++i) {
+    Index remainingRows = rows - i;
+    mat.col(k)
+        .tail(remainingRows)
+        .applyHouseholderOnTheLeft(mat.col(i).tail(remainingRows - 1), hCoeffs.coeffRef(i), tempData + i + 1);
+  }
+  // Construct Householder projector in-place in column k
+  RealScalar beta;
+  mat.col(k).tail(rows - k).makeHouseholderInPlace(hCoeffs.coeffRef(k), beta);
+  mat.coeffRef(k, k) = beta;
+}
+
 /** \internal */
 template<typename MatrixQR, typename HCoeffs,
   typename MatrixQRScalar = typename MatrixQR::Scalar,
diff --git a/test/householder.cpp b/test/householder.cpp
index 29a6c1d23..3a3d04764 100644
--- a/test/householder.cpp
+++ b/test/householder.cpp
@@ -133,6 +133,89 @@ template<typename MatrixType> void householder(const MatrixType& m)
   VERIFY_IS_APPROX(m3 * m5, m1); // test evaluating rhseq to a dense matrix, then applying
 }
 
+
+template <typename MatrixType>
+void householder_update(const MatrixType& m) {
+  // This test is covering the internal::householder_qr_inplace_update function.
+  // At time of writing, there is not public API that exposes this update behavior directly,
+  // so we are testing the internal implementation.
+
+  const Index rows = m.rows();
+  const Index cols = m.cols();
+
+  typedef typename MatrixType::Scalar Scalar;
+  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
+  typedef Matrix<Scalar, Dynamic, 1> HCoeffsVectorType;
+  typedef Matrix<Scalar, Dynamic, Dynamic> MatrixX;
+  typedef Matrix<Scalar, Dynamic, 1> VectorX;
+
+  VectorX tmpOwner(cols);
+  Scalar* tmp = tmpOwner.data();
+
+  // The matrix to factorize.
+  const MatrixType A = MatrixType::Random(rows, cols); 
+
+  // matQR and hCoeffs will hold the factorization of A,
+  // built by a sequence of calls to `update`.
+  MatrixType matQR(rows, cols);
+  HCoeffsVectorType hCoeffs(cols);
+
+  // householder_qr_inplace_update should be able to build a QR factorization one column at a time.
+  // We verify this by starting with an empty factorization and 'updating' one column at a time.
+  // After each call to update, we should have a QR factorization of the columns presented so far.
+
+  const Index size = (std::min)(rows, cols); // QR can only go up to 'size' b/c that's full rank.
+  for (Index k = 0; k != size; ++k)
+  {
+    // Make a copy of the column to prevent any possibility of 'leaking' other parts of A.
+    const VectorType newColumn = A.col(k); 
+    internal::householder_qr_inplace_update(matQR, hCoeffs, newColumn, k, tmp);
+
+    // Verify Property:
+    // matQR.leftCols(k+1) and hCoeffs.head(k+1) hold
+    // a QR factorization of A.leftCols(k+1).
+    // This is the fundamental guarantee of householder_qr_inplace_update.
+    {
+      const MatrixX matQR_k = matQR.leftCols(k + 1);
+      const VectorX hCoeffs_k = hCoeffs.head(k + 1);
+      MatrixX R = matQR_k.template triangularView<Upper>();
+      MatrixX QxR = householderSequence(matQR_k, hCoeffs_k.conjugate()) * R;
+      VERIFY_IS_APPROX(QxR, A.leftCols(k + 1));
+    }
+
+    // Verify Property:
+    // A sequence of calls to 'householder_qr_inplace_update'
+    // should produce the same result as 'householder_qr_inplace_unblocked'.
+    // This is a property of the current implementation.
+    // If these implementations diverge in the future, 
+    // then simply delete the test of this property.
+    {
+      MatrixX QR_at_once = A.leftCols(k + 1);
+      VectorX hCoeffs_at_once(k + 1);
+      internal::householder_qr_inplace_unblocked(QR_at_once, hCoeffs_at_once, tmp);
+      VERIFY_IS_APPROX(QR_at_once, matQR.leftCols(k + 1));
+      VERIFY_IS_APPROX(hCoeffs_at_once, hCoeffs.head(k + 1));
+    }
+  }
+
+  // Verify Property:
+  // We can go back and update any column to have a new value,
+  // and get a QR factorization of the columns up to that one.  
+  {
+    const Index k = internal::random<Index>(0, size - 1);
+    VectorType newColumn = VectorType::Random(rows);      
+    internal::householder_qr_inplace_update(matQR, hCoeffs, newColumn, k, tmp);
+
+    const MatrixX matQR_k = matQR.leftCols(k + 1);
+    const VectorX hCoeffs_k = hCoeffs.head(k + 1);
+    MatrixX R = matQR_k.template triangularView<Upper>();
+    MatrixX QxR = householderSequence(matQR_k, hCoeffs_k.conjugate()) * R;
+    VERIFY_IS_APPROX(QxR.leftCols(k), A.leftCols(k));
+    VERIFY_IS_APPROX(QxR.col(k), newColumn);
+  }  
+}
+
+
 EIGEN_DECLARE_TEST(householder)
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -144,5 +227,9 @@ EIGEN_DECLARE_TEST(householder)
     CALL_SUBTEST_6( householder(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE),internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
     CALL_SUBTEST_7( householder(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE),internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
     CALL_SUBTEST_8( householder(Matrix<double,1,1>()) );
+
+    CALL_SUBTEST_9( householder_update(Matrix<double, 3, 5>()) );
+    CALL_SUBTEST_9( householder_update(Matrix<float, 4, 2>()) );
+    CALL_SUBTEST_9( householder_update(MatrixXcf(internal::random<Index>(1,EIGEN_TEST_MAX_SIZE), internal::random<Index>(1,EIGEN_TEST_MAX_SIZE))) );
   }
 }
diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt
index 631a06014..dcf9500bb 100644
--- a/unsupported/Eigen/CMakeLists.txt
+++ b/unsupported/Eigen/CMakeLists.txt
@@ -12,6 +12,7 @@ set(Eigen_HEADERS
   MatrixFunctions 
   MoreVectorization
   MPRealSupport
+  NNLS
   NonLinearOptimization
   NumericalDiff
   OpenGLSupport
diff --git a/unsupported/Eigen/NNLS b/unsupported/Eigen/NNLS
new file mode 100644
index 000000000..280450850
--- /dev/null
+++ b/unsupported/Eigen/NNLS
@@ -0,0 +1,388 @@
+/* Non-Negagive Least Squares Algorithm for Eigen.
+ *
+ * Copyright (C) 2021 Essex Edwards, <essex.edwards@gmail.com>
+ * Copyright (C) 2013 Hannes Matuschek, hannes.matuschek at uni-potsdam.de
+ *
+ * This Source Code Form is subject to the terms of the Mozilla
+ * Public License v. 2.0. If a copy of the MPL was not distributed
+ * with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+/** \defgroup nnls Non-Negative Least Squares (NNLS) Module
+ * This module provides a single class @c Eigen::NNLS implementing the NNLS algorithm.
+ * The algorithm is described in "SOLVING LEAST SQUARES PROBLEMS", by Charles L. Lawson and
+ * Richard J. Hanson, Prentice-Hall, 1974 and solves optimization problems of the form
+ *
+ * \f[ \min \left\Vert Ax-b\right\Vert_2^2\quad s.t.\, x\ge 0\,.\f]
+ *
+ * The algorithm solves the constrained least-squares problem above by iteratively improving
+ * an estimate of which constraints are active (elements of \f$x\f$ equal to zero)
+ * and which constraints are inactive (elements of \f$x\f$ greater than zero).
+ * Each iteration, an unconstrained linear least-squares problem solves for the
+ * components of \f$x\f$ in the (estimated) inactive set and the sets are updated.
+ * The unconstrained problem minimizes \f$\left\Vert A^Nx^N-b\right\Vert_2^2\f$,
+ * where \f$A^N\f$ is a matrix formed by selecting all columns of A which are
+ * in the inactive set \f$N\f$.
+ *
+ */
+
+#ifndef EIGEN_NNLS_H
+#define EIGEN_NNLS_H
+
+#include "../../Eigen/Core"
+#include "../../Eigen/QR"
+
+#include <limits>
+
+namespace Eigen {
+
+/** \ingroup nnls
+ * \class NNLS
+ * \brief Implementation of the Non-Negative Least Squares (NNLS) algorithm.
+ * \tparam MatrixType The type of the system matrix \f$A\f$.
+ *
+ * This class implements the NNLS algorithm as described in "SOLVING LEAST SQUARES PROBLEMS",
+ * Charles L. Lawson and Richard J. Hanson, Prentice-Hall, 1974. This algorithm solves a least
+ * squares problem iteratively and ensures that the solution is non-negative. I.e.
+ *
+ * \f[ \min \left\Vert Ax-b\right\Vert_2^2\quad s.t.\, x\ge 0 \f]
+ *
+ * The algorithm solves the constrained least-squares problem above by iteratively improving
+ * an estimate of which constraints are active (elements of \f$x\f$ equal to zero)
+ * and which constraints are inactive (elements of \f$x\f$ greater than zero).
+ * Each iteration, an unconstrained linear least-squares problem solves for the
+ * components of \f$x\f$ in the (estimated) inactive set and the sets are updated.
+ * The unconstrained problem minimizes \f$\left\Vert A^Nx^N-b\right\Vert_2^2\f$,
+ * where \f$A^N\f$ is a matrix formed by selecting all columns of A which are
+ * in the inactive set \f$N\f$.
+ *
+ * See <a href="https://en.wikipedia.org/wiki/Non-negative_least_squares">the
+ * wikipedia page on non-negative least squares</a> for more background information.
+ *
+ * \note Please note that it is possible to construct an NNLS problem for which the
+ *       algorithm does not converge. In practice these cases are extremely rare.
+ */
+template <class MatrixType_>
+class NNLS {
+ public:
+  typedef MatrixType_ MatrixType;
+
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    Options = MatrixType::Options,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+  };
+
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::Index Index;
+
+  /** Type of a row vector of the system matrix \f$A\f$. */
+  typedef Matrix<Scalar, ColsAtCompileTime, 1> SolutionVectorType;
+  /** Type of a column vector of the system matrix \f$A\f$. */
+  typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsVectorType;
+  typedef Matrix<Index, ColsAtCompileTime, 1> IndicesType;
+
+  /** */
+  NNLS();
+
+  /** \brief Constructs a NNLS sovler and initializes it with the given system matrix @c A.
+   * \param A Specifies the system matrix.
+   * \param max_iter Specifies the maximum number of iterations to solve the system.
+   * \param tol Specifies the precision of the optimum.
+   *        This is an absolute tolerance on the gradient of the Lagrangian, \f$A^T(Ax-b)-\lambda\f$
+   *        (with Lagrange multipliers \f$\lambda\f$).
+   */
+  NNLS(const MatrixType &A, Index max_iter = -1, Scalar tol = NumTraits<Scalar>::dummy_precision());
+
+  /** Initializes the solver with the matrix \a A for further solving NNLS problems.
+   *
+   * This function mostly initializes/computes the preconditioner. In the future
+   * we might, for instance, implement column reordering for faster matrix vector products.
+   */
+  template <typename MatrixDerived>
+  NNLS<MatrixType> &compute(const EigenBase<MatrixDerived> &A);
+
+  /** \brief Solves the NNLS problem.
+   *
+   * The dimension of @c b must be equal to the number of rows of @c A, given to the constructor.
+   *
+   * \returns The approximate solution vector \f$ x \f$. Use info() to determine if the solve was a success or not.
+   * \sa info()
+   */
+  const SolutionVectorType &solve(const RhsVectorType &b);
+
+  /** \brief Returns the solution if a problem was solved.
+   * If not, an uninitialized vector may be returned. */
+  const SolutionVectorType &x() const { return x_; }
+
+  /** \returns the tolerance threshold used by the stopping criteria.
+   * \sa setTolerance()
+   */
+  Scalar tolerance() const { return tolerance_; }
+
+  /** Sets the tolerance threshold used by the stopping criteria.
+   *
+   * This is an absolute tolerance on the gradient of the Lagrangian, \f$A^T(Ax-b)-\lambda\f$
+   * (with Lagrange multipliers \f$\lambda\f$).
+   */
+  NNLS<MatrixType> &setTolerance(const Scalar &tolerance) {
+    tolerance_ = tolerance;
+    return *this;
+  }
+
+  /** \returns the max number of iterations.
+   * It is either the value set by setMaxIterations or, by default, twice the number of columns of the matrix.
+   */
+  Index maxIterations() const { return max_iter_ < 0 ? 2 * A_.cols() : max_iter_; }
+
+  /** Sets the max number of iterations.
+   * Default is twice the number of columns of the matrix.
+   * The algorithm requires at least k iterations to produce a solution vector with k non-zero entries.
+   */
+  NNLS<MatrixType> &setMaxIterations(Index maxIters) {
+    max_iter_ = maxIters;
+    return *this;
+  }
+
+  /** \returns the number of iterations (least-squares solves) performed during the last solve */
+  Index iterations() const { return iterations_; }
+
+  /** \returns Success if the iterations converged, and an error values otherwise. */
+  ComputationInfo info() const { return info_; }
+
+ private:
+  /** \internal Adds the given index @c idx to the inactive set N and updates the QR decomposition of \f$A^N\f$. */
+  void moveToInactiveSet_(Index idx);
+
+  /** \internal Removes the given index idx from the inactive set N and updates the QR decomposition of \f$A^N\f$. */
+  void moveToActiveSet_(Index idx);
+
+  /** \internal Solves the least-squares problem \f$\left\Vert y-A^Nx\right\Vert_2^2\f$. */
+  void solveInactiveSet_(const RhsVectorType &b);
+
+ private:
+  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixAtAType;
+
+  /** \internal Holds the maximum number of iterations for the NNLS algorithm.
+   *  @c -1 means to use the default value. */
+  Index max_iter_;
+  /** \internal Holds the number of iterations. */
+  Index iterations_;
+  /** \internal Holds success/fail of the last solve. */
+  ComputationInfo info_;
+  /** \internal Size of the inactive set. */
+  Index numInactive_;
+  /** \internal Accuracy of the algorithm w.r.t the optimality of the solution (gradient). */
+  Scalar tolerance_;
+  /** \internal The system matrix, a copy of the one given to the constructor. */
+  MatrixType A_;
+  /** \internal Precomputed product \f$A^TA\f$. */
+  MatrixAtAType AtA_;
+  /** \internal Will hold the solution. */
+  SolutionVectorType x_;
+  /** \internal Will hold the current gradient.\f$A^Tb - A^TAx\f$ */
+  SolutionVectorType gradient_;
+  /** \internal Will hold the partial solution. */
+  SolutionVectorType y_;
+  /** \internal Precomputed product \f$A^Tb\f$. */
+  SolutionVectorType Atb_;
+  /** \internal Holds the current permutation partitioning the active and inactive sets.
+   * The first @c numInactive_ elements form the inactive set and the rest the active set. */
+  IndicesType index_sets_;
+  /** \internal QR decomposition to solve the (inactive) sub system (together with @c qrCoeffs_). */
+  MatrixType QR_;
+  /** \internal QR decomposition to solve the (inactive) sub system (together with @c QR_). */
+  SolutionVectorType qrCoeffs_;
+  /** \internal Some workspace for QR decomposition. */
+  SolutionVectorType tempSolutionVector_;
+  RhsVectorType tempRhsVector_;
+};
+
+/* ********************************************************************************************
+ * Implementation
+ * ******************************************************************************************** */
+
+template <typename MatrixType>
+NNLS<MatrixType>::NNLS()
+    : max_iter_(-1),
+      iterations_(0),
+      info_(ComputationInfo::InvalidInput),
+      numInactive_(0),
+      tolerance_(NumTraits<Scalar>::dummy_precision()) {}
+
+template <typename MatrixType>
+NNLS<MatrixType>::NNLS(const MatrixType &A, Index max_iter, Scalar tol) : max_iter_(max_iter), tolerance_(tol) {
+  compute(A);
+}
+
+template <typename MatrixType>
+template <typename MatrixDerived>
+NNLS<MatrixType> &NNLS<MatrixType>::compute(const EigenBase<MatrixDerived> &A) {
+  // Ensure Scalar type is real. The non-negativity constraint doesn't obviously extend to complex numbers.
+  EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
+
+  // max_iter_: unchanged
+  iterations_ = 0;
+  info_ = ComputationInfo::Success;
+  numInactive_ = 0;
+  // tolerance: unchanged
+  A_ = A.derived();
+  AtA_.noalias() = A_.transpose() * A_;
+  x_.resize(A_.cols());
+  gradient_.resize(A_.cols());
+  y_.resize(A_.cols());
+  Atb_.resize(A_.cols());
+  index_sets_.resize(A_.cols());
+  QR_.resize(A_.rows(), A_.cols());
+  qrCoeffs_.resize(A_.cols());
+  tempSolutionVector_.resize(A_.cols());
+  tempRhsVector_.resize(A_.rows());
+
+  return *this;
+}
+
+template <typename MatrixType>
+const typename NNLS<MatrixType>::SolutionVectorType &NNLS<MatrixType>::solve(const RhsVectorType &b) {
+  // Initialize solver
+  iterations_ = 0;
+  info_ = ComputationInfo::NumericalIssue;
+  x_.setZero();
+
+  index_sets_ = IndicesType::LinSpaced(A_.cols(), 0, A_.cols() - 1);  // Identity permutation.
+  numInactive_ = 0;
+
+  // Precompute A^T*b
+  Atb_.noalias() = A_.transpose() * b;
+
+  const Index maxIterations = this->maxIterations();
+
+  // OUTER LOOP
+  while (true) {
+    // Early exit if all variables are inactive, which breaks 'maxCoeff' below.
+    if (A_.cols() == numInactive_) {
+      info_ = ComputationInfo::Success;
+      return x_;
+    }
+
+    // Find the maximum element of the gradient in the active set.
+    // If it is small or negative, then we have converged.
+    // Else, we move that variable to the inactive set.
+    gradient_.noalias() = Atb_ - AtA_ * x_;
+
+    const Index numActive = A_.cols() - numInactive_;
+    Index argmaxGradient = -1;
+    const Scalar maxGradient = gradient_(index_sets_.tail(numActive)).maxCoeff(&argmaxGradient);
+    argmaxGradient += numInactive_;  // beacause tail() skipped the first numInactive_ elements
+
+    if (maxGradient < tolerance_) {
+      info_ = ComputationInfo::Success;
+      return x_;
+    }
+
+    moveToInactiveSet_(argmaxGradient);
+
+    // INNER LOOP
+    while (true) {
+      // Check if max. number of iterations is reached
+      if (iterations_ >= maxIterations) {
+        info_ = ComputationInfo::NoConvergence;
+        return x_;
+      }
+
+      // Solve least-squares problem in inactive set only,
+      // this step is rather trivial as moveToInactiveSet_ & moveToActiveSet_
+      // updates the QR decomposition of inactive columns A^N.
+      // solveInactiveSet_ puts the solution in y_
+      solveInactiveSet_(b);
+      ++iterations_;  // The solve is expensive, so that is what we count as an iteration.
+
+      // Check feasability...
+      bool feasible = true;
+      Scalar alpha = NumTraits<Scalar>::highest();
+      Index infeasibleIdx = -1;  // Which variable became infeasible first.
+      for (Index i = 0; i < numInactive_; i++) {
+        Index idx = index_sets_[i];
+        if (y_(idx) < 0) {
+          // t should always be in [0,1].
+          Scalar t = -x_(idx) / (y_(idx) - x_(idx));
+          if (alpha > t) {
+            alpha = t;
+            infeasibleIdx = i;
+            feasible = false;
+          }
+        }
+      }
+      eigen_assert(feasible || 0 <= infeasibleIdx);
+
+      // If solution is feasible, exit to outer loop
+      if (feasible) {
+        x_ = y_;
+        break;
+      }
+
+      // Infeasible solution -> interpolate to feasible one
+      for (Index i = 0; i < numInactive_; i++) {
+        Index idx = index_sets_[i];
+        x_(idx) += alpha * (y_(idx) - x_(idx));
+      }
+
+      // Remove these indices from the inactive set and update QR decomposition
+      moveToActiveSet_(infeasibleIdx);
+    }
+  }
+}
+
+template <typename MatrixType>
+void NNLS<MatrixType>::moveToInactiveSet_(Index idx) {
+  // Update permutation matrix:
+  std::swap(index_sets_(idx), index_sets_(numInactive_));
+  numInactive_++;
+
+  // Perform rank-1 update of the QR decomposition stored in QR_ & qrCoeff_
+  internal::householder_qr_inplace_update(QR_, qrCoeffs_, A_.col(index_sets_(numInactive_ - 1)), numInactive_ - 1,
+                                          tempSolutionVector_.data());
+}
+
+template <typename MatrixType>
+void NNLS<MatrixType>::moveToActiveSet_(Index idx) {
+  // swap index with last inactive one & reduce number of inactive columns
+  std::swap(index_sets_(idx), index_sets_(numInactive_ - 1));
+  numInactive_--;
+  // Update QR decomposition starting from the removed index up to the end [idx, ..., numInactive_]
+  for (Index i = idx; i < numInactive_; i++) {
+    Index col = index_sets_(i);
+    internal::householder_qr_inplace_update(QR_, qrCoeffs_, A_.col(col), i, tempSolutionVector_.data());
+  }
+}
+
+template <typename MatrixType>
+void NNLS<MatrixType>::solveInactiveSet_(const RhsVectorType &b) {
+  eigen_assert(numInactive_ > 0);
+
+  tempRhsVector_ = b;
+
+  // tmpRHS(0:numInactive_-1) := Q'*b
+  // tmpRHS(numInactive_:end) := useless stuff we would rather not compute at all.
+  tempRhsVector_.applyOnTheLeft(
+      householderSequence(QR_.leftCols(numInactive_), qrCoeffs_.head(numInactive_)).transpose());
+
+  // tempSol(0:numInactive_-1) := inv(R) * Q' * b
+  //  = the least-squares solution for the inactive variables.
+  tempSolutionVector_.head(numInactive_) =            //
+      QR_.topLeftCorner(numInactive_, numInactive_)   //
+          .template triangularView<Upper>()           //
+          .solve(tempRhsVector_.head(numInactive_));  //
+
+  // tempSol(numInactive_:end) := 0 = the value for the constrained variables.
+  tempSolutionVector_.tail(y_.size() - numInactive_).setZero();
+
+  // Back permute into original column order of A
+  y_.noalias() = index_sets_.asPermutation() * tempSolutionVector_.head(y_.size());
+}
+
+}  // namespace Eigen
+
+#endif  // EIGEN_NNLS_H
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index d36da7296..b7661e70c 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -60,6 +60,8 @@ else()
   ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
 endif()
 
+ei_add_test(NNLS)
+
 ei_add_test(sparse_extra   "" "")
 
 find_package(FFTW)
diff --git a/unsupported/test/NNLS.cpp b/unsupported/test/NNLS.cpp
new file mode 100644
index 000000000..4b8ffa8b2
--- /dev/null
+++ b/unsupported/test/NNLS.cpp
@@ -0,0 +1,472 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) Essex Edwards <essex.edwards@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_RUNTIME_NO_MALLOC
+
+#include "main.h"
+#include <unsupported/Eigen/NNLS>
+
+/// Check that 'x' solves the NNLS optimization problem `min ||A*x-b|| s.t. 0 <= x`.
+/// The \p tolerance parameter is the absolute tolerance on the gradient, A'*(A*x-b).
+template <typename MatrixType, typename VectorB, typename VectorX, typename Scalar>
+static void verify_nnls_optimality(const MatrixType &A, const VectorB &b, const VectorX &x, const Scalar tolerance) {
+  // The NNLS optimality conditions are:
+  //
+  // * 0 = A'*A*x - A'*b - lambda
+  // * 0 <= x[i] \forall i
+  // * 0 <= lambda[i] \forall i
+  // * 0 = x[i]*lambda[i] \forall i
+  //
+  // we don't know lambda, but by assuming the first optimality condition is true,
+  // we can derive it and then check the others conditions.
+  const VectorX lambda = A.transpose() * (A * x - b);
+
+  // NNLS solutions are EXACTLY not negative.
+  VERIFY_LE(0, x.minCoeff());
+
+  // Exact lambda would be non-negative, but computed lambda might leak a little
+  VERIFY_LE(-tolerance, lambda.minCoeff());
+
+  // x[i]*lambda[i] == 0 <~~> (x[i]==0) || (lambda[i] is small)
+  VERIFY(((x.array() == Scalar(0)) || (lambda.array() <= tolerance)).all());
+}
+
+template <typename MatrixType, typename VectorB, typename VectorX>
+static void test_nnls_known_solution(const MatrixType &A, const VectorB &b, const VectorX &x_expected) {
+  using Scalar = typename MatrixType::Scalar;
+
+  using std::sqrt;
+  const Scalar tolerance = sqrt(Eigen::GenericNumTraits<Scalar>::epsilon());
+  Index max_iter = 5 * A.cols();  // A heuristic guess.
+  NNLS<MatrixType> nnls(A, max_iter, tolerance);
+  const VectorX x = nnls.solve(b);
+
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+  VERIFY_IS_APPROX(x, x_expected);
+  verify_nnls_optimality(A, b, x, tolerance);
+}
+
+template <typename MatrixType>
+static void test_nnls_random_problem() {
+  //
+  // SETUP
+  //
+
+  Index cols = MatrixType::ColsAtCompileTime;
+  if (cols == Dynamic) cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
+  Index rows = MatrixType::RowsAtCompileTime;
+  if (rows == Dynamic) rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
+  VERIFY_LE(cols, rows);  // To have a unique LS solution: cols <= rows.
+
+  // Make some sort of random test problem from a wide range of scales and condition numbers.
+  using std::pow;
+  using Scalar = typename MatrixType::Scalar;
+  const Scalar sqrtConditionNumber = pow(Scalar(10), internal::random<Scalar>(Scalar(0), Scalar(2)));
+  const Scalar scaleA = pow(Scalar(10), internal::random<Scalar>(Scalar(-3), Scalar(3)));
+  const Scalar minSingularValue = scaleA / sqrtConditionNumber;
+  const Scalar maxSingularValue = scaleA * sqrtConditionNumber;
+  MatrixType A(rows, cols);
+  generateRandomMatrixSvs(setupRangeSvs<Matrix<Scalar, Dynamic, 1>>(cols, minSingularValue, maxSingularValue), rows,
+                          cols, A);
+
+  // Make a random RHS also with a random scaling.
+  using VectorB = decltype(A.col(0).eval());
+  const Scalar scaleB = pow(Scalar(10), internal::random<Scalar>(Scalar(-3), Scalar(3)));
+  const VectorB b = scaleB * VectorB::Random(A.rows());
+
+  //
+  // ACT
+  //
+
+  using Scalar = typename MatrixType::Scalar;
+  using std::sqrt;
+  const Scalar tolerance =
+      sqrt(Eigen::GenericNumTraits<Scalar>::epsilon()) * b.cwiseAbs().maxCoeff() * A.cwiseAbs().maxCoeff();
+  Index max_iter = 5 * A.cols();  // A heuristic guess.
+  NNLS<MatrixType> nnls(A, max_iter, tolerance);
+  const NNLS<MatrixType>::SolutionVectorType &x = nnls.solve(b);
+
+  //
+  // VERIFY
+  //
+
+  // In fact, NNLS can fail on some problems, but they are rare in practice.
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+  verify_nnls_optimality(A, b, x, tolerance);
+}
+
+static void test_nnls_handles_zero_rhs() {
+  //
+  // SETUP
+  //
+  const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
+  const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
+  const MatrixXd A = MatrixXd::Random(rows, cols);
+  const VectorXd b = VectorXd::Zero(rows);
+
+  //
+  // ACT
+  //
+  NNLS<MatrixXd> nnls(A);
+  const VectorXd x = nnls.solve(b);
+
+  //
+  // VERIFY
+  //
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+  VERIFY_LE(nnls.iterations(), 1);  // 0 or 1 would be be fine for an edge case like this.
+  VERIFY_IS_EQUAL(x, VectorXd::Zero(cols));
+}
+
+static void test_nnls_handles_Mx0_matrix() {
+  //
+  // SETUP
+  //
+  const Index rows = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
+  const MatrixXd A(rows, 0);
+  const VectorXd b = VectorXd::Random(rows);
+
+  //
+  // ACT
+  //
+  NNLS<MatrixXd> nnls(A);
+  const VectorXd x = nnls.solve(b);
+
+  //
+  // VERIFY
+  //
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+  VERIFY_LE(nnls.iterations(), 0);
+  VERIFY_IS_EQUAL(x.size(), 0);
+}
+
+static void test_nnls_handles_0x0_matrix() {
+  //
+  // SETUP
+  //
+  const MatrixXd A(0, 0);
+  const VectorXd b(0);
+
+  //
+  // ACT
+  //
+  NNLS<MatrixXd> nnls(A);
+  const VectorXd x = nnls.solve(b);
+
+  //
+  // VERIFY
+  //
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+  VERIFY_LE(nnls.iterations(), 0);
+  VERIFY_IS_EQUAL(x.size(), 0);
+}
+
+static void test_nnls_handles_dependent_columns() {
+  //
+  // SETUP
+  //
+  const Index rank = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE / 2);
+  const Index cols = 2 * rank;
+  const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
+  const MatrixXd A = MatrixXd::Random(rows, rank) * MatrixXd::Random(rank, cols);
+  const VectorXd b = VectorXd::Random(rows);
+
+  //
+  // ACT
+  //
+  const double tolerance = 1e-8;
+  NNLS<MatrixXd> nnls(A);
+  const VectorXd &x = nnls.solve(b);
+
+  //
+  // VERIFY
+  //
+  // What should happen when the input 'A' has dependent columns?
+  // We might still succeed. Or we might not converge.
+  // Either outcome is fine. If Success is indicated,
+  // then 'x' must actually be a solution vector.
+
+  if (nnls.info() == ComputationInfo::Success) {
+    verify_nnls_optimality(A, b, x, tolerance);
+  }
+}
+
+static void test_nnls_handles_wide_matrix() {
+  //
+  // SETUP
+  //
+  const Index cols = internal::random<Index>(2, EIGEN_TEST_MAX_SIZE);
+  const Index rows = internal::random<Index>(2, cols - 1);
+  const MatrixXd A = MatrixXd::Random(rows, cols);
+  const VectorXd b = VectorXd::Random(rows);
+
+  //
+  // ACT
+  //
+  const double tolerance = 1e-8;
+  NNLS<MatrixXd> nnls(A);
+  const VectorXd &x = nnls.solve(b);
+
+  //
+  // VERIFY
+  //
+  // What should happen when the input 'A' is wide?
+  // The unconstrained least-squares problem has infinitely many solutions.
+  // Subject the the non-negativity constraints,
+  // the solution might actually be unique (e.g. it is [0,0,..,0]).
+  // So, NNLS might succeed or it might fail.
+  // Either outcome is fine. If Success is indicated,
+  // then 'x' must actually be a solution vector.
+
+  if (nnls.info() == ComputationInfo::Success) {
+    verify_nnls_optimality(A, b, x, tolerance);
+  }
+}
+
+// 4x2 problem, unconstrained solution positive
+static void test_nnls_known_1() {
+  Matrix<double, 4, 2> A(4, 2);
+  Matrix<double, 4, 1> b(4);
+  Matrix<double, 2, 1> x(2);
+  A << 1, 1, 2, 4, 3, 9, 4, 16;
+  b << 0.6, 2.2, 4.8, 8.4;
+  x << 0.1, 0.5;
+
+  return test_nnls_known_solution(A, b, x);
+}
+
+// 4x3 problem, unconstrained solution positive
+static void test_nnls_known_2() {
+  Matrix<double, 4, 3> A(4, 3);
+  Matrix<double, 4, 1> b(4);
+  Matrix<double, 3, 1> x(3);
+
+  A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
+  b << 0.73, 3.24, 8.31, 16.72;
+  x << 0.1, 0.5, 0.13;
+
+  test_nnls_known_solution(A, b, x);
+}
+
+// Simple 4x4 problem, unconstrained solution non-negative
+static void test_nnls_known_3() {
+  Matrix<double, 4, 4> A(4, 4);
+  Matrix<double, 4, 1> b(4);
+  Matrix<double, 4, 1> x(4);
+
+  A << 1, 1, 1, 1, 2, 4, 8, 16, 3, 9, 27, 81, 4, 16, 64, 256;
+  b << 0.73, 3.24, 8.31, 16.72;
+  x << 0.1, 0.5, 0.13, 0;
+
+  test_nnls_known_solution(A, b, x);
+}
+
+// Simple 4x3 problem, unconstrained solution non-negative
+static void test_nnls_known_4() {
+  Matrix<double, 4, 3> A(4, 3);
+  Matrix<double, 4, 1> b(4);
+  Matrix<double, 3, 1> x(3);
+
+  A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
+  b << 0.23, 1.24, 3.81, 8.72;
+  x << 0.1, 0, 0.13;
+
+  test_nnls_known_solution(A, b, x);
+}
+
+// Simple 4x3 problem, unconstrained solution indefinite
+static void test_nnls_known_5() {
+  Matrix<double, 4, 3> A(4, 3);
+  Matrix<double, 4, 1> b(4);
+  Matrix<double, 3, 1> x(3);
+
+  A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
+  b << 0.13, 0.84, 2.91, 7.12;
+  // Solution obtained by original nnls() implementation in Fortran
+  x << 0.0, 0.0, 0.1106544;
+
+  test_nnls_known_solution(A, b, x);
+}
+
+static void test_nnls_small_reference_problems() {
+  test_nnls_known_1();
+  test_nnls_known_2();
+  test_nnls_known_3();
+  test_nnls_known_4();
+  test_nnls_known_5();
+}
+
+static void test_nnls_with_half_precision() {
+  // The random matrix generation tools don't work with `half`,
+  // so here's a simpler setup mostly just to check that NNLS compiles & runs with custom scalar types.
+
+  using Mat = Matrix<half, 8, 2>;
+  using VecB = Matrix<half, 8, 1>;
+  using VecX = Matrix<half, 2, 1>;
+  Mat A = Mat::Random();  // full-column rank with high probability.
+  VecB b = VecB::Random();
+
+  NNLS<Mat> nnls(A, 20, half(1e-2f));
+  const VecX x = nnls.solve(b);
+
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+  verify_nnls_optimality(A, b, x, half(1e-1));
+}
+
+static void test_nnls_special_case_solves_in_zero_iterations() {
+  // The particular NNLS algorithm that is implemented starts with all variables
+  // in the active set.
+  // This test builds a system where all constraints are active at the solution,
+  // so that initial guess is already correct.
+  //
+  // If the implementation changes to another algorithm that does not have this property,
+  // then this test will need to change (e.g. starting from all constraints inactive,
+  // or using ADMM, or an interior point solver).
+
+  const Index n = 10;
+  const Index m = 3 * n;
+  const VectorXd b = VectorXd::Random(m);
+  // With high probability, this is full column rank, which we need for uniqueness.
+  MatrixXd A = MatrixXd::Random(m, n);
+  // Make every column of `A` such that adding it to the active set only /increases/ the objective,
+  // this ensuring the NNLS solution is all zeros.
+  const VectorXd alignment = -(A.transpose() * b).cwiseSign();
+  A = A * alignment.asDiagonal();
+
+  NNLS<MatrixXd> nnls(A);
+  nnls.solve(b);
+
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+  VERIFY(nnls.iterations() == 0);
+}
+
+static void test_nnls_special_case_solves_in_n_iterations() {
+  // The particular NNLS algorithm that is implemented starts with all variables
+  // in the active set and then adds one variable to the inactive set each iteration.
+  // This test builds a system where all variables are inactive at the solution,
+  // so it should take 'n' iterations to get there.
+  //
+  // If the implementation changes to another algorithm that does not have this property,
+  // then this test will need to change (e.g. starting from all constraints inactive,
+  // or using ADMM, or an interior point solver).
+
+  const Index n = 10;
+  const Index m = 3 * n;
+  // With high probability, this is full column rank, which we need for uniqueness.
+  const MatrixXd A = MatrixXd::Random(m, n);
+  const VectorXd x = VectorXd::Random(n).cwiseAbs().array() + 1;  // all positive.
+  const VectorXd b = A * x;
+
+  NNLS<MatrixXd> nnls(A);
+  nnls.solve(b);
+
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+  VERIFY(nnls.iterations() == n);
+}
+
+static void test_nnls_returns_NoConvergence_when_maxIterations_is_too_low() {
+  // Using the special case that takes `n` iterations,
+  // from `test_nnls_special_case_solves_in_n_iterations`,
+  // we can set max iterations too low and that should cause the solve to fail.
+
+  const Index n = 10;
+  const Index m = 3 * n;
+  // With high probability, this is full column rank, which we need for uniqueness.
+  const MatrixXd A = MatrixXd::Random(m, n);
+  const VectorXd x = VectorXd::Random(n).cwiseAbs().array() + 1;  // all positive.
+  const VectorXd b = A * x;
+
+  NNLS<MatrixXd> nnls(A);
+  const Index max_iters = n - 1;
+  nnls.setMaxIterations(max_iters);
+  nnls.solve(b);
+
+  VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::NoConvergence);
+  VERIFY(nnls.iterations() == max_iters);
+}
+
+static void test_nnls_default_maxIterations_is_twice_column_count() {
+  const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
+  const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
+  const MatrixXd A = MatrixXd::Random(rows, cols);
+
+  NNLS<MatrixXd> nnls(A);
+
+  VERIFY_IS_EQUAL(nnls.maxIterations(), 2 * cols);
+}
+
+static void test_nnls_does_not_allocate_during_solve() {
+  const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
+  const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
+  const MatrixXd A = MatrixXd::Random(rows, cols);
+  const VectorXd b = VectorXd::Random(rows);
+
+  NNLS<MatrixXd> nnls(A);
+
+  internal::set_is_malloc_allowed(false);
+  nnls.solve(b);
+  internal::set_is_malloc_allowed(true);
+}
+
+static void test_nnls_repeated_calls_to_compute_and_solve() {
+  const Index cols2 = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
+  const Index rows2 = internal::random<Index>(cols2, EIGEN_TEST_MAX_SIZE);
+  const MatrixXd A2 = MatrixXd::Random(rows2, cols2);
+  const VectorXd b2 = VectorXd::Random(rows2);
+
+  NNLS<MatrixXd> nnls;
+
+  for (int i = 0; i < 4; ++i) {
+    const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
+    const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
+    const MatrixXd A = MatrixXd::Random(rows, cols);
+
+    nnls.compute(A);
+    VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+
+    for (int j = 0; j < 3; ++j) {
+      const VectorXd b = VectorXd::Random(rows);
+      const VectorXd x = nnls.solve(b);
+      VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
+      verify_nnls_optimality(A, b, x, 1e-4);
+    }
+  }
+}
+
+EIGEN_DECLARE_TEST(NNLS) {
+  // Small matrices with known solutions:
+  CALL_SUBTEST_1(test_nnls_small_reference_problems());
+  CALL_SUBTEST_1(test_nnls_handles_Mx0_matrix());
+  CALL_SUBTEST_1(test_nnls_handles_0x0_matrix());
+
+  for (int i = 0; i < g_repeat; i++) {
+    // Essential NNLS properties, across different types.
+    CALL_SUBTEST_2(test_nnls_random_problem<MatrixXf>());
+    CALL_SUBTEST_3(test_nnls_random_problem<MatrixXd>());
+    using MatFixed = Matrix<double, 12, 5>;
+    CALL_SUBTEST_4(test_nnls_random_problem<MatFixed>());
+    CALL_SUBTEST_5(test_nnls_with_half_precision());
+
+    // Robustness tests:
+    CALL_SUBTEST_6(test_nnls_handles_zero_rhs());
+    CALL_SUBTEST_6(test_nnls_handles_dependent_columns());
+    CALL_SUBTEST_6(test_nnls_handles_wide_matrix());
+
+    // Properties specific to the implementation,
+    // not NNLS in general.
+    CALL_SUBTEST_7(test_nnls_special_case_solves_in_zero_iterations());
+    CALL_SUBTEST_7(test_nnls_special_case_solves_in_n_iterations());
+    CALL_SUBTEST_7(test_nnls_returns_NoConvergence_when_maxIterations_is_too_low());
+    CALL_SUBTEST_7(test_nnls_default_maxIterations_is_twice_column_count());
+    CALL_SUBTEST_8(test_nnls_repeated_calls_to_compute_and_solve());
+
+    // This test fails. It hits allocations in HouseholderSequence.h
+    // test_nnls_does_not_allocate_during_solve();
+  }
+}