ViennaCL - The Vienna Computing Library: /data/development/ViennaCL/dev/viennacl/linalg/qr.hpp Source File

Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_QR_HPP
00002 #define VIENNACL_LINALG_QR_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2011, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008 
00009                             -----------------
00010                   ViennaCL - The Vienna Computing Library
00011                             -----------------
00012 
00013    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00014                
00015    (A list of authors and contributors can be found in the PDF manual)
00016 
00017    License:         MIT (X11), see file LICENSE in the base directory
00018 ============================================================================= */
00019 
00024 #include <utility>
00025 #include <iostream>
00026 #include <fstream>
00027 #include <string>
00028 #include <algorithm>
00029 #include <vector>
00030 #include <math.h>
00031 #include <cmath>
00032 #include "boost/numeric/ublas/vector.hpp"
00033 #include "boost/numeric/ublas/matrix.hpp"
00034 #include "boost/numeric/ublas/matrix_proxy.hpp"
00035 #include "boost/numeric/ublas/io.hpp"
00036 #include "boost/numeric/ublas/matrix_expression.hpp"
00037 
00038 #include "viennacl/matrix.hpp"
00039 #include "viennacl/linalg/prod.hpp"
00040 
00041 namespace viennacl
00042 {
00043     namespace linalg
00044     {
00045       
00046         // orthogonalises j-th column of A
00047         template <typename MatrixType, typename VectorType>
00048         typename MatrixType::value_type setup_householder_vector(MatrixType const & A, VectorType & v, size_t j)
00049         {
00050           typedef typename MatrixType::value_type   ScalarType;
00051           
00052           //compute norm of column below diagonal:
00053           ScalarType sigma = 0;
00054           ScalarType beta = 0;
00055           for (size_t k = j+1; k<A.size1(); ++k)
00056             sigma += A(k, j) * A(k, j);
00057 
00058           //get v from A:
00059           for (size_t k = j+1; k<A.size1(); ++k)
00060             v[k] = A(k, j);
00061           
00062           if (sigma == 0)
00063             return 0;
00064           else
00065           {
00066             ScalarType mu = sqrt(sigma + A(j,j)*A(j,j));
00067             //std::cout << "mu: " << mu << std::endl;
00068             //std::cout << "sigma: " << sigma << std::endl;
00069             
00070             ScalarType v1;
00071             if (A(j,j) <= 0)
00072               v1 = A(j,j) - mu;
00073             else
00074               v1 = -sigma / (A(j,j) + mu);
00075             
00076             beta = 2.0 * v1 * v1 / (sigma + v1 * v1);
00077             
00078             //divide v by its diagonal element v[j]
00079             v[j] = 1;
00080             for (size_t k = j+1; k<A.size1(); ++k)
00081               v[k] /= v1;
00082           }
00083             
00084           return beta;
00085         }
00086 
00087         // Apply (I - beta v v^T) to the k-th column of A, where v is the reflector starting at j-th row/column
00088         template <typename MatrixType, typename VectorType, typename ScalarType>
00089         void householder_reflect(MatrixType & A, VectorType & v, ScalarType beta, size_t j, size_t k)
00090         {
00091           ScalarType v_in_col = A(j,k);
00092           for (size_t i=j+1; i<A.size1(); ++i)
00093             v_in_col += v[i] * A(i,k);
00094 
00095           for (size_t i=j; i<A.size1(); ++i)
00096             A(i,k) -= beta * v_in_col * v[i];
00097         }
00098 
00099         // Apply (I - beta v v^T) to A, where v is the reflector starting at j-th row/column
00100         template <typename MatrixType, typename VectorType, typename ScalarType>
00101         void householder_reflect(MatrixType & A, VectorType & v, ScalarType beta, size_t j)
00102         {
00103           size_t column_end = A.size2();
00104           
00105           for (size_t k=j; k<column_end; ++k) //over columns
00106             householder_reflect(A, v, beta, j, k);
00107         }
00108         
00109         
00110         template <typename MatrixType, typename VectorType>
00111         void write_householder_to_A(MatrixType & A, VectorType const & v, size_t j)
00112         {
00113           for (size_t i=j+1; i<A.size1(); ++i)
00114             A(i,j) = v[i];
00115         }
00116         
00117         
00118         //takes an inplace QR matrix A and generates Q and R explicitly
00119         template <typename MatrixType, typename VectorType>
00120         void recoverQ(MatrixType const & A, VectorType const & betas, MatrixType & Q, MatrixType & R)
00121         {
00122           typedef typename MatrixType::value_type   ScalarType;
00123           
00124           std::vector<ScalarType> v(A.size1());
00125 
00126           Q.clear();
00127           R.clear();
00128 
00129           //
00130           // Recover R from upper-triangular part of A:
00131           //
00132           size_t i_max = std::min(R.size1(), R.size2());
00133           for (size_t i=0; i<i_max; ++i)
00134             for (size_t j=i; j<R.size2(); ++j)
00135               R(i,j) = A(i,j);
00136          
00137           //
00138           // Recover Q by applying all the Householder reflectors to the identity matrix:
00139           //
00140           for (size_t i=0; i<Q.size1(); ++i)
00141             Q(i,i) = 1.0;
00142 
00143           size_t j_max = std::min(A.size1(), A.size2());
00144           for (size_t j=0; j<j_max; ++j)
00145           {
00146             size_t col_index = j_max - j - 1;
00147             v[col_index] = 1.0;
00148             for (size_t i=col_index+1; i<A.size1(); ++i)
00149               v[i] = A(i, col_index);
00150             
00151             /*std::cout << "Recovery with beta = " << betas[col_index] << ", j=" << col_index << std::endl;
00152             std::cout << "v: ";
00153             for (size_t i=0; i<v.size(); ++i)
00154               std::cout << v[i] << ", ";
00155             std::cout << std::endl;*/
00156 
00157             if (betas[col_index] != 0)
00158               householder_reflect(Q, v, betas[col_index], col_index);
00159           }
00160         }
00161        
00162         template<typename MatrixType>
00163         std::vector<typename MatrixType::value_type> qr(MatrixType & A)
00164         {
00165           typedef typename MatrixType::value_type   ScalarType;
00166           
00167           std::vector<ScalarType> betas(A.size2());
00168           std::vector<ScalarType> v(A.size1());
00169 
00170           //copy A to Q:
00171           for (size_t j=0; j<A.size2(); ++j)
00172           {
00173              betas[j] = setup_householder_vector(A, v, j);
00174              householder_reflect(A, v, betas[j], j);
00175              write_householder_to_A(A, v, j);
00176           }
00177           
00178           return betas;
00179         }
00180         
00181         
00182         
00183         class range
00184         {
00185           public:
00186             range(size_t start, size_t end) : start_(start), end_(end) {}
00187             
00188             size_t lower() const { return start_; }
00189             size_t upper() const { return end_; }
00190             
00191           private:
00192             size_t start_;
00193             size_t end_;
00194         };
00195 
00196         template <typename MatrixType>
00197         class sub_matrix
00198         {
00199           public:
00200             typedef typename MatrixType::value_type value_type;
00201             
00202             sub_matrix(MatrixType & mat,
00203                        range row_range,
00204                        range col_range) : mat_(mat), row_range_(row_range), col_range_(col_range) {}
00205                        
00206             value_type operator()(size_t row, size_t col) const
00207             {
00208               assert(row < size1());
00209               assert(col < size2());
00210               return mat_(row + row_range_.lower(), col + col_range_.lower()); 
00211             }
00212                        
00213             size_t size1() const { return row_range_.upper() - row_range_.lower(); }
00214             size_t size2() const { return col_range_.upper() - col_range_.lower(); }
00215             
00216           private:
00217             MatrixType & mat_;
00218             range row_range_;
00219             range col_range_;
00220         };
00221 
00222 
00223         //computes C = prod(A, B)
00224         template <typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC>
00225         void prod_AA(MatrixTypeA const & A, MatrixTypeB const & B, MatrixTypeC & C)
00226         {
00227           assert(C.size1() == A.size1());
00228           assert(A.size2() == B.size1());
00229           assert(B.size2() == C.size2());
00230           
00231           typedef typename MatrixTypeC::value_type   ScalarType;
00232           
00233           for (size_t i=0; i<C.size1(); ++i)
00234           {
00235             for (size_t j=0; j<C.size2(); ++j)
00236             {
00237               ScalarType val = 0;
00238               for (size_t k=0; k<A.size2(); ++k)
00239                 val += A(i, k) * B(k, j);
00240               C(i, j) = val;
00241             }
00242           }
00243         }
00244         
00245         //computes C = prod(A^T, B)
00246         template <typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC>
00247         void prod_TA(MatrixTypeA const & A, MatrixTypeB const & B, MatrixTypeC & C)
00248         {
00249           assert(C.size1() == A.size2());
00250           assert(A.size1() == B.size1());
00251           assert(B.size2() == C.size2());
00252           
00253           typedef typename MatrixTypeC::value_type   ScalarType;
00254           
00255           for (size_t i=0; i<C.size1(); ++i)
00256           {
00257             for (size_t j=0; j<C.size2(); ++j)
00258             {
00259               ScalarType val = 0;
00260               for (size_t k=0; k<A.size1(); ++k)
00261                 val += A(k, i) * B(k, j);
00262               C(i, j) = val;
00263             }
00264           }
00265         }
00266         
00267         
00268 
00269         template<typename MatrixType>
00270         std::vector<typename MatrixType::value_type> inplace_qr(MatrixType & A)
00271         {
00272           typedef typename MatrixType::value_type   ScalarType;
00273           
00274           std::vector<ScalarType> betas(A.size2());
00275           std::vector<ScalarType> v(A.size1());
00276 
00277           size_t block_size = 90;
00278           MatrixType Y(A.size1(), block_size); Y.clear();
00279           MatrixType W(A.size1(), block_size); W.clear();
00280             
00281           //run over A in a block-wise manner:
00282           for (size_t j = 0; j < std::min(A.size1(), A.size2()); j += block_size)
00283           {
00284             //determine Householder vectors:
00285             for (size_t k = 0; k < block_size; ++k)
00286             {
00287               betas[j+k] = setup_householder_vector(A, v, j+k);
00288               for (size_t l = k; l < block_size; ++l)
00289                 householder_reflect(A, v, betas[j+k], j+k, j+l);
00290 
00291               write_householder_to_A(A, v, j+k);
00292             }
00293 
00294             //
00295             // Setup Y:
00296             //
00297             for (size_t k = 0; k < block_size; ++k)
00298             {
00299               //write Householder to Y:
00300               Y(k,k) = 1.0;
00301               for (size_t l=k+1; l<A.size1(); ++l)
00302                 Y(l,k) = A(l, j+k);
00303             }
00304             
00305             //
00306             // Setup W:
00307             //
00308             
00309             //first vector:
00310             W(j, 0) = -betas[j];
00311             for (size_t l=j+1; l<A.size1(); ++l)
00312               W(l,0) = -betas[j] * A(l, j);
00313             
00314             //k-th column of W is given by -beta * (Id + W*Y^T) v_k, where W and Y have k-1 columns
00315             for (size_t k = 1; k < block_size; ++k)
00316             {
00317               //compute Y^T v_k:
00318               std::vector<ScalarType> temp(k);  //actually of size (k \times 1)
00319               for (size_t l=0; l<k; ++l)
00320                 for (size_t n=j; n<A.size1(); ++n)
00321                   temp[l] += Y(n, l) * Y(n, k);
00322                 
00323               //compute W * temp and add to z, which is directly written to W:
00324               for (size_t n=0; n<A.size1(); ++n)
00325               {
00326                 ScalarType val = 0;
00327                 for (size_t l=0; l<k; ++l)
00328                   val += temp[l] * W(n, l);
00329                 W(n, k) = -1.0 * betas[j+k] * (Y(n, k) + val);
00330               }
00331             }
00332 
00333             //
00334             //apply (I+WY^T)^T = I + Y W^T to the remaining columns of A:
00335             //
00336             
00337             if (A.size2() - j - block_size > 0)
00338             {
00339               //temp = prod(W^T, A)
00340               
00341               MatrixType temp(block_size, A.size2() - j - block_size);
00342               
00343               boost::numeric::ublas::range A_rows(j, A.size1());
00344               boost::numeric::ublas::range A_cols(j+block_size, A.size2());
00345               boost::numeric::ublas::matrix_range<MatrixType> A_part(A, A_rows, A_cols);
00346 
00347               viennacl::matrix<ScalarType, viennacl::column_major> gpu_A_part(A_part.size1(), A_part.size2());
00348               viennacl::copy(A_part, gpu_A_part);
00349 
00350               //transfer W
00351               boost::numeric::ublas::range W_cols(0, block_size);
00352               boost::numeric::ublas::matrix_range<MatrixType> W_part(W, A_rows, W_cols);
00353               viennacl::matrix<ScalarType, viennacl::column_major> gpu_W(W_part.size1(), W_part.size2());
00354               viennacl::copy(W_part, gpu_W);
00355               
00356               viennacl::matrix<ScalarType, viennacl::column_major> gpu_temp(gpu_W.size2(), gpu_A_part.size2());
00357               gpu_temp = viennacl::linalg::prod(trans(gpu_W), gpu_A_part);
00358               
00359               
00360               
00361               //A += Y * temp:
00362               boost::numeric::ublas::range Y_cols(0, Y.size2());
00363               boost::numeric::ublas::matrix_range<MatrixType> Y_part(Y, A_rows, Y_cols);
00364               
00365               viennacl::matrix<ScalarType, viennacl::column_major> gpu_Y(Y_part.size1(), Y_part.size2());
00366               viennacl::copy(Y_part, gpu_Y);
00367 
00368               //A_part += prod(Y_part, temp);
00369               gpu_A_part += prod(gpu_Y, gpu_temp);
00370               
00371               MatrixType A_part_back(A_part.size1(), A_part.size2());
00372               viennacl::copy(gpu_A_part, A_part_back);
00373                 
00374               A_part = A_part_back;
00375               //A_part += prod(Y_part, temp);
00376             }
00377           }
00378           
00379           return betas;
00380         }
00381 
00382 
00383         template<typename MatrixType>
00384         std::vector<typename MatrixType::value_type> inplace_qr_ublas(MatrixType & A)
00385         {
00386           typedef typename MatrixType::value_type   ScalarType;
00387           
00388           std::vector<ScalarType> betas(A.size2());
00389           std::vector<ScalarType> v(A.size1());
00390 
00391           size_t block_size = 3;
00392           MatrixType Y(A.size1(), block_size); Y.clear();
00393           MatrixType W(A.size1(), block_size); W.clear();
00394             
00395           //run over A in a block-wise manner:
00396           for (size_t j = 0; j < std::min(A.size1(), A.size2()); j += block_size)
00397           {
00398             //determine Householder vectors:
00399             for (size_t k = 0; k < block_size; ++k)
00400             {
00401               betas[j+k] = setup_householder_vector(A, v, j+k);
00402               for (size_t l = k; l < block_size; ++l)
00403                 householder_reflect(A, v, betas[j+k], j+k, j+l);
00404 
00405               write_householder_to_A(A, v, j+k);
00406             }
00407 
00408             //
00409             // Setup Y:
00410             //
00411             for (size_t k = 0; k < block_size; ++k)
00412             {
00413               //write Householder to Y:
00414               Y(k,k) = 1.0;
00415               for (size_t l=k+1; l<A.size1(); ++l)
00416                 Y(l,k) = A(l, j+k);
00417             }
00418             
00419             //
00420             // Setup W:
00421             //
00422             
00423             //first vector:
00424             W(j, 0) = -betas[j];
00425             for (size_t l=j+1; l<A.size1(); ++l)
00426               W(l,0) = -betas[j] * A(l, j);
00427             
00428             //k-th column of W is given by -beta * (Id + W*Y^T) v_k, where W and Y have k-1 columns
00429             for (size_t k = 1; k < block_size; ++k)
00430             {
00431               //compute Y^T v_k:
00432               std::vector<ScalarType> temp(k);  //actually of size (k \times 1)
00433               for (size_t l=0; l<k; ++l)
00434                 for (size_t n=j; n<A.size1(); ++n)
00435                   temp[l] += Y(n, l) * Y(n, k);
00436                 
00437               //compute W * temp and add to z, which is directly written to W:
00438               for (size_t n=0; n<A.size1(); ++n)
00439               {
00440                 ScalarType val = 0;
00441                 for (size_t l=0; l<k; ++l)
00442                   val += temp[l] * W(n, l);
00443                 W(n, k) = -1.0 * betas[j+k] * (Y(n, k) + val);
00444               }
00445             }
00446 
00447             //
00448             //apply (I+WY^T)^T = I + Y W^T to the remaining columns of A:
00449             //
00450             
00451             if (A.size2() - j - block_size > 0)
00452             {
00453               //temp = prod(W^T, A)
00454               MatrixType temp(block_size, A.size2() - j - block_size);
00455               
00456               boost::numeric::ublas::range A_rows(j, A.size1());
00457               boost::numeric::ublas::range A_cols(j+block_size, A.size2());
00458               boost::numeric::ublas::matrix_range<MatrixType> A_part(A, A_rows, A_cols);
00459 
00460               boost::numeric::ublas::range W_cols(0, block_size);
00461               boost::numeric::ublas::matrix_range<MatrixType> W_part(W, A_rows, W_cols);
00462               
00463               temp = boost::numeric::ublas::prod(trans(W_part), A_part);
00464               
00465               
00466               //A += Y * temp:
00467               boost::numeric::ublas::range Y_cols(0, Y.size2());
00468               boost::numeric::ublas::matrix_range<MatrixType> Y_part(Y, A_rows, Y_cols);
00469               
00470               A_part += prod(Y_part, temp);
00471             }
00472           }
00473           
00474           return betas;
00475         }
00476 
00477 
00478         template<typename MatrixType>
00479         std::vector<typename MatrixType::value_type> inplace_qr_pure(MatrixType & A)
00480         {
00481           typedef typename MatrixType::value_type   ScalarType;
00482           
00483           std::vector<ScalarType> betas(A.size2());
00484           std::vector<ScalarType> v(A.size1());
00485 
00486           size_t block_size = 5;
00487           MatrixType Y(A.size1(), block_size); Y.clear();
00488           MatrixType W(A.size1(), block_size); W.clear();
00489             
00490           //run over A in a block-wise manner:
00491           for (size_t j = 0; j < std::min(A.size1(), A.size2()); j += block_size)
00492           {
00493             //determine Householder vectors:
00494             for (size_t k = 0; k < block_size; ++k)
00495             {
00496               betas[j+k] = setup_householder_vector(A, v, j+k);
00497               for (size_t l = k; l < block_size; ++l)
00498                 householder_reflect(A, v, betas[j+k], j+k, j+l);
00499 
00500               write_householder_to_A(A, v, j+k);
00501             }
00502 
00503             //
00504             // Setup Y:
00505             //
00506             for (size_t k = 0; k < block_size; ++k)
00507             {
00508               //write Householder to Y:
00509               Y(k,k) = 1.0;
00510               for (size_t l=k+1; l<A.size1(); ++l)
00511                 Y(l,k) = A(l, j+k);
00512             }
00513             
00514             //
00515             // Setup W:
00516             //
00517             
00518             //first vector:
00519             W(j, 0) = -betas[j];
00520             for (size_t l=j+1; l<A.size1(); ++l)
00521               W(l,0) = -betas[j] * A(l, j);
00522             
00523             //k-th column of W is given by -beta * (Id + W*Y^T) v_k, where W and Y have k-1 columns
00524             for (size_t k = 1; k < block_size; ++k)
00525             {
00526               //compute Y^T v_k:
00527               std::vector<ScalarType> temp(k);  //actually of size (k \times 1)
00528               for (size_t l=0; l<k; ++l)
00529                 for (size_t n=j; n<A.size1(); ++n)
00530                   temp[l] += Y(n, l) * Y(n, k);
00531                 
00532               //compute W * temp and add to z, which is directly written to W:
00533               for (size_t n=0; n<A.size1(); ++n)
00534               {
00535                 ScalarType val = 0;
00536                 for (size_t l=0; l<k; ++l)
00537                   val += temp[l] * W(n, l);
00538                 W(n, k) = -1.0 * betas[j+k] * (Y(n, k) + val);
00539               }
00540             }
00541 
00542             //
00543             //apply (I+WY^T)^T = I + Y W^T to the remaining columns of A:
00544             //
00545             
00546             if (A.size2() - j - block_size > 0)
00547             {
00548               //temp = prod(W^T, A)
00549               MatrixType temp(block_size, A.size2() - j - block_size);
00550               ScalarType entry = 0;
00551               for (size_t l = 0; l < temp.size2(); ++l)
00552               {
00553                 for (size_t k = 0; k < temp.size1(); ++k)
00554                 {
00555                   entry = 0;
00556                   for (size_t n = j; n < A.size1(); ++n)
00557                     entry += W(n, k) * A(n, j + block_size + l);
00558                   temp(k,l) = entry;
00559                 }
00560               }
00561               
00562               //A += Y * temp:
00563               for (size_t l = j+block_size; l < A.size2(); ++l)
00564               {
00565                 for (size_t k = j; k<A.size1(); ++k)
00566                 {
00567                   ScalarType val = 0;
00568                   for (size_t n=0; n<block_size; ++n)
00569                     val += Y(k, n) * temp(n, l-j-block_size);
00570                   A(k, l) += val;
00571                 }
00572               }
00573             }
00574           }
00575           
00576           return betas;
00577         }
00578         
00579     } //linalg
00580 } //viennacl
00581 
00582 
00583 #endif