atomic_three_mat_mul.hpp

View page source

Matrix Multiply as an Atomic Operation

See Also

atomic_two_eigen_mat_mul.hpp

Purpose

Use scalar double operations in an atomic_three operation that computes the matrix product for AD<double operations.

parameter_x

This example demonstrates the use of the parameter_x argument to the atomic_three virtual functions.

type_x

This example also demonstrates the use of the type_x argument to the atomic_three virtual functions.

Matrix Dimensions

The first three components of the argument vector ax in the call afun ( ax , ay ) are parameters and contain the matrix dimensions. This enables them to be different for each use of the same atomic function afun . These dimensions are:

ax [0]

nr_left

number of rows in the left matrix and result matrix

ax [1]

n_middle

columns in the left matrix and rows in right matrix

ax [2]

nc_right

number of columns in the right matrix and result matrix

Left Matrix

The number of elements in the left matrix is

n_left = nr_left * n_middle

The elements are in ax [3] through ax [2+ n_left ] in row major order.

Right Matrix

The number of elements in the right matrix is

n_right = n_middle * nc_right

The elements are in ax [3+ n_left ] through ax [2+ n_left + n_right ] in row major order.

Result Matrix

The number of elements in the result matrix is

n_result = nr_left * nc_right

The elements are in ay [0] through ay [ n_result -1 ] in row major order.

Start Class Definition

# include <cppad/cppad.hpp>
namespace { // Begin empty namespace
using CppAD::vector;
//
// matrix result = left * right
class atomic_mat_mul : public CppAD::atomic_three<double> {

Constructor

public:
   // ---------------------------------------------------------------------
   // constructor
   atomic_mat_mul(void) : CppAD::atomic_three<double>("mat_mul")
   { }
private:

Left Operand Element Index

Index in the Taylor coefficient matrix tx of a left matrix element.

   size_t left(
      size_t i        , // left matrix row index
      size_t j        , // left matrix column index
      size_t k        , // Taylor coeffocient order
      size_t nk       , // number of Taylor coefficients in tx
      size_t nr_left  , // rows in left matrix
      size_t n_middle , // rows in left and columns in right
      size_t nc_right ) // columns in right matrix
   {  assert( i < nr_left );
      assert( j < n_middle );
      return (3 + i * n_middle + j) * nk + k;
   }

Right Operand Element Index

Index in the Taylor coefficient matrix tx of a right matrix element.

   size_t right(
      size_t i        , // right matrix row index
      size_t j        , // right matrix column index
      size_t k        , // Taylor coeffocient order
      size_t nk       , // number of Taylor coefficients in tx
      size_t nr_left  , // rows in left matrix
      size_t n_middle , // rows in left and columns in right
      size_t nc_right ) // columns in right matrix
   {  assert( i < n_middle );
      assert( j < nc_right );
      size_t offset = 3 + nr_left * n_middle;
      return (offset + i * nc_right + j) * nk + k;
   }

Result Element Index

Index in the Taylor coefficient matrix ty of a result matrix element.

   size_t result(
      size_t i        , // result matrix row index
      size_t j        , // result matrix column index
      size_t k        , // Taylor coeffocient order
      size_t nk       , // number of Taylor coefficients in ty
      size_t nr_left  , // rows in left matrix
      size_t n_middle , // rows in left and columns in right
      size_t nc_right ) // columns in right matrix
   {  assert( i < nr_left  );
      assert( j < nc_right );
      return (i * nc_right + j) * nk + k;
   }

Forward Matrix Multiply

Forward mode multiply Taylor coefficients in tx and sum into ty (for one pair of left and right orders)

   void forward_multiply(
      size_t                 k_left   , // order for left coefficients
      size_t                 k_right  , // order for right coefficients
      const vector<double>&  tx       , // domain space Taylor coefficients
              vector<double>&  ty       , // range space Taylor coefficients
      size_t                 nr_left  , // rows in left matrix
      size_t                 n_middle , // rows in left and columns in right
      size_t                 nc_right ) // columns in right matrix
   {
      size_t nx       = 3 + (nr_left + nc_right) * n_middle;
      size_t nk       = tx.size() / nx;
# ifndef NDEBUG
      size_t ny       = nr_left * nc_right;
      assert( nk == ty.size() / ny );
# endif
      //
      size_t k_result = k_left + k_right;
      assert( k_result < nk );
      //
      for(size_t i = 0; i < nr_left; i++)
      {  for(size_t j = 0; j < nc_right; j++)
         {  double sum = 0.0;
            for(size_t ell = 0; ell < n_middle; ell++)
            {  size_t i_left  = left(
                  i, ell, k_left, nk, nr_left, n_middle, nc_right
               );
               size_t i_right = right(
                  ell, j,  k_right, nk, nr_left, n_middle, nc_right
               );
               sum           += tx[i_left] * tx[i_right];
            }
            size_t i_result = result(
               i, j, k_result, nk, nr_left, n_middle, nc_right
            );
            ty[i_result]   += sum;
         }
      }
   }

Reverse Matrix Multiply

Reverse mode partials of Taylor coefficients and sum into px (for one pair of left and right orders)

   void reverse_multiply(
      size_t                 k_left  , // order for left coefficients
      size_t                 k_right , // order for right coefficients
      const vector<double>&  tx      , // domain space Taylor coefficients
      const vector<double>&  ty      , // range space Taylor coefficients
              vector<double>&  px      , // partials w.r.t. tx
      const vector<double>&  py      , // partials w.r.t. ty
      size_t                 nr_left  , // rows in left matrix
      size_t                 n_middle , // rows in left and columns in right
      size_t                 nc_right ) // columns in right matrix
   {
      size_t nx       = 3 + (nr_left + nc_right) * n_middle;
      size_t nk       = tx.size() / nx;
# ifndef NDEBUG
      size_t ny       = nr_left * nc_right;
      assert( nk == ty.size() / ny );
# endif
      assert( tx.size() == px.size() );
      assert( ty.size() == py.size() );
      //
      size_t k_result = k_left + k_right;
      assert( k_result < nk );
      //
      for(size_t i = 0; i < nr_left; i++)
      {  for(size_t j = 0; j < nc_right; j++)
         {  size_t i_result = result(
               i, j, k_result, nk, nr_left, n_middle, nc_right
            );
            for(size_t ell = 0; ell < n_middle; ell++)
            {  size_t i_left  = left(
                  i, ell, k_left, nk, nr_left, n_middle, nc_right
               );
               size_t i_right = right(
                  ell, j,  k_right, nk, nr_left, n_middle, nc_right
               );
               // sum        += tx[i_left] * tx[i_right];
               px[i_left]    += tx[i_right] * py[i_result];
               px[i_right]   += tx[i_left]  * py[i_result];
            }
         }
      }
      return;
   }

for_type

Routine called by CppAD during afun(ax, ay) .

   // calculate type_y
   virtual bool for_type(
      const vector<double>&               parameter_x ,
      const vector<CppAD::ad_type_enum>&  type_x      ,
      vector<CppAD::ad_type_enum>&        type_y      )
   {  assert( parameter_x.size() == type_x.size() );
      bool ok = true;
      ok &= type_x[0] == CppAD::constant_enum;
      ok &= type_x[1] == CppAD::constant_enum;
      ok &= type_x[2] == CppAD::constant_enum;
      if( ! ok )
         return false;
      //
      size_t nr_left  = size_t( parameter_x[0] );
      size_t n_middle = size_t( parameter_x[1] );
      size_t nc_right = size_t( parameter_x[2] );
      //
      ok &= type_x.size() == 3 + (nr_left + nc_right) * n_middle;
      ok &= type_y.size() == n_middle * nc_right;
      if( ! ok )
         return false;
      //
      // commpute type_y
      size_t nk = 1; // number of orders
      size_t k  = 0; // order
      for(size_t i = 0; i < nr_left; ++i)
      {  for(size_t j = 0; j < nc_right; ++j)
         {  // compute type for result[i, j]
            CppAD::ad_type_enum type_yij = CppAD::constant_enum;
            for(size_t ell = 0; ell < n_middle; ++ell)
            {  // index for left(i, ell)
               size_t i_left = left(
                  i, ell, k, nk, nr_left, n_middle, nc_right
               );
               // indx for right(ell, j)
               size_t i_right = right(
                  ell, j, k, nk, nr_left, n_middle, nc_right
               );
               // multiplication on left or right by the constant zero
               // always results in a constant
               bool zero_left  = type_x[i_left] == CppAD::constant_enum;
               zero_left      &= parameter_x[i_left] == 0.0;
               bool zero_right = type_x[i_right] == CppAD::constant_enum;
               zero_right     &= parameter_x[i_right] == 0.0;
               if( ! (zero_left | zero_right) )
               {  type_yij = std::max(type_yij, type_x[i_left] );
                  type_yij = std::max(type_yij, type_x[i_right] );
               }
            }
            size_t i_result = result(
               i, j, k, nk, nr_left, n_middle, nc_right
            );
            type_y[i_result] = type_yij;
         }
      }
      return true;
   }

forward

Routine called by CppAD during Forward mode.

   virtual bool forward(
      const vector<double>&              parameter_x ,
      const vector<CppAD::ad_type_enum>& type_x ,
      size_t                             need_y ,
      size_t                             q      ,
      size_t                             p      ,
      const vector<double>&              tx     ,
      vector<double>&                    ty     )
   {  size_t n_order  = p + 1;
      size_t nr_left  = size_t( tx[ 0 * n_order + 0 ] );
      size_t n_middle = size_t( tx[ 1 * n_order + 0 ] );
      size_t nc_right = size_t( tx[ 2 * n_order + 0 ] );
# ifndef NDEBUG
      size_t nx       = 3 + (nr_left + nc_right) * n_middle;
      size_t ny       = nr_left * nc_right;
# endif
      assert( nx * n_order == tx.size() );
      assert( ny * n_order == ty.size() );
      size_t i, j, ell;

      // initialize result as zero
      size_t k;
      for(i = 0; i < nr_left; i++)
      {  for(j = 0; j < nc_right; j++)
         {  for(k = q; k <= p; k++)
            {  size_t i_result = result(
                  i, j, k, n_order, nr_left, n_middle, nc_right
               );
               ty[i_result] = 0.0;
            }
         }
      }
      for(k = q; k <= p; k++)
      {  // sum the produces that result in order k
         for(ell = 0; ell <= k; ell++)
            forward_multiply(
               ell, k - ell, tx, ty, nr_left, n_middle, nc_right
            );
      }

      // all orders are implemented, so always return true
      return true;
   }

reverse

Routine called by CppAD during Reverse mode.

   virtual bool reverse(
      const vector<double>&              parameter_x ,
      const vector<CppAD::ad_type_enum>& type_x      ,
      size_t                             p           ,
      const vector<double>&              tx          ,
      const vector<double>&              ty          ,
      vector<double>&                    px          ,
      const vector<double>&              py          )
   {  size_t n_order  = p + 1;
      size_t nr_left  = size_t( tx[ 0 * n_order + 0 ] );
      size_t n_middle = size_t( tx[ 1 * n_order + 0 ] );
      size_t nc_right = size_t( tx[ 2 * n_order + 0 ] );
# ifndef NDEBUG
      size_t nx       = 3 + (nr_left + nc_right) * n_middle;
      size_t ny       = nr_left * nc_right;
# endif
      assert( nx * n_order == tx.size() );
      assert( ny * n_order == ty.size() );
      assert( px.size() == tx.size() );
      assert( py.size() == ty.size() );

      // initialize summation
      for(size_t i = 0; i < px.size(); i++)
         px[i] = 0.0;

      // number of orders to differentiate
      size_t k = n_order;
      while(k--)
      {  // differentiate the produces that result in order k
         for(size_t ell = 0; ell <= k; ell++)
            reverse_multiply(
               ell, k - ell, tx, ty, px, py, nr_left, n_middle, nc_right
            );
      }

      // all orders are implented, so always return true
      return true;
   }

jac_sparsity

   // Jacobian sparsity routine called by CppAD
   virtual bool jac_sparsity(
      const vector<double>&               parameter_x ,
      const vector<CppAD::ad_type_enum>&  type_x      ,
      bool                                dependency  ,
      const vector<bool>&                 select_x    ,
      const vector<bool>&                 select_y    ,
      CppAD::sparse_rc< vector<size_t> >& pattern_out )
   {
      size_t n = select_x.size();
      size_t m = select_y.size();
      assert( parameter_x.size() == n );
      assert( type_x.size() == n );
      //
      size_t nr_left  = size_t( parameter_x[0] );
      size_t n_middle = size_t( parameter_x[1] );
      size_t nc_right = size_t( parameter_x[2] );
      size_t nk       = 1; // only one order
      size_t k        = 0; // order zero
      //
      // count number of non-zeros in sparsity pattern
      size_t nnz = 0;
      for(size_t i = 0; i < nr_left; ++i)
      {  for(size_t j = 0; j < nc_right; ++j)
         {  size_t i_result = result(
               i, j, k, nk, nr_left, n_middle, nc_right
            );
            if( select_y[i_result] )
            {  for(size_t ell = 0; ell < n_middle; ++ell)
               {  size_t i_left = left(
                     i, ell, k, nk, nr_left, n_middle, nc_right
                  );
                  size_t i_right = right(
                     ell, j, k, nk, nr_left, n_middle, nc_right
                  );
                  bool zero_left  =
                     type_x[i_left] == CppAD::constant_enum;
                  zero_left      &= parameter_x[i_left] == 0.0;
                  bool zero_right =
                     type_x[i_right] == CppAD::constant_enum;
                  zero_right     &= parameter_x[i_right] == 0.0;
                  if( ! (zero_left | zero_right ) )
                  {  bool var_left  =
                        type_x[i_left] == CppAD::variable_enum;
                     bool var_right =
                        type_x[i_right] == CppAD::variable_enum;
                     if( select_x[i_left] & var_left )
                        ++nnz;
                     if( select_x[i_right] & var_right )
                        ++nnz;
                  }
               }
            }
         }
      }
      //
      // fill in the sparsity pattern
      pattern_out.resize(m, n, nnz);
      size_t idx = 0;
      for(size_t i = 0; i < nr_left; ++i)
      {  for(size_t j = 0; j < nc_right; ++j)
         {  size_t i_result = result(
               i, j, k, nk, nr_left, n_middle, nc_right
            );
            if( select_y[i_result] )
            {  for(size_t ell = 0; ell < n_middle; ++ell)
               {  size_t i_left = left(
                     i, ell, k, nk, nr_left, n_middle, nc_right
                  );
                  size_t i_right = right(
                     ell, j, k, nk, nr_left, n_middle, nc_right
                  );
                  bool zero_left  =
                     type_x[i_left] == CppAD::constant_enum;
                  zero_left      &= parameter_x[i_left] == 0.0;
                  bool zero_right =
                     type_x[i_right] == CppAD::constant_enum;
                  zero_right     &= parameter_x[i_right] == 0.0;
                  if( ! (zero_left | zero_right ) )
                  {  bool var_left  =
                        type_x[i_left] == CppAD::variable_enum;
                     bool var_right =
                        type_x[i_right] == CppAD::variable_enum;
                     if( select_x[i_left] & var_left )
                        pattern_out.set(idx++, i_result, i_left);
                     if( select_x[i_right] & var_right )
                        pattern_out.set(idx++, i_result, i_right);
                  }
               }
            }
         }
      }
      assert( idx == nnz );
      //
      return true;
   }

hes_sparsity

   // Jacobian sparsity routine called by CppAD
   virtual bool hes_sparsity(
      const vector<double>&               parameter_x ,
      const vector<CppAD::ad_type_enum>&  type_x      ,
      const vector<bool>&                 select_x    ,
      const vector<bool>&                 select_y    ,
      CppAD::sparse_rc< vector<size_t> >& pattern_out )
   {
      size_t n = select_x.size();
      assert( parameter_x.size() == n );
      assert( type_x.size() == n );
      //
      size_t nr_left  = size_t( parameter_x[0] );
      size_t n_middle = size_t( parameter_x[1] );
      size_t nc_right = size_t( parameter_x[2] );
      size_t nk       = 1; // only one order
      size_t k        = 0; // order zero
      //
      // count number of non-zeros in sparsity pattern
      size_t nnz = 0;
      for(size_t i = 0; i < nr_left; ++i)
      {  for(size_t j = 0; j < nc_right; ++j)
         {  size_t i_result = result(
               i, j, k, nk, nr_left, n_middle, nc_right
            );
            if( select_y[i_result] )
            {  for(size_t ell = 0; ell < n_middle; ++ell)
               {  // i_left depends on i, ell
                  size_t i_left = left(
                     i, ell, k, nk, nr_left, n_middle, nc_right
                  );
                  // i_right depens on ell, j
                  size_t i_right = right(
                     ell, j, k, nk, nr_left, n_middle, nc_right
                  );
                  bool var_left   = select_x[i_left] &
                     (type_x[i_left] == CppAD::variable_enum);
                  bool var_right  = select_x[i_right] &
                     (type_x[i_right] == CppAD::variable_enum);
                  if( var_left & var_right )
                        nnz += 2;
               }
            }
         }
      }
      //
      // fill in the sparsity pattern
      pattern_out.resize(n, n, nnz);
      size_t idx = 0;
      for(size_t i = 0; i < nr_left; ++i)
      {  for(size_t j = 0; j < nc_right; ++j)
         {  size_t i_result = result(
               i, j, k, nk, nr_left, n_middle, nc_right
            );
            if( select_y[i_result] )
            {  for(size_t ell = 0; ell < n_middle; ++ell)
               {  size_t i_left = left(
                     i, ell, k, nk, nr_left, n_middle, nc_right
                  );
                  size_t i_right = right(
                     ell, j, k, nk, nr_left, n_middle, nc_right
                  );
                  bool var_left   = select_x[i_left] &
                     (type_x[i_left] == CppAD::variable_enum);
                  bool var_right  = select_x[i_right] &
                     (type_x[i_right] == CppAD::variable_enum);
                  if( var_left & var_right )
                  {  // Cannot possibly set the same (i_left, i_right)
                     // pair twice.
                     assert( i_left != i_right );
                     pattern_out.set(idx++, i_left, i_right);
                     pattern_out.set(idx++, i_right, i_left);
                  }
               }
            }
         }
      }
      assert( idx == nnz );
      //
      return true;
   }

rev_depend

Routine called when a function using mat_mul is optimized.

   // calculate depend_x
   virtual bool rev_depend(
      const vector<double>&              parameter_x ,
      const vector<CppAD::ad_type_enum>& type_x      ,
      vector<bool>&                      depend_x    ,
      const vector<bool>&                depend_y    )
   {  assert( parameter_x.size() == depend_x.size() );
      assert( parameter_x.size() == type_x.size() );
      bool ok = true;
      //
      size_t nr_left  = size_t( parameter_x[0] );
      size_t n_middle = size_t( parameter_x[1] );
      size_t nc_right = size_t( parameter_x[2] );
      //
      ok &= depend_x.size() == 3 + (nr_left + nc_right) * n_middle;
      ok &= depend_y.size() == n_middle * nc_right;
      if( ! ok )
         return false;
      //
      // initialize depend_x
      for(size_t ell = 0; ell < 3; ++ell)
         depend_x[ell] = true; // always need these parameters
      for(size_t ell = 3; ell < depend_x.size(); ++ell)
         depend_x[ell] = false; // initialize as false
      //
      // commpute depend_x
      size_t nk = 1; // number of orders
      size_t k  = 0; // order
      for(size_t i = 0; i < nr_left; ++i)
      {  for(size_t j = 0; j < nc_right; ++j)
         {  // check depend for result[i, j]
            size_t i_result = result(
               i, j, k, nk, nr_left, n_middle, nc_right
            );
            if( depend_y[i_result] )
            {  for(size_t ell = 0; ell < n_middle; ++ell)
               {  // index for left(i, ell)
                  size_t i_left = left(
                     i, ell, k, nk, nr_left, n_middle, nc_right
                  );
                  // indx for right(ell, j)
                  size_t i_right = right(
                     ell, j, k, nk, nr_left, n_middle, nc_right
                  );
                  bool zero_left  =
                     type_x[i_left] == CppAD::constant_enum;
                  zero_left      &= parameter_x[i_left] == 0.0;
                  bool zero_right =
                     type_x[i_right] == CppAD::constant_enum;
                  zero_right     &= parameter_x[i_right] == 0.0;
                  if( ! zero_right )
                     depend_x[i_left]  = true;
                  if( ! zero_left )
                     depend_x[i_right] = true;
               }
            }
         }
      }
      return true;
   }

End Class Definition

}; // End of mat_mul class
}  // End empty namespace