atomic_four_lin_ode_reverse.hpp¶

View page source

Atomic Linear ODE Reverse Mode: Example Implementation¶

Purpose¶

The reverse routine overrides the virtual functions used by the atomic_four base; see reverse .

First Order Theory¶

We are given a vector \(w \in \B{R}^m\) and need to compute

\[\partial_x w^\R{T} z(r, x)\]

see the definition of z(t, x) . Consider the Lagrangian corresponding to \(w^\R{T} z(r, x)\) as the objective and the ODE as the constraint:

\[L(x, \lambda) = w^\R{T} z(r, x) + \int_0^r \lambda(t, x)^\R{T} [ A(x) z(t, x) - z_t (t, x) ] \R{d} t\]

where \(\lambda : \R{R} \times \B{R}^n \rightarrow \B{R}^m\) is a smooth function. If \(z(t, x)\) satisfies its ODE, then

\[\partial_x w^\R{T} z(r, x) = \partial_x L(x, \lambda)\]

We use the following integration by parts to replace the \(z_t (t, x)\) term in the integral defining \(L(x, \lambda)\):

\[- \int_0^r \lambda(t, x)^\R{T} z_t (t, x) \R{d} t = - \left. \lambda(t, x)^\R{T} z(t, x) \right|_0^r + \int_0^r \lambda_t (t, x)^\R{T} z(t, x) \R{d} t\]

Adding the condition \(\lambda(r, x) = w\), and noting that \(z(0, x) = b(x)\), we have

\[L(x, \lambda) = \lambda(0, x)^\R{T} z(0, x) + \int_0^r \lambda_t (t, x)^\R{T} z(t, x) \R{d} t + \int_0^r \lambda(t, x)^\R{T} A(x) z(t, x) \R{d} t\]
\[L(x, \lambda) = \lambda(0, x)^\R{T} b (x) + \int_0^r [ \lambda_t (t, x)^\R{T} + \lambda(t, x)^\R{T} A(x) ] z(t, x) \R{d} t\]
\[L(x, \lambda) = \lambda(0, x)^\R{T} b (x) + \int_0^r z(t, x)^\R{T} [ \lambda_t (t, x) + A(x)^\R{T} \lambda(t, x) ] \R{d} t\]

The partial derivative of \(L(x, \lambda)\) with respect to \(b_j\), (not including the dependence of \(\lambda(t, x)\) on \(x\)) is :

\[\partial_{b(j)} L(x, \lambda) = \lambda_j (0, x)\]

The partial derivative of \(L(x, \lambda)\) with respect to \(A_{i,j}\) (not including The dependence of \(\lambda(t, x)\) on \(x\)) is :

\[\partial_{A(i,j)} L(x, \lambda) = \int_0^r \partial_{A(i,j)} z(t, x)^\R{T} [ \lambda_t (t, x) + A(x)^\R{T} \lambda(t, x) ] \R{d} t + \int_0^r z_j (t, x) \lambda_i (t, x) \R{d} t\]

If \(\lambda(t, x)\) satisfies the ODE

\[0 = \lambda_t (t, x) + A(x)^\R{T} \lambda(t, x)\]

The partial derivative with respect to \(A_{i,j}\) is

\[\partial_{A(i,j)} L(x, \lambda) = \int_0^r z_j (t, x) \lambda_i (t, x) \R{d} t\]

In summary, we can compute an approximate solution for the initial value ODE:

\[z_t (t, x) = A(x) z(t, x) \W{,} z(0, x) = b(x)\]

and approximate solution for the final value ODE:

\[\lambda_t (t, x) = - A(x)^\R{T} \lambda(t, x) \W{,} \lambda(r, x) = w\]

Using the notation nnz , row , and col , We can compute an approximation for

\[\begin{split}\partial_{x(k)} w^\R{T} z(r, x) = \left\{ \begin{array}{lll} \int_0^r \lambda_i (t, x) z_j (r, x) \R{d} t & \R{where} \; i = \R{row} [k] \W{,} j = \R{col}[k] & \R{if} \; k < nnz \\ \lambda_i (0, x) & \R{where} \; i = k - nnz & \R{otherwise} % \end{array} \right.\end{split}\]

Second Order Theory¶

atomic_four_lin_ode_reverse_2 .

Simpson’s Rule¶

This example uses Simpson’s rule to approximate the integral

\[\int_0^r \lambda_i (t, x) z_j (t, x) \R{d} t\]

Any other approximation for this integral can be used.

Source¶

# include <cppad/example/atomic_four/lin_ode/lin_ode.hpp>

namespace CppAD { // BEGIN_CPPAD_NAMESPACE
// ----------------------------------------------------------------------------
// reverse override for Base
template <class Base>
bool atomic_lin_ode<Base>::reverse(
    size_t                                     call_id     ,
    const CppAD::vector<bool>&                 select_x    ,
    size_t                                     order_up    ,
    const CppAD::vector<Base>&                 taylor_x    ,
    const CppAD::vector<Base>&                 taylor_y    ,
    CppAD::vector<Base>&                       partial_x   ,
    const CppAD::vector<Base>&                 partial_y   )
{
    // order_up
    if( order_up > 0 )
        return false;
    //
    // r, step, pattern, transpose
    Base      r;
    Base      step;
    sparse_rc pattern;
    bool      transpose;
    get(call_id, r, step, pattern, transpose);
    //
    reverse_one(
        r, step, pattern, transpose, select_x, taylor_x, partial_y, partial_x
    );
    //
    return true;
}
// ---------------------------------------------------------------------------
// reverse override for AD<Base>
template <class Base>
bool atomic_lin_ode<Base>::reverse(
    size_t                                           call_id    ,
    const CppAD::vector<bool>&                       select_x   ,
    size_t                                           order_up   ,
    const CppAD::vector< CppAD::AD<Base> >&          ataylor_x  ,
    const CppAD::vector< CppAD::AD<Base> >&          ataylor_y  ,
    CppAD::vector< CppAD::AD<Base> >&                apartial_x ,
    const CppAD::vector< CppAD::AD<Base> >&          apartial_y )
{
    // order_up
    if( order_up > 0 )
        return false;
    //
    reverse_one(call_id, select_x, ataylor_x, apartial_y, apartial_x);
    //
    return true;
}
// ---------------------------------------------------------------------------
// reverse_one
// Base version of first order reverse mode calculation as in Theory above
template <class Base>
void atomic_lin_ode<Base>::reverse_one(
    const Base&                                r           ,
    const Base&                                step        ,
    const sparse_rc&                           pattern     ,
    const bool&                                transpose   ,
    const CppAD::vector<bool>&                 select_x    ,
    const CppAD::vector<Base>&                 x           ,
    const CppAD::vector<Base>&                 w           ,
    CppAD::vector<Base>&                       partial_x   )
{
    //
    // nnz
    size_t nnz    = pattern.nnz();
    //
    // m
    size_t m = w.size();
    CPPAD_ASSERT_UNKNOWN( w.size() == m );
    CPPAD_ASSERT_UNKNOWN( pattern.nr() == m );
    CPPAD_ASSERT_UNKNOWN( pattern.nc() == m );
    //
    // n
# ifndef NDEBUG
    size_t n = nnz + m;
# endif
    //
    // x, partial_x
    CPPAD_ASSERT_UNKNOWN( x.size()  == n );
    CPPAD_ASSERT_UNKNOWN( partial_x.size() == n );
    //
    // n_step
    size_t n_step = 2;
    if( step < abs(r) / Base(n_step) )
        n_step = size_t( abs(r) / step );
    while( step < abs(r) / Base(n_step) )
        ++n_step;
    if( n_step % 2 == 1 )
        ++n_step;
    CPPAD_ASSERT_UNKNOWN( n_step % 2 == 0 );
    //
    // h
    Base h = r / Base(n_step);
    //
    // x_tmp = [A, b]
    CppAD::vector<Base> x_tmp = x;
    //
    // z_all
    CppAD::vector< CppAD::vector<Base> > z_all(n_step + 1);
    //
    // z_all[0] = z(0, x)
    z_all[0].resize(m);
    for(size_t i = 0; i < m; ++i)
        z_all[0][i] = x[nnz + i];
    //
    // p
    for(size_t p = 0; p < n_step; ++p)
    {   // x_tmp = [A, z(h*p, x) ]
        for(size_t i = 0; i < nnz; ++i)
            x_tmp[nnz + i] = z_all[p][i];
        //
        // z_all[p+1] = z( h*(p+1), x)
        z_all[p+1].resize(m);
        base_solver(h, step, pattern, transpose, x_tmp, z_all[p+1]);
    }
    // lambda_previous = lambda(r, x) = w
    CppAD::vector<Base> lambda_previous = w;
    //
    // lambda_middle, lambda_next
    CppAD::vector<Base> lambda_middle(m), lambda_next(m);
    //
    // partial_x
    // partial_A L(x, lambda)
    for(size_t k = 0; k < nnz; ++k)
        partial_x[k] = Base(0.0);
    //
    // p
    size_t p = n_step - 1;
    while(p)
    {   CPPAD_ASSERT_UNKNOWN( p % 2 == 1 );
        //
        // x_tmp = [ A, lambda( (p+1)*h, x ) ]
        for(size_t i = 0; i < m; ++i)
            x_tmp[nnz + i] = lambda_previous[i];
        //
        // lambda_middle = lambda(p*h, x)
        // We convert the final value ODE to an initial value ODE by changing
        // the sign of A^T and changing limits from [(p+1)*h, p*h] -> [0, h].
        base_solver(h, step, pattern, ! transpose, x_tmp, lambda_middle);
        //
        // x_tmp = [ A, lambda(p*h, x)]
        for(size_t i = 0; i < m; ++i)
            x_tmp[nnz + i] = lambda_middle[i];
        //
        // lambda_next = lambda((p-1), x)
        base_solver(h, step, pattern, ! transpose, x_tmp, lambda_next);
        //
        // partial_x
        // partail_A L(x, lambda)
        for(size_t k = 0; k < nnz; ++k) if( select_x[k] )
        {   size_t i = pattern.row()[k];
            size_t j = pattern.col()[k];
            if( transpose )
                std::swap(i, j);
            //
            // sum  = lambda_i ((p+1)*h, x) * z_j ((p+1)*h, x)
            Base sum = lambda_previous[i] * z_all[p+1][j];
            //
            // sum += 4 * lambad_i(p*h, x) * z_j(p*h, x)
            sum += Base(4.0) * lambda_middle[i] * z_all[p][j];
            //
            // sum += lambda_i ((p-1)*h, x) * z_j ((p-1)*h, x)
            sum += lambda_next[i] * z_all[p-1][j];
            //
            // Simpson's rule for int_0^2*h lambda_i (t, x) z_j (t, x) dt
            Base integral = h * sum / Base(3.0);
            //
            // partial_{A(i,j)}
            partial_x[k] += integral;
        }
        //
        // lambda_previous
        lambda_previous = lambda_next;
        //
        // p
        if( p == 1 )
            p = 0;
        else
            p -= 2;
    }
    //
    // partial_x
    // partial_b L(x, lambda) = lambda(0, x)
    for(size_t i = 0; i < m; ++i)
    {   // partial_{b(i)}
        partial_x[nnz + i] = lambda_next[i];
    }
    //
    return;
}
// ---------------------------------------------------------------------------
// reverse_one
// AD version of first order reverse mode calculation as in Theory above
template <class Base>
void atomic_lin_ode<Base>::reverse_one(
    const size_t&                              call_id     ,
    const CppAD::vector<bool>&                 select_x    ,
    const CppAD::vector< CppAD::AD<Base> >&    ax          ,
    const CppAD::vector< CppAD::AD<Base> >&    aw          ,
    CppAD::vector< CppAD::AD<Base> >&          apartial_x  )
{   typedef CppAD::AD<Base> ad_base;
    //
    //
    // r, step, pattern, transpose
    Base       r;
    Base       step;
    sparse_rc pattern;
    bool      transpose;
    get(call_id, step, r, pattern, transpose);
    //
    // nnz
    size_t nnz = pattern.nnz();
    //
    // m
    size_t m = aw.size();
    CPPAD_ASSERT_UNKNOWN( aw.size() == m );
    CPPAD_ASSERT_UNKNOWN( pattern.nr() == m );
    CPPAD_ASSERT_UNKNOWN( pattern.nc() == m );
    //
    // n
# ifndef NDEBUG
    size_t n = nnz + m;
# endif
    //
    // ax, apartial_x
    CPPAD_ASSERT_UNKNOWN( ax.size()  == n );
    CPPAD_ASSERT_UNKNOWN( apartial_x.size() == n );
    //
    // n_step
    size_t n_step = 2;
    if( step < abs(r) / Base(n_step) )
        n_step = size_t( abs(r) / step );
    while( step < abs(r) / Base(n_step) )
        ++n_step;
    if( n_step % 2 == 1 )
        ++n_step;
    CPPAD_ASSERT_UNKNOWN( n_step % 2 == 0 );
    //
    // h
    Base h = r / Base(n_step);
    //
    // ax_tmp = [A, b]
    CppAD::vector<ad_base> ax_tmp = ax;
    //
    // az_all
    CppAD::vector< CppAD::vector<ad_base> > az_all(n_step + 1);
    //
    // az_all[0] = z(0, x)
    az_all[0].resize(m);
    for(size_t i = 0; i < m; ++i)
        az_all[0][i] = ax[nnz + i];
    //
    // call_id_1
    size_t call_id_1 = (*this).set(h, step, pattern, transpose);
    //
    // p
    for(size_t p = 0; p < n_step; ++p)
    {   // ax_tmp = [A, z(h*p, x) ]
        for(size_t i = 0; i < nnz; ++i)
            ax_tmp[nnz + i] = az_all[p][i];
        //
        // az_all[p+1] = az( h*(p+1), x)
        // This interface requires a separate atomic function call for each
        // step in the Simpson's rule integration. Perhaps it would be more
        // efficient (but more complicated) to have an option whereby one call
        // that returns all the values in az_all expect for az_all[0].
        az_all[p+1].resize(m);
        (*this)(call_id_1, ax_tmp, az_all[p+1]);
    }
    // alambda_previous = lambda(r, x) = w
    CppAD::vector<ad_base> alambda_previous = aw;
    //
    // alambda_middle, alambda_next
    CppAD::vector<ad_base> alambda_middle(m), alambda_next(m);
    //
    // apartial_x
    // apartial_A L(x, lambda)
    for(size_t k = 0; k < nnz; ++k)
        apartial_x[k] = ad_base(0.0);
    //
    // call_id_2
    size_t call_id_2 = (*this).set(h, step, pattern, ! transpose);
    //
    //
    // p
    size_t p = n_step - 1;
    while(p)
    {   CPPAD_ASSERT_UNKNOWN( p % 2 == 1 );
        //
        // ax_tmp = [ A, lambda( (p+1)*h, x ) ]
        for(size_t i = 0; i < m; ++i)
            ax_tmp[nnz + i] = alambda_previous[i];
        //
        // alambda_middle = lambda(p*h, x)
        // We convert the final value ODE to an initial value ODE by changing
        // the sign of A^T and changing limits from [(p+1)*h, p*h] -> [0, h].
        (*this)(call_id_2, ax_tmp, alambda_middle);
        //
        // ax_tmp = [ A, lambda(p*h, x)]
        for(size_t i = 0; i < m; ++i)
            ax_tmp[nnz + i] = alambda_middle[i];
        //
        // alambda_next = lambda((p-1), x)
        (*this)(call_id_2, ax_tmp, alambda_next);
        //
        // apartial_x
        // partail_A L(x, lambda)
        for(size_t k = 0; k < nnz; ++k) if( select_x[k] )
        {   size_t i = pattern.row()[k];
            size_t j = pattern.col()[k];
            if( transpose )
                std::swap(i, j);
            //
            // asum  = lambda_i ((p+1)*h, x) * z_j ((p+1)*h, x)
            ad_base asum = alambda_previous[i] * az_all[p+1][j];
            //
            // asum += 4 * lambad_i(p*h, x) * z_j(p*h, x)
            asum += ad_base(4.0) * alambda_middle[i] * az_all[p][j];
            //
            // asum += lambda_i ((p-1)*h, x) * z_j ((p-1)*h, x)
            asum += alambda_next[i] * az_all[p-1][j];
            //
            // Simpson's rule for int_0^2*h lambda_i (t, x) z_j (t, x) dt
            ad_base aintegral = h * asum / ad_base(3.0);
            //
            // apartial_x
            // partial_{A(i,j)}
            apartial_x[k] += aintegral;
        }
        //
        // alambda_previous
        alambda_previous = alambda_next;
        //
        // p
        if( p == 1 )
            p = 0;
        else
            p -= 2;
    }
    //
    // apartial_x
    // partial_b L(x, lambda) = lambda(0, x)
    for(size_t i = 0; i < m; ++i)
    {   // partial_{b(i)}
        apartial_x[nnz + i] = alambda_next[i];
    }
    //
    return;
}
} // END_CPPAD_NAMESPACE