openmp_get_started.cpp

View page source

Getting Started Using OpenMP Threads With CppAD

in_parallel

see in_parallel .

thread_number

see thread_num .

ADFun Constructor

If you use the Sequence Constructor for the original function, you will need to clear the Taylor coefficient memory associated with the function using capacity_order ; e.g.

CppAD::ADFun fun(ax, ay);
fun.capacity_order(0);

If you do not free the Taylor coefficient memory in fun , the function assignments will allocate zero order Taylor coefficients for each function in fun_thread using thread zero. Depending on what you do in parallel mode, you may attempt to free that memory using another thread. For example, if you change USE_DEFAULT_ADFUN_CONSTRUCTOR from 1 to 0, you will get the message:

Attempt to return memory for a different thread while in parallel mode

Source Code

# include <cppad/cppad.hpp>
# include <omp.h>

# define USE_DEFAULT_ADFUN_CONSTRUCTOR 1

namespace {
    //
    // d_vector, ad_vector, fun_vector
    typedef CPPAD_TESTVECTOR(double)                  d_vector;
    typedef CPPAD_TESTVECTOR( CppAD::AD<double> )    ad_vector;
    typedef CPPAD_TESTVECTOR( CppAD::ADFun<double> ) fun_vector;
    //
    // in_parallel
    bool in_parallel(void)
    {   return omp_in_parallel() != 0;
    }
    //
    // thread_number
    size_t thread_number(void)
    {   return static_cast<size_t>( omp_get_thread_num() );
    }
    //
    // partial
    double partial(
        CppAD::ADFun<double>& f, size_t j, const d_vector& x
    )
    {   // f
        // This will cause an assert if Taylor coefficients were allocated
        // by a different thread.
        f.capacity_order(0);
        //
        size_t nx = x.size();
        d_vector dx(nx), dy(1);
        for(size_t k = 0; k < nx; ++k)
            dx[k] = 0.0;
        dx[j] = 1.0;
        f.Forward(0, x);
        dy = f.Forward(1, dx);
        return dy[0];
    }
}
bool get_started(void)
{   // ok
    bool ok = true;
    //
    // eps99
    double eps99 = 99.0 * std::numeric_limits<double>::epsilon();
    //
    // nx, ax
    size_t nx = 10;
    ad_vector ax(nx);
    for(size_t j = 0; j < nx; ++j)
        ax[j] = 1.0;
    CppAD::Independent(ax);
    //
    // fun
    ad_vector  ay(1);
    ay[0] = ax[0];
    for(size_t j = 1; j < nx; ++j)
        ay[0] *= ax[j];
# if USE_DEFAULT_ADFUN_CONSTRUCTOR
    CppAD::ADFun<double> fun;
    fun.Dependent(ax, ay);
# else
    // This allocates memory for first order Taylor coefficients using thread 0.
    // An assert will occur at f.capacity_order(0) in run_one_thread when
    // it is called by a different thread.
    CppAD::ADFun<double> fun(ax, ay);
# endif
    //
    // num_threads, f_thread
    size_t num_threads = 4;
    fun_vector f_thread(num_threads);
    for(size_t i = 0; i < num_threads; ++i)
        f_thread[i] = fun;
    //
    // x
    d_vector x(nx);
    for(size_t j = 0; j < nx; ++j)
        ax[j] = 1.0 + 1.0 / double(j+1);
    //
    // parallel_setup
    omp_set_num_threads( int(num_threads) );
    ok &= ! in_parallel();
    CppAD::thread_alloc::parallel_setup(
        num_threads, in_parallel, thread_number
    );
    //
    // parallel_ad
    CppAD::parallel_ad<double>();
    //
    // hold_memory
    // optional and may improve speed if you do a lot of memory allocation
    CppAD::thread_alloc::hold_memory(true);
    //
    // Jac
    ad_vector Jac(nx);
    //
    // j
    // OpenMP does not allow one to use a size_t here.
    int int_j;
# pragma omp parallel for
    for(int_j = 0; int_j < int(nx); ++int_j)
    {   size_t j                 = size_t(int_j);
        size_t thread_num        = thread_number();
        CppAD::ADFun<double>& f  = f_thread[thread_num];
        Jac[j] = partial(f, j, x);
    }
    //
    // hold_memory
    // free memory for other threads before this (the master thread)
    CppAD::thread_alloc::parallel_setup(1, nullptr, nullptr);
    ok &= ! in_parallel();
    CppAD::thread_alloc::hold_memory(false);
    for(size_t i = 1; i < num_threads; ++i)
        CppAD::thread_alloc::free_available(i);
    CppAD::thread_alloc::free_available(0);
    //
    // j
    for(int_j = 0; int_j < int(nx); ++int_j)
    {   size_t j = size_t(int_j);
        //
        // check
        double check = 1.0;
        for(size_t k = 0; k < nx; ++k)
            if(k != j)
                check *= x[j];
        //
        // ok
        ok &= CppAD::NearEqual(Jac[j], check, eps99, eps99);
    }
    return ok;
}