multi_newton_setup

View page source

Set Up Multi-Threaded Newton Method

Syntax

ok = multi_newton_setup (
      num_sub , xlow , xup , epsilon , max_itr , num_threads
)

Purpose

These routine does the setup for splitting finding all the zeros in an interval into separate sub-intervals, one for each thread.

Thread

It is assumed that this function is called by thread zero, and all the other threads are blocked (waiting).

num_sub

See num_sub in multi_newton_run .

xlow

See xlow in multi_newton_run .

xup

See xup in multi_newton_run .

epsilon

See epsilon in multi_newton_run .

max_itr

See max_itr in multi_newton_run .

num_threads

See num_threads in multi_newton_run .

Source

namespace {
bool multi_newton_setup(
   size_t num_sub                              ,
   double xlow                                 ,
   double xup                                  ,
   double epsilon                              ,
   size_t max_itr                              ,
   size_t num_threads                          )
{
   num_threads  = std::max(num_threads_, size_t(1));
   bool ok      = num_threads == thread_alloc::num_threads();
   ok          &= thread_alloc::thread_num() == 0;

   // inputs that are same for all threads
   epsilon_ = epsilon;
   max_itr_ = max_itr;

   // resize the work vector to accomidate the number of threads
   ok &= work_all_.size() == 0;
   work_all_.resize(num_threads);

   // length of each sub interval
   sub_length_ = (xup - xlow) / double(num_sub);

   // determine values that are specific to each thread
   size_t num_min   = num_sub / num_threads; // minimum num_sub
   size_t num_more  = num_sub % num_threads; // number that have one more
   size_t sum_num   = 0;  // sum with respect to thread of num_sub
   size_t thread_num, num_sub_thread;
   for(thread_num = 0; thread_num < num_threads; thread_num++)
   {
# if  USE_THREAD_ALLOC_FOR_WORK_ALL
      // allocate separate memory for this thread to avoid false sharing
      size_t min_bytes(sizeof(work_one_t)), cap_bytes;
      void* v_ptr = thread_alloc::get_memory(min_bytes, cap_bytes);
      work_all_[thread_num] = static_cast<work_one_t*>(v_ptr);

      // thread_alloc is a raw memory allocator; i.e., it does not call
      // the constructor for the objects it creates. The vector
      // class requires it's constructor to be called so we do it here
      new(& (work_all_[thread_num]->x) ) vector<double>();
# else
      work_all_[thread_num] = new work_one_t;
# endif

      // number of sub-intervalse for this thread
      if( thread_num < num_more  )
         num_sub_thread = num_min + 1;
      else
         num_sub_thread = num_min;

      // when thread_num == 0, xlow_thread == xlow
      double xlow_thread = xlow + double(sum_num) * sub_length_;

      // when thread_num == num_threads - 1, xup_thread = xup
      double xup_thread =
         xlow + double(sum_num + num_sub_thread) * sub_length_;
      if( thread_num == num_threads - 1 )
         xup_thread = xup;

      // update sum_num for next time through loop
      sum_num += num_sub_thread;

      // input information specific to this thread
      work_all_[thread_num]->num_sub = num_sub_thread;
      work_all_[thread_num]->xlow    = xlow_thread;
      work_all_[thread_num]->xup     = xup_thread;
      ok &= work_all_[thread_num]->x.size() == 0;

      // in case this thread does not get called
      work_all_[thread_num]->ok = false;
   }
   ok &= sum_num == num_sub;
   return ok;
}
}