#ifndef HAVE_GLOBAL_HPP
#define HAVE_GLOBAL_HPP
// Autogenerated - do not edit by hand !
#include <algorithm>
#include <cmath>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <set>
#include <sstream>
#include <valarray>
#include <vector>

#include "config.hpp"
#include "radix.hpp"

/** \brief Automatic differentiation library designed for TMB
    \details See `TMBad::ADFun`.
*/
namespace TMBad {

typedef TMBAD_HASH_TYPE hash_t;
typedef TMBAD_INDEX_TYPE Index;
typedef TMBAD_SCALAR_TYPE Scalar;
typedef std::pair<Index, Index> IndexPair;
typedef TMBAD_INDEX_VECTOR IndexVector;

struct global;
/** \brief Get pointer to current global AD context (or NULL if no context is
 * active). */
global *get_glob();

template <class T>
std::ostream &operator<<(std::ostream &out, const std::vector<T> &v) {
  out << "{";
  size_t last = v.size() - 1;
  for (size_t i = 0; i < v.size(); ++i) {
    out << v[i];
    if (i != last) out << ", ";
  }
  out << "}";
  return out;
}

/** \brief Union of closed intervals */
template <class T>
struct intervals {
  struct ep : std::pair<T, bool> {
    bool left() const { return !this->second; }
    ep(T x, bool type) : std::pair<T, bool>(x, type) {}
    operator T() { return this->first; }
  };
  std::set<ep> x;
  typedef typename std::set<ep>::iterator iterator;
  /** \brief Insert new interval [a,b]
      \return `false` if union did not change. `true` otherwise.
  */
  bool insert(T a, T b) {
    ep x1(a, false);
    ep x2(b, true);
    iterator it1 = x.upper_bound(x1);
    iterator it2 = x.lower_bound(x2);

    bool insert_x1 = (it1 == x.end()) || it1->left();
    bool insert_x2 = (it2 == x.end()) || it2->left();

    bool change = (it1 != it2) || insert_x1;

    if (it1 != it2) {
      x.erase(it1, it2);
    }

    if (insert_x1) x.insert(x1);
    if (insert_x2) x.insert(x2);
    return change;
  }
  /** \brief Apply a functor to each interval */
  template <class F>
  F &apply(F &f) const {
    for (iterator it = x.begin(); it != x.end();) {
      ep a = *it;
      ++it;
      ep b = *it;
      ++it;
      f(a, b);
    }
    return f;
  }
  struct print_interval {
    void operator()(T a, T b) { Rcout << "[ " << a << " , " << b << " ] "; }
  };
  void print() {
    print_interval f;
    this->apply(f);
    Rcout << "\n";
  }
};

struct Dependencies : std::vector<Index> {
  typedef std::vector<Index> Base;
  std::vector<std::pair<Index, Index> > I;
  Dependencies();
  void clear();
  void add_interval(Index a, Index b);
  void add_segment(Index start, Index size);

  void monotone_transform_inplace(const std::vector<Index> &x);

  template <class F>
  F &apply(F &f) {
    for (size_t i = 0; i < this->size(); i++) f((*this)[i]);
    for (size_t i = 0; i < I.size(); i++) {
      for (Index j = I[i].first; j <= I[i].second; j++) {
        f(j);
      }
    }
    return f;
  }

  template <class F>
  F &apply_if_not_visited(F &f, intervals<Index> &visited) {
    for (size_t i = 0; i < this->size(); i++) f((*this)[i]);
    for (size_t i = 0; i < I.size(); i++) {
      if (visited.insert(I[i].first, I[i].second)) {
        for (Index j = I[i].first; j <= I[i].second; j++) {
          f(j);
        }
      }
    }
    return f;
  }

  bool any(const std::vector<bool> &x) const;
};

/** \brief Define `segment_ref` array to access inside `ForwardArgs`
    or `ReverseArgs` */
enum ArrayAccess { x_read, y_read, y_write, dx_read, dx_write, dy_read };
template <class Args, ArrayAccess What>
struct Accessor {};
template <class Args>
struct Accessor<Args, x_read> {
  typename Args::value_type operator()(const Args &args, Index j) const {
    return args.x(j);
  }
};
template <class Args>
struct Accessor<Args, y_read> {
  typename Args::value_type operator()(const Args &args, Index j) const {
    return args.y(j);
  }
};
template <class Args>
struct Accessor<Args, y_write> {
  typename Args::value_type &operator()(Args &args, Index j) {
    return args.y(j);
  }
};
template <class Args>
struct Accessor<Args, dx_read> {
  typename Args::value_type operator()(const Args &args, Index j) const {
    return args.dx(j);
  }
};
template <class Args>
struct Accessor<Args, dx_write> {
  typename Args::value_type &operator()(Args &args, Index j) {
    return args.dx(j);
  }
};
template <class Args>
struct Accessor<Args, dy_read> {
  typename Args::value_type operator()(const Args &args, Index j) const {
    return args.dy(j);
  }
};

/** \brief Provide *inplace* read access to value *or* derivative
    arrays
    \note The contained references might go out of scope.
    \warning Use with caution!
*/
template <class T>
struct IndirectAccessor {
  const std::vector<T> &x;
  const std::vector<Index> &i;
  Index tail;
  IndirectAccessor(const std::vector<T> &x, const std::vector<Index> &i,
                   Index tail = 0)
      : x(x), i(i), tail(tail) {}
  T operator[](size_t j) const { return i[j] < tail ? 0 : x[i[j]]; }
  size_t size() const { return i.size(); }
  operator std::vector<T>() const {
    std::vector<T> ans(i.size());
    for (size_t j = 0; j < ans.size(); j++) ans[j] = (*this)[j];
    return ans;
  }
};

template <class T>
struct UpdatingAccess {
  T &x;
  T &operator+=(const T &other);
  T &operator-=(const T &other);
  UpdatingAccess(T &other) : x(other) {}
  operator T &() { return x; }
};

/** \brief Provide read/write access to an array segment

    This class gives a common interface to vectorized array access
    during forward or reverse sweeps.  The purpose is to reduce the
    amount of manually written loops *and* to provide a compile time
    check for access permissions.
*/
template <class Args, ArrayAccess What>
struct segment_ref {
  typedef typename Args::value_type Type;
  Accessor<Args, What> element_access;
  Args args;
  Index from, n;
  segment_ref(const Args &args, Index from, Index n)
      : args(args), from(from), n(n) {}
  template <class Other>
  operator Other() {
    Other ans(n);
    for (size_t i = 0; i < n; i++) {
      ans[i] = element_access(args, from + i);
    }
    return ans;
  }
  Type operator[](Index i) const { return element_access(args, from + i); }
  size_t size() const { return n; }
  template <class Other>
  segment_ref &operator=(const Other &other) {
    for (size_t i = 0; i < n; i++) {
      element_access(args, from + i) = other[i];
    }
    return *this;
  }
  template <class Other>
  segment_ref &operator+=(const Other &other) {
    for (size_t i = 0; i < n; i++) {
      element_access(args, from + i) += other[i];
    }
    return *this;
  }
  template <class Other>
  segment_ref &operator-=(const Other &other) {
    for (size_t i = 0; i < n; i++) {
      element_access(args, from + i) -= other[i];
    }
    return *this;
  }
};

/** \brief Argument class to handle array access of operator inputs and outputs.
    - Values (`global::values`) and derivatives (`global::derivs`) are
      stored as two contiguous arrays of the same size.
    - Operator inputs can be scattered and thus require indirect lookup.
    - Operator outputs are always contiguous and thus do not require
      any indirect lookup.
      \warning An `Args` class instance becomes invalid if
      `global::inputs` is modified.
*/
template <class dummy = void>
struct Args {
  /** \brief Array for indirect access of operator inputs */
  const Index *inputs;
  /** \brief Input/output pointers.
      - `ptr.first` points into the `input` array.
      - `ptr.second` points into the actual value/derivative array
  */
  IndexPair ptr;
  /** \brief Get variable index of j'th input to current operator */
  Index input(Index j) const { return inputs[ptr.first + j]; }
  /** \brief Get variable index of j'th output of current operator */
  Index output(Index j) const { return ptr.second + j; }
  Args(const IndexVector &inputs) : inputs(inputs.data()) {
    ptr.first = 0;
    ptr.second = 0;
  }
};
/** \brief Access input/output values during a forward pass. Write access
    granted for the output value only.
    \warning A `ForwardArgs` class instance becomes invalid if
    `global::inputs` or `global::values` are modified.
*/
template <class Type>
struct ForwardArgs : Args<> {
  typedef std::vector<Type> TypeVector;
  typedef Type value_type;
  Type *values;
  global *glob_ptr;
  /** \brief j'th input variable of this operator */
  Type x(Index j) const { return values[input(j)]; }
  /** \brief j'th output variable of this operator */
  Type &y(Index j) { return values[output(j)]; }
  /** \brief pointer version - use with caution. */
  Type *x_ptr(Index j) { return &values[input(j)]; }
  /** \brief pointer version - use with caution. */
  Type *y_ptr(Index j) { return &values[output(j)]; }
  /** \brief segment version */
  segment_ref<ForwardArgs, x_read> x_segment(Index from, Index size) {
    return segment_ref<ForwardArgs, x_read>(*this, from, size);
  }
  /** \brief segment version */
  segment_ref<ForwardArgs, y_write> y_segment(Index from, Index size) {
    return segment_ref<ForwardArgs, y_write>(*this, from, size);
  }
  ForwardArgs(const IndexVector &inputs, TypeVector &values,
              global *glob_ptr = NULL)
      : Args<>(inputs), values(values.data()), glob_ptr(glob_ptr) {}
};
/** \brief Access input/output values and derivatives during a reverse
    pass. Write access granted for the input derivative only.
    \warning A `ReverseArgs` class instance becomes invalid if
    `global::inputs`, `global::values` or `global::derivs` are
    modified.
*/
template <class Type>
struct ReverseArgs : Args<> {
  typedef std::vector<Type> TypeVector;
  typedef Type value_type;
  Type *values;
  Type *derivs;
  global *glob_ptr;
  /** \brief j'th input variable of this operator */
  Type x(Index j) const { return values[input(j)]; }
  /** \brief j'th output variable of this operator */
  Type y(Index j) const { return values[output(j)]; }
  /** \brief Partial derivative of end result wrt. j'th input variable of
      this operator */
  UpdatingAccess<Type> dx(Index j) { return derivs[input(j)]; }
  /** \brief Partial derivative of end result wrt. j'th output variable of
      this operator */
  Type dy(Index j) const { return derivs[output(j)]; }
  /** \brief pointer version - use with caution. */
  Type *x_ptr(Index j) { return &values[input(j)]; }
  /** \brief pointer version - use with caution. */
  Type *y_ptr(Index j) { return &values[output(j)]; }
  /** \brief pointer version - use with caution. */
  Type *dx_ptr(Index j) { return &derivs[input(j)]; }
  /** \brief pointer version - use with caution. */
  Type *dy_ptr(Index j) { return &derivs[output(j)]; }
  /** \brief segment version */
  segment_ref<ReverseArgs, x_read> x_segment(Index from, Index size) {
    return segment_ref<ReverseArgs, x_read>(*this, from, size);
  }
  /** \brief segment version */
  segment_ref<ReverseArgs, y_read> y_segment(Index from, Index size) {
    return segment_ref<ReverseArgs, y_read>(*this, from, size);
  }
  /** \brief segment version */
  segment_ref<ReverseArgs, dx_write> dx_segment(Index from, Index size) {
    return segment_ref<ReverseArgs, dx_write>(*this, from, size);
  }
  /** \brief segment version */
  segment_ref<ReverseArgs, dy_read> dy_segment(Index from, Index size) {
    return segment_ref<ReverseArgs, dy_read>(*this, from, size);
  }
  ReverseArgs(const IndexVector &inputs, TypeVector &values, TypeVector &derivs,
              global *glob_ptr = NULL)
      : Args<>(inputs),
        values(values.data()),
        derivs(derivs.data()),
        glob_ptr(glob_ptr) {
    ptr.first = (Index)inputs.size();
    ptr.second = (Index)values.size();
  }
};

template <>
struct ForwardArgs<bool> : Args<> {
  typedef std::vector<bool> BoolVector;
  BoolVector &values;
  intervals<Index> &marked_intervals;
  bool x(Index j) { return values[input(j)]; }
  BoolVector::reference y(Index j) { return values[output(j)]; }
  ForwardArgs(const IndexVector &inputs, BoolVector &values,
              intervals<Index> &marked_intervals)
      : Args<>(inputs), values(values), marked_intervals(marked_intervals) {}
  /** \brief Helper */
  template <class Operator>
  bool any_marked_input(const Operator &op) {
    if (Operator::implicit_dependencies) {
      Dependencies dep;
      op.dependencies(*this, dep);
      return dep.any(values);
    } else {
      Index ninput = op.input_size();
      for (Index j = 0; j < ninput; j++)
        if (x(j)) return true;
    }
    return false;
  }
  /** \brief Helper */
  template <class Operator>
  void mark_all_output(const Operator &op) {
    if (Operator::forward_updating) {
      Dependencies dep;
      op.dependencies_updating(*this, dep);

      for (size_t i = 0; i < dep.size(); i++) values[dep[i]] = true;

      for (size_t i = 0; i < dep.I.size(); i++) {
        Index a = dep.I[i].first;
        Index b = dep.I[i].second;
        bool insert = marked_intervals.insert(a, b);
        if (insert) {
          for (Index j = a; j <= b; j++) {
            values[j] = true;
          }
        }
      }
    } else {
      Index noutput = op.output_size();
      for (Index j = 0; j < noutput; j++) y(j) = true;
    }
  }
  /** \brief Dense sparsity pattern */
  template <class Operator>
  bool mark_dense(const Operator &op) {
    if (any_marked_input(op)) {
      mark_all_output(op);
      return true;
    }
    return false;
  }
};

template <>
struct ReverseArgs<bool> : Args<> {
  typedef std::vector<bool> BoolVector;
  BoolVector &values;
  intervals<Index> &marked_intervals;
  BoolVector::reference x(Index j) { return values[input(j)]; }
  bool y(Index j) { return values[output(j)]; }
  ReverseArgs(IndexVector &inputs, BoolVector &values,
              intervals<Index> &marked_intervals)
      : Args<>(inputs), values(values), marked_intervals(marked_intervals) {
    ptr.first = (Index)inputs.size();
    ptr.second = (Index)values.size();
  }
  /** \brief Helper */
  template <class Operator>
  bool any_marked_output(const Operator &op) {
    if (Operator::elimination_protected) return true;
    if (Operator::forward_updating) {
      Dependencies dep;
      op.dependencies_updating(*this, dep);
      return dep.any(values);
    } else {
      Index noutput = op.output_size();
      for (Index j = 0; j < noutput; j++)
        if (y(j)) return true;
    }
    return false;
  }
  /** \brief Helper */
  template <class Operator>
  void mark_all_input(const Operator &op) {
    if (Operator::implicit_dependencies) {
      Dependencies dep;
      op.dependencies(*this, dep);

      for (size_t i = 0; i < dep.size(); i++) values[dep[i]] = true;

      for (size_t i = 0; i < dep.I.size(); i++) {
        Index a = dep.I[i].first;
        Index b = dep.I[i].second;
        bool insert = marked_intervals.insert(a, b);
        if (insert) {
          for (Index j = a; j <= b; j++) {
            values[j] = true;
          }
        }
      }
    } else {
      Index ninput = op.input_size();
      for (Index j = 0; j < ninput; j++) x(j) = true;
    }
  }
  /** \brief Dense sparsity pattern */
  template <class Operator>
  bool mark_dense(const Operator &op) {
    if (any_marked_output(op)) {
      mark_all_input(op);
      return true;
    }
    return false;
  }
};

std::string tostr(const Index &x);

std::string tostr(const Scalar &x);

struct Writer : std::string {
  static std::ostream *cout;
  Writer(std::string str);
  Writer(Scalar x);
  Writer();

  template <class V>
  std::string vinit(const V &x) {
    std::string y = "{";
    for (size_t i = 0; i < x.size(); i++)
      y = y + (i == 0 ? "" : ",") + tostr(x[i]);
    y = y + "}";
    return y;
  }

  std::string p(std::string x);
  Writer operator+(const Writer &other);
  Writer operator-(const Writer &other);
  Writer operator-();
  Writer operator*(const Writer &other);
  Writer operator/(const Writer &other);

  Writer operator*(const Scalar &other);
  Writer operator+(const Scalar &other);

  void operator=(const Writer &other);
  void operator+=(const Writer &other);
  void operator-=(const Writer &other);
  void operator*=(const Writer &other);
  void operator/=(const Writer &other);

  template <class T>
  friend Writer &operator<<(Writer &w, const T &v) {
    *cout << v;
    return w;
  }
  template <class T>
  friend Writer &operator<<(Writer &w, const std::valarray<T> &x) {
    *cout << w.vinit(x);
    return w;
  }
};

template <>
struct ForwardArgs<Writer> : ForwardArgs<Scalar> {
  typedef std::vector<Scalar> ScalarVector;
  typedef ForwardArgs<Scalar> Base;
  /** \brief Write constants in the source code? */
  bool const_literals;
  /** \brief Enable indirect local addressing mode within loop? */
  bool indirect;
  void set_indirect() {
    indirect = true;
    ptr.first = 0;
    ptr.second = 0;
  }
  Writer xd(Index j) { return "v[" + tostr(input(j)) + "]"; }
  Writer yd(Index j) { return "v[" + tostr(output(j)) + "]"; }
  Writer xi(Index j) { return "v[i[" + tostr(Index(ptr.first + j)) + "]]"; }
  Writer yi(Index j) { return "v[o[" + tostr(Index(ptr.second + j)) + "]]"; }
  Writer x(Index j) { return (indirect ? xi(j) : xd(j)); }
  Writer y(Index j) { return (indirect ? yi(j) : yd(j)); }
  Writer y_const(Index j) {
    TMBAD_ASSERT2(!indirect, "Attempt to write constants within loop?");
    return tostr(Base::y(j));
  }
  ForwardArgs(IndexVector &inputs, ScalarVector &values)
      : ForwardArgs<Scalar>(inputs, values) {
    const_literals = false;
    indirect = false;
  }
};

template <>
struct ReverseArgs<Writer> : Args<> {
  typedef std::vector<Scalar> ScalarVector;
  /** \brief Write constants in the source code? */
  bool const_literals;
  /** \brief Enable indirect local addressing mode within loop? */
  bool indirect;
  void set_indirect() {
    indirect = true;
    ptr.first = 0;
    ptr.second = 0;
  }
  Writer dxd(Index j) { return "d[" + tostr(input(j)) + "]"; }
  Writer dyd(Index j) { return "d[" + tostr(output(j)) + "]"; }
  Writer xd(Index j) { return "v[" + tostr(input(j)) + "]"; }
  Writer yd(Index j) { return "v[" + tostr(output(j)) + "]"; }
  Writer dxi(Index j) { return "d[i[" + tostr(Index(ptr.first + j)) + "]]"; }
  Writer dyi(Index j) { return "d[o[" + tostr(Index(ptr.second + j)) + "]]"; }
  Writer xi(Index j) { return "v[i[" + tostr(Index(ptr.first + j)) + "]]"; }
  Writer yi(Index j) { return "v[o[" + tostr(Index(ptr.second + j)) + "]]"; }
  Writer x(Index j) { return (indirect ? xi(j) : xd(j)); }
  Writer y(Index j) { return (indirect ? yi(j) : yd(j)); }
  Writer dx(Index j) { return (indirect ? dxi(j) : dxd(j)); }
  Writer dy(Index j) { return (indirect ? dyi(j) : dyd(j)); }

  ReverseArgs(IndexVector &inputs, ScalarVector &values) : Args<>(inputs) {
    const_literals = false;
    indirect = false;
    ptr.first = (Index)inputs.size();
    ptr.second = (Index)values.size();
  }
};

struct Position {
  Position(Index node, Index first, Index second);
  Position();
  Index node;
  IndexPair ptr;
  bool operator<(const Position &other) const;
};

/** \brief Utility: sort inplace */
template <class T>
void sort_inplace(std::vector<T> &x) {
  std::sort(x.begin(), x.end());
}

/** \brief Utility: sort unique inplace */
template <class T>
void sort_unique_inplace(std::vector<T> &x) {
  std::sort(x.begin(), x.end());
  typename std::vector<T>::iterator last = std::unique(x.begin(), x.end());
  x.erase(last, x.end());
}

/** \brief Operator graph in compressed row storage */
struct graph {
  std::vector<Index> j;
  std::vector<Index> p;
  graph();
  size_t num_neighbors(Index node);
  Index *neighbors(Index node);
  bool empty();
  size_t num_nodes();
  void print();
  /** \brief Private workspace used by `graph::search`. Must either be
      empty or filled with false when not in use. */
  std::vector<bool> mark;
  /** \brief Used to lookup operator (node) of an independent variable */
  std::vector<Index> inv2op;
  /** \brief Used to lookup operator (node) of a dependent variable */
  std::vector<Index> dep2op;
  /** \brief Number of column indices by row, i.e. number of *outgoing* edges */
  std::vector<Index> rowcounts();
  /** \brief Number of row indices by column, i.e. number of *ingoing* edges */
  std::vector<Index> colcounts();
  /** \brief Perform a breadth-first search
      \param start Nodes that initiate the search
      \param visited On input contains boolean mask of already visited
      nodes which should be skipped from the search. On output the
      result of the search has been marked as visited.
      \param result The result of the search is appended to this vector.
      \note `start` and `result` are allowed to be references to the same
     object.
  */
  void bfs(const std::vector<Index> &start, std::vector<bool> &visited,
           std::vector<Index> &result);
  /** \brief Find sub graph
      \param start On input contains the nodes from which to perform
      the search. On output contains the sorted result of the search.
      \param sort_input  Remove duplicates before searching? Can be set to false
     if `start` is unique. \param sort_output Sort the result of the search?
     Output must be sorted in order to work as a valid subgraph (!) \warning
     `start` really is *nodes* and **not** *variables*.
      - To initiate a search from an independent variable `i` use `inv2op[i]` to
     lookup the corresponding node !
      - To initiate a search from a dependent variable `i` use `dep2op[i]` to
     lookup the corresponding node !
  */
  void search(std::vector<Index> &start, bool sort_input = true,
              bool sort_output = true);
  /** \copydoc search
      \param visited On input contains boolean mask of already visited
      nodes which should be skipped from the search. On output the
      result of the search has been marked as visited.
      \note To perform the search along a *restricted graph* one can
      pass the complement of the restriction as 'already visited'.
  */
  void search(std::vector<Index> &start, std::vector<bool> &visited,
              bool sort_input = true, bool sort_output = true);
  /** \brief Find boundary of subgraph
      \param subgraph Nodes of the subgraph
      \details Returns the set of nodes *outside the subgraph* which
      are connected to some node in the subgraph.
  */
  std::vector<Index> boundary(const std::vector<Index> &subgraph);
  /** \brief Construct a graph
      \param num_nodes Number of nodes
      \param edges Graph edges represented as node pairs
  */
  graph(size_t num_nodes, const std::vector<IndexPair> &edges);
};

namespace {
template <class CompleteOperator, bool dynamic>
struct constructOperator {};
template <class CompleteOperator>
struct constructOperator<CompleteOperator, false> {
  CompleteOperator *operator()() {
    static CompleteOperator *pOp = new CompleteOperator();
    return pOp;
  }
};
template <class CompleteOperator>
struct constructOperator<CompleteOperator, true> {
  CompleteOperator *operator()() {
    CompleteOperator *pOp = new CompleteOperator();
    return pOp;
  }

  template <class T1>
  CompleteOperator *operator()(const T1 &x1) {
    CompleteOperator *pOp = new CompleteOperator(x1);
    return pOp;
  }

  template <class T1, class T2>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2) {
    CompleteOperator *pOp = new CompleteOperator(x1, x2);
    return pOp;
  }

  template <class T1, class T2, class T3>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2, const T3 &x3) {
    CompleteOperator *pOp = new CompleteOperator(x1, x2, x3);
    return pOp;
  }

  template <class T1, class T2, class T3, class T4>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2, const T3 &x3,
                               const T4 &x4) {
    CompleteOperator *pOp = new CompleteOperator(x1, x2, x3, x4);
    return pOp;
  }
};
}  // namespace

/** \brief Bitwise collection of selected operator flags
    \details These flags are available for any operator in the
   `operation_stack`.
*/
struct op_info {
  /** \brief Type used for internal integer representation */
  typedef int IntRep;
  /** \brief Internal integer representation */
  IntRep code;
  /** \brief Enumeration of selected boolean flags in `global::Operator` */
  enum op_flag {
    /** \copydoc global::Operator::dynamic */
    dynamic,
    /** \copydoc global::Operator::is_zero_deriv */
    is_zero_deriv,
    /** \copydoc global::Operator::is_linear */
    is_linear,
    /** \copydoc global::Operator::is_constant */
    is_constant,
    /** \copydoc global::Operator::independent_variable */
    independent_variable,
    /** \copydoc global::Operator::dependent_variable */
    dependent_variable,
    /** \copydoc global::Operator::allow_remap */
    allow_remap,
    /** \copydoc global::Operator::elimination_protected */
    elimination_protected,
    /** \copydoc global::Operator::forward_updating */
    forward_updating,
    /** \copydoc global::Operator::reverse_updating */
    reverse_updating,
    /** \brief Mark end of enum */
    op_flag_count
  };
  template <class T>
  IntRep get_flags(T op) {
    return

        (op.dynamic * (1 << dynamic)) |
        (op.is_zero_deriv * (1 << is_zero_deriv)) |
        (op.is_linear * (1 << is_linear)) |
        (op.is_constant * (1 << is_constant)) |
        (op.independent_variable * (1 << independent_variable)) |
        (op.dependent_variable * (1 << dependent_variable)) |
        (op.allow_remap * (1 << allow_remap)) |
        (op.elimination_protected * (1 << elimination_protected)) |
        (op.forward_updating * (1 << forward_updating)) |
        (op.reverse_updating * (1 << reverse_updating));
  }
  op_info();
  op_info(op_flag f);

  template <class T>
  op_info(T op) : code(get_flags(op)) {}
  /** \brief Test if a given flag is set */
  bool test(op_flag f) const;
  op_info &operator|=(const op_info &other);
  op_info &operator&=(const op_info &other);
};

/** \brief Struct defining the main AD context.

    - An AD context holds the three data arrays defining the tape: `opstack`,
   `inputs` and `values`.
    - An AD context can be activated (set global) using `ad_start()` or
   inactivated using `ad_stop()`.
    - `get_glob()` gives a pointer to the current active AD context.
    - AD contexts can be started and stopped while others are running (nested AD
   contexts).
    - An AD context has a unique parent context. The *context stack* is defined
   as the recursive parent traversal from `get_glob()` (top) to `NULL` (bottom).
*/
struct global {
  struct ad_plain;
  struct ad_aug;
  typedef TMBAD_REPLAY_TYPE Replay;
  struct ad_segment;
  struct print_config;
  /** \brief The abstract operator for the operation stack `global::opstack`
      - The methods in this class must be implemented for all operators.
     However, in practice most members can be autogenerated by the `Complete`
     class. \note Virtual member functions are associated with a significant
      overhead (vtable lookup). For performance reasons, it is
      therefore important to squeeze as much work as possible into
      these virtual methods.
  */
  struct OperatorPure {
    /** \brief Increment input/output pointers to prepare for the next
        OperatorPure in the stack */
    virtual void increment(IndexPair &ptr) = 0;
    /** \brief Decrement input/output pointers to prepare for the
        previous OperatorPure in the stack */
    virtual void decrement(IndexPair &ptr) = 0;
    /** \brief Update output values of this OperatorPure */
    virtual void forward(ForwardArgs<Scalar> &args) = 0;
    /** \brief Update input derivs of this OperatorPure */
    virtual void reverse(ReverseArgs<Scalar> &args) = 0;
    /** \brief Fast equivalent of combined `forward()` *and* `increment()` */
    virtual void forward_incr(ForwardArgs<Scalar> &args) = 0;
    /** \brief Fast equivalent of combined `decrement()` *and* `reverse()` */
    virtual void reverse_decr(ReverseArgs<Scalar> &args) = 0;
    /** \brief Number of inputs to this OperatorPure */
    virtual Index input_size() = 0;
    /** \brief Number of outputs from this OperatorPure */
    virtual Index output_size() = 0;
    /** \brief Mark forward dependencies \details Calculate \f$y=Jx\f$
        where \f$J\f$ denotes the Jacobian sparsity pattern of the
        operator. Operators are by default assumed to be dense.
    */
    virtual void forward(ForwardArgs<bool> &args) = 0;
    /** \brief Mark reverse dependencies \details Calculate \f$x=J^Ty\f$
        where \f$J\f$ denotes the Jacobian sparsity pattern of the
        operator. Operators are by default assumed to be dense.
    */
    virtual void reverse(ReverseArgs<bool> &args) = 0;
    /** \copydoc forward_incr */
    virtual void forward_incr(ForwardArgs<bool> &args) = 0;
    /** \copydoc reverse_decr */
    virtual void reverse_decr(ReverseArgs<bool> &args) = 0;
    /** \brief Conditionally mark *all* outputs */
    virtual void forward_incr_mark_dense(ForwardArgs<bool> &args) = 0;
    /** \brief Get the indices of variables required by this operator.
        \details Result of this operation (the indices) are *appended*
        to the `dep` argument. Its main purpose is to build the
        computational graph. Under normal circumstances the
        information can be autogenerated from the operator
        inputs. However there is a subtle exception: Operators are
        allowed to take reference inputs. For instance a matrix multiply
        only requires addresses of the first element of the two input
        matrices. Such cases must have special code to provide **all**
        input addresses.
        \note This information is somewhat overlapping with that of
        `reverse(ReverseArgs<Replay>& args)`.
    */
    virtual void dependencies(Args<> &args, Dependencies &dep) = 0;
    /** \brief Get the indices of variables updated by this operator.
        \details Used only when `Operator::forward_updating` flag is set.
    */
    virtual void dependencies_updating(Args<> &args, Dependencies &dep) = 0;
    /** \brief Replay operation sequence. \copydoc forward */
    virtual void forward(ForwardArgs<Replay> &args) = 0;
    /** \brief Replay operation sequence. \copydoc reverse */
    virtual void reverse(ReverseArgs<Replay> &args) = 0;
    /** \brief Replay operation sequence. \copydoc forward_incr */
    virtual void forward_incr(ForwardArgs<Replay> &args) = 0;
    /** \brief Replay operation sequence. \copydoc reverse_decr */
    virtual void reverse_decr(ReverseArgs<Replay> &args) = 0;
    /** \brief Source code writer. \copydoc forward */
    virtual void forward(ForwardArgs<Writer> &args) = 0;
    /** \brief Source code writer. \copydoc reverse */
    virtual void reverse(ReverseArgs<Writer> &args) = 0;
    /** \brief Source code writer. \copydoc forward_incr */
    virtual void forward_incr(ForwardArgs<Writer> &args) = 0;
    /** \brief Source code writer. \copydoc reverse_decr */
    virtual void reverse_decr(ReverseArgs<Writer> &args) = 0;
    /** \brief Name of this OperatorPure */
    virtual const char *op_name() { return "NoName"; }
    /** \brief Lookup table for operator fusion. Merge this
        OperatorPure with an identical copy. If no match return
        NULL. */
    virtual OperatorPure *self_fuse() = 0;
    /** \brief Lookup table for operator fusion. Merge this
        OperatorPure with another operator. If no match return
        NULL. */
    virtual OperatorPure *other_fuse(OperatorPure *other) = 0;
    /** \brief Return a copy of this OperatorPure. */
    virtual OperatorPure *copy() = 0;
    /** \brief Deallocate this OperatorPure. */
    virtual void deallocate() = 0;
    /** \brief Get operator info. */
    virtual op_info info() = 0;
    /** \brief Optional operator_data */
    virtual void *operator_data() = 0;
    /** \brief Operator identifier \details If two operators have
        equal identifier it can be assumed that they represent equal
        mappings, i.e. same input implies same output.
    */
    virtual void *identifier() = 0;
    /** \brief Optional print method */
    virtual void print(print_config cfg) = 0;
    /** \brief Get pointer to operator before it was
        completed. \warning Avoid unless strictly necessary */
    virtual void *incomplete() = 0;
    virtual ~OperatorPure() {}
  };

  /** \brief Operation stack
      \details Essentially just a vector of operations with some
      memory management on top.
      \warning Use inherited members with caution. E.g. `resize(0)`
      wouldn't do the expected. Use `clear()` instead.
  */
  struct operation_stack : std::vector<OperatorPure *> {
    typedef std::vector<OperatorPure *> Base;
    /** \brief Bitwise max of operator flags in this stack */
    op_info any;
    /** \brief Default CTOR */
    operation_stack();
    /** \brief Copy CTOR */
    operation_stack(const operation_stack &other);
    /** \brief Add new operator to this stack and update bitwise operator
     * information */
    void push_back(OperatorPure *x);
    /** \brief Copy assignment */
    operation_stack &operator=(const operation_stack &other);
    ~operation_stack();
    /** \brief Clear the operation stack without freeing the container */
    void clear();
    void copy_from(const operation_stack &other);
  };

  /** \brief Operation stack */
  operation_stack opstack;
  /** \brief Contiguous workspace for taped variables (same length as
   * `global::derivs`) */
  std::vector<Scalar> values;
  /** \brief Contiguous workspace for derivatives (same length as
   * `global::values`) */
  std::vector<Scalar> derivs;
  /** \brief Pointers into `global::values` determining operator inputs */
  IndexVector inputs;
  /** \brief Pointers into `global::values` determining **independent**
   * variables */
  std::vector<Index> inv_index;
  /** \brief Pointers into `global::values` determining **dependent** variables
   */
  std::vector<Index> dep_index;

  mutable std::vector<IndexPair> subgraph_ptr;
  std::vector<Index> subgraph_seq;
  /** \brief Optional pointer to compiled code. \warning Experimental */
  void (*forward_compiled)(Scalar *);
  /** \brief Optional pointer to compiled code. \warning Experimental */
  void (*reverse_compiled)(Scalar *, Scalar *);

  global();
  /** \brief Clear all workspace without actually freeing the workspace
      \note Intended use: **retaping** without re-allocating */
  void clear();

  /** \brief Release unnecessary workspace to the system \details In
      contrast to `clear` this method does not destroy the object. Its
      intended use is to be called before and/or after a memory
      intensive routine, e.g.  `global::optimize` or
      `global::build_graph`.
      The function attempts to shrink the workspace of the most critical
     containers:
      - `derivs` is freed completely.
      - `subgraph_ptr` is freed completely.
      - `values` is reduced if the potential relative reduction is
     'significant'.
      - `inputs` is reduced if the potential relative reduction is
     'significant'. \param tol Number between zero and one. A reduction is
      considered 'significant' if it is at least `1-tol`.
  */
  void shrink_to_fit(double tol = .9);

  /** \brief Set derivatives to zero
      \param start Specify 'tail sweep' starting from this position
  */
  void clear_deriv(Position start = Position(0, 0, 0));

  /** \brief Reference to i'th input value (parameter) */
  Scalar &value_inv(Index i);
  /** \brief Reference to i'th component of the gradient */
  Scalar &deriv_inv(Index i);
  /** \brief Reference to i'th component of the function value */
  Scalar &value_dep(Index i);
  /** \brief Reference to i'th 'range direction' used to seed the derivative  */
  Scalar &deriv_dep(Index i);
  /** \brief The three pointers defining the begining of the tape */
  Position begin();
  /** \brief The three pointers defining the end of the tape */
  Position end();

  /** \brief Substitute of std::vector<bool> with all elements `true` */
  struct no_filter {
    CONSTEXPR bool operator[](size_t i) const;
  };
  /** \brief Generic forward sweep
      \param begin Skip operators up to this point.
      \param node_filter Skip operators not in this mask.
      \note Don't forget to set `args.ptr` when using `begin`.
  */
  template <class ForwardArgs, class NodeFilter>
  void forward_loop(ForwardArgs &args, size_t begin,
                    const NodeFilter &node_filter) const {
    for (size_t i = begin; i < opstack.size(); i++) {
      if (node_filter[i])
        opstack[i]->forward_incr(args);
      else
        opstack[i]->increment(args.ptr);
    }
  }
  /** \copydoc forward_loop */
  template <class ForwardArgs>
  void forward_loop(ForwardArgs &args, size_t begin = 0) const {
    forward_loop(args, begin, no_filter());
  }
  /** \brief Generic reverse sweep
      \param node_filter Skip operators not in this mask.
      \param begin Skip operators up to this point.
  */
  template <class ReverseArgs, class NodeFilter>
  void reverse_loop(ReverseArgs &args, size_t begin,
                    const NodeFilter &node_filter) const {
    for (size_t i = opstack.size(); i > begin;) {
      i--;
      if (node_filter[i])
        opstack[i]->reverse_decr(args);
      else
        opstack[i]->decrement(args.ptr);
    }
  }
  /** \copydoc reverse_loop */
  template <class ReverseArgs>
  void reverse_loop(ReverseArgs &args, size_t begin = 0) const {
    reverse_loop(args, begin, no_filter());
  }
  /** \brief Generic forward sweep along global::subgraph_seq */
  template <class ForwardArgs>
  void forward_loop_subgraph(ForwardArgs &args) const {
    subgraph_cache_ptr();
    for (size_t j = 0; j < subgraph_seq.size(); j++) {
      Index i = subgraph_seq[j];
      args.ptr = subgraph_ptr[i];
      opstack[i]->forward(args);
    }
  }
  /** \brief Generic reverse sweep along global::subgraph_seq */
  template <class ReverseArgs>
  void reverse_loop_subgraph(ReverseArgs &args) const {
    subgraph_cache_ptr();
    for (size_t j = subgraph_seq.size(); j > 0;) {
      j--;
      Index i = subgraph_seq[j];
      args.ptr = subgraph_ptr[i];
      opstack[i]->reverse(args);
    }
  }
  /** \brief Generic clear array along global::subgraph
      \details Applicable for
      - std::vector<Scalar>
      - std::vector<Replay>
      - std::vector<bool>
      \note This function calls `subgraph_cache_ptr()`. It follows that
      complexity is proportional to the full graph for the first
      call. Following calls have complexity proportional to the
      subgraph.
  */
  template <class Vector>
  void clear_array_subgraph(Vector &array,
                            typename Vector::value_type value =
                                typename Vector::value_type(0)) const {
    if (array.size() != values.size()) {
      array.resize(values.size());
      std::fill(array.begin(), array.end(), value);
      return;
    }
    subgraph_cache_ptr();
    for (size_t j = 0; j < subgraph_seq.size(); j++) {
      Index i = subgraph_seq[j];
      size_t noutput = opstack[i]->output_size();
      for (size_t k = 0; k < noutput; k++)
        array[subgraph_ptr[i].second + k] = value;
    }
  }

  /** \brief Full or partial forward sweep through the operation stack. Updates
     `global::values`. \param start Specify 'tail sweep' starting from this
     position
  */
  void forward(Position start = Position(0, 0, 0));
  /** \brief Full or partial reverse sweep through the operation stack. Updates
     `global::derivs`. \param start Specify 'tail sweep' starting from this
     position \note A reverse tail sweep through nodes `opstack[start:end]` will
     in general affect the derivative array outside the desired range, i.e. the
     derivative array should be considered as invalidated for indices smaller
     than `start.ptr.second`.
  */
  void reverse(Position start = Position(0, 0, 0));
  /** \brief Forward sweep along a subgraph. */
  void forward_sub();
  /** \brief Reverse sweep along a subgraph. */
  void reverse_sub();

  /** \brief Full forward dependency sweep through the operation stack. */
  void forward(std::vector<bool> &marks);
  /** \brief Full reverse dependency sweep through the operation stack. */
  void reverse(std::vector<bool> &marks);
  /** \brief Forward sweep along a subgraph.
      Subgraph can be specified through a sequence `subgraph_seq`
      (default) or through a boolean `node_filter`.
  */
  void forward_sub(std::vector<bool> &marks,
                   const std::vector<bool> &node_filter = std::vector<bool>());
  /** \brief Reverse sweep along a subgraph.
      Subgraph can be specified through a sequence `subgraph_seq`
      (default) or through a boolean `node_filter`.
  */
  void reverse_sub(std::vector<bool> &marks,
                   const std::vector<bool> &node_filter = std::vector<bool>());
  /** \brief Full forward dependency sweep through the operation stack.
      \details For each operator mark *all* outputs if *any* inputs
      are marked.  The intended use of this function is to determine a
      valid (complete topoligically sorted) subgraph.
      \note A reverse version of this function seems less useful so we
      do not yet provide it.
      \warning FIXME Indirect dependencies are not yet accounted for.
 */
  void forward_dense(std::vector<bool> &marks);

  intervals<Index> get_intervals(op_info::op_flag flag, bool reverse = true,
                                 bool forward = false) const;

  intervals<Index> updating_intervals() const;

  intervals<Index> get_intervals_sub(op_info::op_flag flag, bool reverse = true,
                                     bool forward = false) const;

  intervals<Index> updating_intervals_sub() const;

  struct replay {
    /** \brief Mimic `orig` operation sequence value array `global::values` */
    std::vector<Replay> values;
    /** \brief Mimic `orig` operation sequence derivative array `global::derivs`
     */
    std::vector<Replay> derivs;
    /** \brief Operation sequence to replay **from** */
    const global &orig;
    /** \brief Operation sequence to replay **to** */
    global &target;
    /** \brief Active operation sequence before this replay was initiated */
    global *parent_glob;
    /** \copydoc global::value_inv */
    Replay &value_inv(Index i);
    /** \copydoc global::deriv_inv */
    Replay &deriv_inv(Index i);
    /** \copydoc global::value_dep */
    Replay &value_dep(Index i);
    /** \copydoc global::deriv_dep */
    Replay &deriv_dep(Index i);
    /** \brief Initialize pointers `orig` and `target`
        \details This is a cheap `O(1)` operation
    */
    replay(const global &orig, global &target);
    /** \brief Initiate this replay
        \details
        - Set `this->target` as the current active operation sequence **and**
        - Populate `this->values` with `Replay` copies of all `orig.values`.
        Complexity is `O(n)`.
        \note `target` is still empty after this operation if `ad_aug` is used
       as replay type (the default).
    */
    void start();
    /** \brief Stop this replay
        \details
        Restore the previous active operation sequence
    */
    void stop();
    /** \brief Add 'updatable' derivatives to the tape */
    void add_updatable_derivs(const intervals<Index> &I);
    /** \brief Initialize derivative array used by this replay */
    void clear_deriv();
    /** \brief Forward replay of `replay::orig` operation sequence.
        Updates `replay::values` array **and** `replay::target` operation
       sequence. \param inv_tags Write an independent variable tag for each
       function argument ? \param dep_tags Write a dependent variable tag for
       each function value ? \param node_filter Operators to keep in replay
    */
    void forward(bool inv_tags = true, bool dep_tags = true,
                 Position start = Position(0, 0, 0),
                 const std::vector<bool> &node_filter = std::vector<bool>());
    /** \brief Reverse replay of `replay::orig` operation sequence.
        Updates `replay::derivs` array **and** `replay::target` operation
       sequence. \param dep_tags Write a dependent variable tag for each
       function derivative ? \param inv_tags Write an independent variable tag
       for each derivative weight ? \param node_filter Operators to keep in
       replay
    */
    void reverse(bool dep_tags = true, bool inv_tags = false,
                 Position start = Position(0, 0, 0),
                 const std::vector<bool> &node_filter = std::vector<bool>());
    /** \brief Replay forward sweep along a subgraph. */
    void forward_sub();
    /** \brief Replay reverse sweep along a subgraph. */
    void reverse_sub();
    /** \copydoc global::clear_deriv_sub */
    void clear_deriv_sub();
  };

  /** \brief Replay this operation sequence to itself
      \param inv_tags Keep independent variable tags?
      \param dep_tags Keep dependent variable tags?
  */
  void forward_replay(bool inv_tags = true, bool dep_tags = true);

  /** \brief Cache array pointers required by all subgraph routines
      \note Requires a sweep through the full computational graph. In
      addition a workspace of `sizeof(IndexPair) * opstack.size()` is
      allocated.
  */
  void subgraph_cache_ptr() const;
  /** \brief Convert selected variables to a subgraph sequence
      \details For each variable check if the variable is marked. If
      yes add the (unique) operator, that generated the variable as
      its result, to the subgraph sequence. Although an operator can
      generate multiple variables it is guarantied that an operator is
      only added once.
  */
  void set_subgraph(const std::vector<bool> &marks, bool append = false);
  /** \brief Mark all outputs of a subgraph */
  void mark_subgraph(std::vector<bool> &marks);
  /** \brief Unmark all outputs of a subgraph */
  void unmark_subgraph(std::vector<bool> &marks);
  /** \brief Create a full subgraph (for testing only) */
  void subgraph_trivial();
  /** \brief Clear derivative array along a subgraph.
      \details Clear all *relevant* input variable derivatives by
      noting that each such *input* variable must also be an *output*
      of some operator node.
  */
  void clear_deriv_sub();
  /** \brief Extract a subgraph as a new global object. Fast when
      called many times.

      \details Sub-graph can either be *sorted* (common case) or just
      *topologically sorted* (e.g. for graph permutations). In both
      cases the subgraph may or may not include all independent/dependent
      variables.

      It's important to note that this routine permutes `inv_index`
      and `dep_index` to reflect the original parameter order (which
      is only relevant in the un-sorted case). FIXME: Correct the
      code!

      \param var_remap Workspace containing garbage on output. Can be
      used to avoid re-allocating.
      \param new_glob Target of extraction. Empty by default.
      \note Complexity proportional to subgraph **if workspace is
      allocated**. Otherwise proportional to full graph.
      \note `global::subgraph_seq` must be calculated in advance.
      \warning FIXME: Must copy the dynamic operator table by default
      At least two applications of this function:
      - Tape optimizer. Here we want to delete the old glob after extraction.
      - Fast quadrature. Here we want to keep the old glob.
      \note A non-empty extraction target `new_glob` may be useful for
      function slicing.
      \note Special attention must be payed to variables that affect
      the subgraph trajectory without being part of the
      trajectory. These *boundary* variables can be remapped (using
      `var_remap`) prior to calling this function. Note that by
      default boundary variables are remapped to an arbitrary variable
      (the first variable `values[0]`).
  */
  global extract_sub(std::vector<Index> &var_remap, global new_glob = global());
  /** \brief In-place subgraph extractor \details Slower, but more
      memory efficient, than `extract_sub`. Subset is by boolean
      marked **variables** rather than actual subgraph nodes.
  */
  void extract_sub_inplace(std::vector<bool> marks);
  /** \brief Extract a subgraph as a new global object. Slow when called many
     times. \note `global::subgraph_seq` must be calculated in advance.
  */
  global extract_sub();

  /** \brief Build variable -> operator node lookup table using a
      single forward pass. The resulting sequence is monotonically
      increasing.
      \details For each variable (i.e. pointer into global::values)
      find operator (i.e. pointer into global::opstack) that generated
      the variable as its results.
      \note Size of this table is `values.size()`.
  */
  std::vector<Index> var2op();
  /** \brief Select operators from a subset of variables
      An operator is selected if it *generates* or optionally
      *modifies* any of the selected variables.
      In absence of 'updating operators', this is the fast equivalent of
      `ans[ var2op() [ which(values) ] ] <- true`
      However, the code has been updated to also work for 'updating
      operators' by setting `include_updating=true`.
  */
  std::vector<bool> var2op(const std::vector<bool> &values,
                           bool include_updating = true);
  /** \brief Get variables produces by a node seqence */
  std::vector<Index> op2var(const std::vector<Index> &seq);
  /** \brief Get variables produced by a node seqence */
  std::vector<bool> op2var(const std::vector<bool> &seq_mark);
  /** \brief General operator -> variable table generator.
      \details For each operator match its output variable in a given
      subset of variables. If a match exists return the index of the
      match, otherwise return a missing value code. If several matches
      exist, return the index corresponding to the *first match* from
      the left.
      \note Size of this table is `opstack.size()`.
  */
  std::vector<Index> op2idx(const std::vector<Index> &var_subset,
                            Index NA = (Index)-1);
  /** \brief General boolean table generator */
  std::vector<bool> mark_space(size_t n, const std::vector<Index> ind);
  /** \brief Boolean representation of independent variable positions */
  std::vector<bool> inv_marks();
  /** \brief Boolean representation of dependent variable positions */
  std::vector<bool> dep_marks();
  /** \brief Boolean representation of sub graph positions */
  std::vector<bool> subgraph_marks();

  struct append_edges {
    size_t &i;
    const std::vector<bool> &keep_var;
    std::vector<Index> &var2op;
    std::vector<IndexPair> &edges;

    std::vector<bool> op_marks;
    size_t pos;
    append_edges(size_t &i, size_t num_nodes, const std::vector<bool> &keep_var,
                 std::vector<Index> &var2op, std::vector<IndexPair> &edges);
    void operator()(Index dep_j);

    void start_iteration();

    void end_iteration();
  };
  /** \brief Build a graph-representation of the operation stack
      \details The graph is defined as follows:
      - Nodes are operators i.e., the number of nodes is `opstack.size()`
      - A connection from Op1 to Op2 is present <==> there exists a *variable*
     which is input to Op2 and output from Op1 \param transpose Flag to reverse
     all edges. \param keep_var Boolean mask of *variables* to be considered as
     potential edges. \param deriv Is the graph to be used for derivative
     calculations only?
  */
  graph build_graph(bool transpose, const std::vector<bool> &keep_var,
                    bool deriv = false);
  /** \brief Construct operator graph with forward connections
      \details See `build_graph()`
  */
  graph forward_graph(std::vector<bool> keep_var = std::vector<bool>(0),
                      bool deriv = false);
  /** \brief Construct operator graph with reverse connections
      \details See `build_graph()`
  */
  graph reverse_graph(std::vector<bool> keep_var = std::vector<bool>(0),
                      bool deriv = false);

  /** \brief Test if two tapes are identical
      \details We say that two tapes are identical if the same input
      is guarantied to result in the same output.
  */
  bool identical(const global &other) const;

  /** \brief Simple hash code of scalar */
  template <class T>
  void hash(hash_t &h, T x) const {
    static const size_t n =
        (sizeof(T) / sizeof(hash_t)) + (sizeof(T) % sizeof(hash_t) != 0);
    hash_t buffer[n];
    std::fill(buffer, buffer + n, 0);
    for (size_t i = 0; i < sizeof(x); i++)
      ((char *)buffer)[i] = ((char *)&x)[i];
    hash_t A = 54059;
    hash_t B = 76963;
    for (size_t i = 0; i < n; i++) h = (A * h) ^ (B * buffer[i]);
  }

  /** \brief Simple hash code of tape
      \details The hash function has the following properties:
      - If x and y have different hash codes then x and y are **not**
     `identical()`.
      - If x and y have equal hash codes then x and y are **likely**
        `identical()`. This situation must always be followed up by a
        collision test.
  */
  hash_t hash() const;

  /** \brief Configuration of hash_sweep */
  struct hash_config {
    /** \brief Use unique code for each independent variable? (see `hash_sweep`)
     */
    bool strong_inv;
    /** \brief Include numerical value as part of hash code for constants? (see
     * `hash_sweep`) */
    bool strong_const;
    /** \brief Use unique hash code for each output of an operator? */
    bool strong_output;
    /** \brief Reduce returned hash values to one per dependent variable? */
    bool reduce;
    /** \brief Deterministic hash codes? */
    bool deterministic;
    /** \brief Optionally control seeding of InvOp in case `strong_inv=true` */
    std::vector<Index> inv_seed;
  };

  /** \brief Calculate hash codes of each dependent variable using a
      single forward sweep
      \details The hash function has the following properties:
      - If x and y have different hash codes then x and y are **not**
     `identical()`.
      - If x and y have equal hash codes then x and y are **likely**
        `identical()`. This situation must always be followed up by a
        collision test.

        In the **weak** case the hash codes attempt to discriminate
        between *mappings*. In the **strong** case the hash codes
        discriminate between sub-expressions.
        For example consider the computational graph of

        (x1+x2) * (x3+x4)

        In the weak case (x1+x2) and (x3+x4) are considered identical
        because the mappings f(x1,x2)=x1+x2 and f(x3,x4)=x3+x4 are
        identical.
        If, in addition, the mappings are evaluated at the exact same
        arguments then they are identical in the strong sense (equal
        sub-expressions).

        The hash function works as follows. Starting out with a scalar
        hash update function `hash(x,h)` we generalize to vectors
        using the recursion

        `hash(x_1,...,x_n) := hash(x_1, hash(x_2,...,x_n) )`

        Hash codes of the operation

        `(y_1,...,y_m) = f(x_1,...,x_n)`

        are defined as

        `hash(y_k) = hash(f, x_1, ..., x_n) + k - 1`

        assuming `f` is represented by its `OperatorPure::identifier()`.

        Note, that dynamic operators by default have a unique
        identifier (their memory address). It follows that the hash
        code of output variables are (*very* likely) unique as well.

        For the reason above, it may seem tempting to skip input
        hashing completely in the dynamic operator case. However, that
        won't work because we allow dynamic operators to have a static
        identifier, see `Operator::add_static_identifier`.

        \param weak If `true` calculate weak hash codes. Otherwise strong.

        \return In the weak case a vector of same length as the number
        of dependent variables. In the strong case return the entire
        vector of variable hash codes (same length as `values`).
  */
  std::vector<hash_t> hash_sweep(hash_config cfg) const;
  /** \brief Handle two common cases */
  std::vector<hash_t> hash_sweep(bool weak = true) const;

  /** \brief Very simple tape optimizer
      \details The tape optimizer attempts to remove variables unless they are
      - Direct input variables
      - Direct output variables
      - Variables that affect the output variables.
      \warning Sparse operators can have input variables that are
      marked for removal while at the same time having input variables
      that must be kept. In this case the redundant input variables
      are remapped to an arbitrary variable (number zero - see
      `global::extract_sub`). Formally this is correct but in practice
      it could result in nan function evaluations (Example: Vectorized
      log function with negative values of `x[0]`).
  */
  void eliminate();

  /** \brief Configuration of print method */
  struct print_config {
    std::string prefix, mark;
    int depth;
    print_config();
  };
  /** \brief Print workspace */
  void print(print_config cfg);
  /** \brief Print workspace with default configuration (works from debugger) */
  void print();

  /** \brief Operator with input/output dimension known at compile time */
  template <int ninput_, int noutput_ = 1>
  struct Operator {
    /** \brief Does this operator require dynamic allocation ? */
    static const bool dynamic = false;
    /** \brief Number of operator inputs */
    static const int ninput = ninput_;
    /** \brief Number of operator outputs */
    static const int noutput = noutput_;
    /** \brief Is output of this operator an independent variable ? */
    static const int independent_variable = false;
    /** \brief Is output of this operator a dependent variable ? */
    static const int dependent_variable = false;
    /** \brief Have `input_size` and `output_size` members been defined ? */
    static const bool have_input_size_output_size = false;
    /** \brief Have `increment` and `decrement` members been defined ? */
    static const bool have_increment_decrement = false;
    /** \brief Have `forward` and `reverse` members been defined ? */
    static const bool have_forward_reverse = true;
    /** \brief Have `forward_incr` and `reverse_incr` members been defined ? */
    static const bool have_forward_incr_reverse_decr = false;
    /** \brief Have `forward_mark` and `reverse_mark` members been defined ? */
    static const bool have_forward_mark_reverse_mark = false;
    /** \brief Have `dependencies` member been defined ? */
    static const bool have_dependencies = false;
    /** \brief Is it safe to remap the inputs of this operator?
        \details In general it is illegal to remap operators that
        assume a certain memory layout of its inputs, e.g. BLAS
        routines. Make sure to set to `false` for those cases.
    */
    static const bool allow_remap = true;
    /**
       \brief Does this operator have implicit dependencies?
       \details Signify that this operator uses indirect inputs
       - **and** that all these inputs are available through the
       `dependencies()` function (in particular `have_dependencies` must be true
       in this case)
       - **and** that these indirect dependencies should be used to auto
       implement the marker routines `forward_mark_reverse_mark`. \note FIXME: I
       think `allow_remap` can eventually be deprecated?
    */
    static const bool implicit_dependencies = false;
    /** \brief Should this operator have a static identifier ? */
    static const bool add_static_identifier = false;
    /** \brief Should this operator replay it self by invoking the copy CTOR ?
     */
    static const bool add_forward_replay_copy = false;
    /** \brief Does this class have an `eval` member from which to define
     * forward ? */
    static const bool have_eval = false;
    /** \brief How many times can this operator be doubled ? */
    static const int max_fuse_depth = 2;
    /** \brief Is this a linear operator ? */
    static const bool is_linear = false;
    /** \brief Is this a constant operator ? */
    static const bool is_constant = false;
    /** \brief Is this a zero-derivative operator (piecewise constant) ? */
    static const bool is_zero_deriv = false;
    /** \brief Protect this operator from elimination by the tape optimizer ? */
    static const bool elimination_protected = false;
    /** \brief This operator **may** update existing variables ?

        \details A 'forward_updating' operator is allowed to update
        (increment/decrement) *certain* ('updatable') variables
        already on the tape.  In general this property breaks basic
        principles of reverse mode AD unless the following extra
        requirement is satisfied:

        - Once an updatable variable has been *read* it may no longer be
       updated.

        This requrement always holds for the derivative variables during reverse
       replay. We note that 'forward_updating' operators are considered an
       extension to the standard AD framework. In particular, a more complex
        dependency anlysis is required:

        - A `forward_updating` operator **must** implement the member
          `dependencies_updating()` defining which variables are
          updated.
        - Forward updating operators **must** have `implicit_dependencies=true`
       (FIXME: not sure about this).
    */
    static const bool forward_updating = false;
    /** \brief The derivative of this operator is `forward_updating`

        - The `reverse_updating` flag signifies that necessary
          derivative workspaces are added to the tape prior to any
          reverse replay.
    */
    static const bool reverse_updating = false;
    /** \brief Default implementation of `OperatorPure::dependencies_updating()`
     */
    void dependencies_updating(Args<> &args, Dependencies &dep) const {}
    /** \brief How to fuse this operator (self) with another (other) */
    OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
      return NULL;
    }
    /** \brief Return operator specific dynamic information (optional) */
    void *operator_data() { return NULL; }
    /** \brief Print this operator (optional) */
    void print(print_config cfg) {}
  };
  /** \brief Operator that requires dynamic allocation. Compile time
      known input/output size */
  template <int ninput, int noutput>
  struct DynamicOperator : Operator<ninput, noutput> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
  };
  /** \brief Operator that requires dynamic allocation. Compile time
      known input size */
  template <int ninput>
  struct DynamicOutputOperator : Operator<ninput, -1> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
    Index noutput;
  };
  template <int noutput = 1>
  struct DynamicInputOperator : Operator<-1, noutput> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
    Index ninput;
  };
  struct DynamicInputOutputOperator : Operator<-1, -1> {
    /** \copydoc Operator::dynamic */
    static const bool dynamic = true;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth = 0;
    Index ninput_, noutput_;
    DynamicInputOutputOperator(Index ninput, Index noutput);
    Index input_size() const;
    Index output_size() const;
    static const bool have_input_size_output_size = true;
  };

  /** \brief Add default implementation of mandatory members:
      `input_size` ans `output_size` */
  template <class OperatorBase>
  struct AddInputSizeOutputSize : OperatorBase {
    INHERIT_CTOR(AddInputSizeOutputSize, OperatorBase)
    Index input_size() const { return this->ninput; }
    Index output_size() const { return this->noutput; }
    static const bool have_input_size_output_size = true;
  };

  /** \brief Add default implementation of mandatory members:
      `increment` and `decrement` */
  template <class OperatorBase>
  struct AddIncrementDecrement : OperatorBase {
    INHERIT_CTOR(AddIncrementDecrement, OperatorBase)
    void increment(IndexPair &ptr) {
      ptr.first += this->input_size();
      ptr.second += this->output_size();
    }
    void decrement(IndexPair &ptr) {
      ptr.first -= this->input_size();
      ptr.second -= this->output_size();
    }
    static const bool have_increment_decrement = true;
  };

  /** \brief Add default implementation of mandatory members:
      `forward` and `reverse` **from** `forward_incr` and
      `reverse_decr` */
  template <class OperatorBase>
  struct AddForwardReverse : OperatorBase {
    INHERIT_CTOR(AddForwardReverse, OperatorBase)

    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      ForwardArgs<Type> args_cpy(args);
      OperatorBase::forward_incr(args_cpy);
    }
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      ReverseArgs<Type> args_cpy(args);
      OperatorBase::increment(args_cpy.ptr);
      OperatorBase::reverse_decr(args_cpy);
    }
    static const bool have_forward_reverse = true;
  };

  /** \brief Add default implementation of mandatory members:
      `forward_incr` and `reverse_decr` **from** `forward` and
      `reverse` */
  template <class OperatorBase>
  struct AddForwardIncrReverseDecr : OperatorBase {
    INHERIT_CTOR(AddForwardIncrReverseDecr, OperatorBase)

    template <class Type>
    void forward_incr(ForwardArgs<Type> &args) {
      OperatorBase::forward(args);
      OperatorBase::increment(args.ptr);
    }

    template <class Type>
    void reverse_decr(ReverseArgs<Type> &args) {
      OperatorBase::decrement(args.ptr);
      OperatorBase::reverse(args);
    }
    static const bool have_forward_incr_reverse_decr = true;
  };

  /** \brief Add default implementation of mandatory members:
      `forward_mark` and `reverse_mark` */
  template <class OperatorBase>
  struct AddForwardMarkReverseMark : OperatorBase {
    INHERIT_CTOR(AddForwardMarkReverseMark, OperatorBase)

    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      OperatorBase::forward(args);
    }
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      OperatorBase::reverse(args);
    }

    void forward(ForwardArgs<bool> &args) { args.mark_dense(*this); }
    void reverse(ReverseArgs<bool> &args) { args.mark_dense(*this); }
    static const bool have_forward_mark_reverse_mark = true;
  };

  /** \brief Add default implementation of mandatory member:
      `dependencies` */
  template <class OperatorBase>
  struct AddDependencies : OperatorBase {
    INHERIT_CTOR(AddDependencies, OperatorBase)
    void dependencies(Args<> &args, Dependencies &dep) const {
      Index ninput_ = this->input_size();
      for (Index j = 0; j < ninput_; j++) dep.push_back(args.input(j));
    }
    static const bool have_dependencies = true;
  };

  /** \brief Add default implementation of mandatory member:
      `forward` **from** optional member `eval` */
  template <class OperatorBase, int ninput>
  struct AddForwardFromEval : OperatorBase {};
  /** \brief Unary case */
  template <class OperatorBase>
  struct AddForwardFromEval<OperatorBase, 1> : OperatorBase {
    INHERIT_CTOR(AddForwardFromEval, OperatorBase)
    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      args.y(0) = this->eval(args.x(0));
    }
  };
  /** \brief Binary case */
  template <class OperatorBase>
  struct AddForwardFromEval<OperatorBase, 2> : OperatorBase {
    INHERIT_CTOR(AddForwardFromEval, OperatorBase)
    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      args.y(0) = this->eval(args.x(0), args.x(1));
    }
  };

  /** \brief Utility for member completion */
  template <bool flag, class Yes, class No>
  struct if_else {};
  template <class Yes, class No>
  struct if_else<true, Yes, No> {
    typedef Yes type;
  };
  template <class Yes, class No>
  struct if_else<false, Yes, No> {
    typedef No type;
  };

  /** \brief Generate all mandatory members */
  template <class OperatorBase>
  struct CPL {
    static const bool test1 = !OperatorBase::have_eval;
    /** \copydoc AddForwardFromEval */
    typedef typename if_else<
        test1, OperatorBase,
        AddForwardFromEval<OperatorBase, OperatorBase::ninput> >::type Result1;

    static const bool test2 = Result1::have_input_size_output_size;
    /** \copydoc AddInputSizeOutputSize */
    typedef
        typename if_else<test2, Result1, AddInputSizeOutputSize<Result1> >::type
            Result2;

    static const bool test3 = !Result2::have_dependencies;
    /** \copydoc AddDependencies */
    typedef typename if_else<test3, AddDependencies<Result2>, Result2>::type
        Result3;

    static const bool test4 = Result3::have_increment_decrement;
    /** \copydoc AddIncrementDecrement */
    typedef
        typename if_else<test4, Result3, AddIncrementDecrement<Result3> >::type
            Result4;

    static const bool test5 = Result4::have_forward_mark_reverse_mark;
    /** \copydoc AddForwardMarkReverseMark */
    typedef typename if_else<test5, Result4,
                             AddForwardMarkReverseMark<Result4> >::type Result5;

    static const bool test6 = Result5::have_forward_reverse &&
                              !Result5::have_forward_incr_reverse_decr;
    /** \copydoc AddForwardIncrReverseDecr */
    typedef typename if_else<test6, AddForwardIncrReverseDecr<Result5>,
                             Result5>::type Result6;

    static const bool test7 = Result6::have_forward_incr_reverse_decr &&
                              !Result6::have_forward_reverse;
    /** \copydoc AddForwardReverse */
    typedef typename if_else<test7, AddForwardReverse<Result6>, Result6>::type
        Result7;

    typedef Result7 type;
  };

  /** \brief Fuse two operators */
  template <class Operator1, class Operator2>
  struct Fused : Operator<Operator1::ninput + Operator2::ninput,
                          Operator1::noutput + Operator2::noutput> {
    typename CPL<Operator1>::type Op1;
    typename CPL<Operator2>::type Op2;
    /** \copydoc Operator::independent_variable */
    static const int independent_variable =
        Operator1::independent_variable && Operator2::independent_variable;
    /** \copydoc Operator::dependent_variable */
    static const int dependent_variable =
        Operator1::dependent_variable && Operator2::dependent_variable;
    /** \copydoc Operator::max_fuse_depth */
    static const int max_fuse_depth =
        (Operator1::max_fuse_depth < Operator2::max_fuse_depth
             ? Operator1::max_fuse_depth - 1
             : Operator2::max_fuse_depth - 1);
    /** \copydoc Operator::is_linear */
    static const bool is_linear = Operator1::is_linear && Operator2::is_linear;
    template <class Type>
    void forward_incr(ForwardArgs<Type> &args) {
      Op1.forward_incr(args);
      Op2.forward_incr(args);
    }
    template <class Type>
    void reverse_decr(ReverseArgs<Type> &args) {
      Op2.reverse_decr(args);
      Op1.reverse_decr(args);
    }
    /** \copydoc Operator::have_forward_incr_reverse_decr */
    static const bool have_forward_incr_reverse_decr = true;
    /** \copydoc Operator::have_forward_reverse */
    static const bool have_forward_reverse = false;
    const char *op_name() { return "Fused"; }
  };
  /** \brief Replicate an operator
      \details If an operator occurs many times in a row the
      information can be compressed. Knowing the operator type and the
      number of occurences is sufficient.
      \note When replacing a batch of binary operators by `Rep` the
      memory *reduction* is assymptotically `1/3` (ratio will in
      general depend on the configuration typedefs).
  */
  template <class Operator1>
  struct Rep : DynamicOperator<-1, -1> {
    typename CPL<Operator1>::type Op;
    /** \copydoc Operator::independent_variable */
    static const int independent_variable = Operator1::independent_variable;
    /** \copydoc Operator::dependent_variable */
    static const int dependent_variable = Operator1::dependent_variable;
    /** \copydoc Operator::is_linear */
    static const bool is_linear = Operator1::is_linear;
    Index n;
    Rep(Index n) : n(n) {}
    Index input_size() const { return Operator1::ninput * n; }
    Index output_size() const { return Operator1::noutput * n; }
    /** \brief \copydoc Operator::have_input_size_output_size */
    static const bool have_input_size_output_size = true;
    template <class Type>
    void forward_incr(ForwardArgs<Type> &args) {
      for (size_t i = 0; i < (size_t)n; i++) Op.forward_incr(args);
    }
    template <class Type>
    void reverse_decr(ReverseArgs<Type> &args) {
      for (size_t i = 0; i < (size_t)n; i++) Op.reverse_decr(args);
    }
    /** \brief \copydoc Operator::have_forward_incr_reverse_decr */
    static const bool have_forward_incr_reverse_decr = true;
    /** \brief \copydoc Operator::have_forward_reverse */
    static const bool have_forward_reverse = false;
    /** \brief Attempt to apply input compression to this Rep operator.
        \details Try to convert this operator to a
        `global::RepCompress` operator. If conversion fails return
        `NULL`.
    */
    OperatorPure *compress() {
      TMBAD_ASSERT(false);
      std::vector<Index> &inputs = get_glob()->inputs;
      size_t k = Op.input_size();
      size_t start = inputs.size() - k * n;
      std::valarray<Index> increment(k);
      if (k > 0) {
        for (size_t i = 0; i < (size_t)n - 1; i++) {
          std::valarray<Index> v1(&inputs[start + i * k], k);
          std::valarray<Index> v2(&inputs[start + (i + 1) * k], k);
          if (i == 0) {
            increment = v2 - v1;
          } else {
            bool ok = (increment == (v2 - v1)).min();
            if (!ok) return NULL;
          }
        }
      }

      size_t reduction = (n - 1) * k;
      inputs.resize(inputs.size() - reduction);
      return get_glob()->getOperator<RepCompress<Operator1> >(n, increment);
    }
    OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
      OperatorPure *op1 = get_glob()->getOperator<Operator1>();
      if (op1 == other) {
        this->n++;
        return self;
      }
      return NULL;
    }
    const char *op_name() { return "Rep"; }
  };
  /** \brief Replicate an operator and apply input compression
      \details This operator is like `global::Rep` except that it also
      represents input compression. Input compression means that the
      input indices of any given operator can be obtained by adding a
      constant vector to the inputs of the previous operator.
      \note When replacing a batch of binary operators by
      `RepCompress` the memory *reduction* is assymptotically `2/3`
      (ratio will in general depend on the configuration typedefs).
      \warning TO BE DEPRECATED. HAS IMPLICIT DEPENDENCIES.
  */
  template <class Operator1>
  struct RepCompress : DynamicOperator<-1, -1> {
    /** \copydoc Operator::independent_variable */
    static const int independent_variable = Operator1::independent_variable;
    /** \copydoc Operator::dependent_variable */
    static const int dependent_variable = Operator1::dependent_variable;
    /** \copydoc Operator::is_linear */
    static const bool is_linear = Operator1::is_linear;
    typename CPL<Operator1>::type Op;
    Index n;

    std::valarray<Index> increment_pattern;
    RepCompress(Index n, std::valarray<Index> v) : n(n), increment_pattern(v) {}
    Index input_size() const { return Operator1::ninput; }
    Index output_size() const { return Operator1::noutput * n; }
    /** \brief \copydoc Operator::have_input_size_output_size */
    static const bool have_input_size_output_size = true;
    /** \brief Forward method applicable for Scalar and bool case */
    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      std::valarray<Index> inputs(input_size());
      for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
      ForwardArgs<Type> args_cpy = args;
      args_cpy.inputs = &inputs[0];
      args_cpy.ptr.first = 0;
      for (size_t i = 0; i < (size_t)n; i++) {
        Op.forward(args_cpy);
        inputs += this->increment_pattern;
        args_cpy.ptr.second += Op.output_size();
      }
    }
    /** \brief Reverse method applicable for Scalar and bool case */
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      std::valarray<Index> inputs(input_size());
      for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
      inputs += n * this->increment_pattern;
      ReverseArgs<Type> args_cpy = args;
      args_cpy.inputs = &inputs[0];
      args_cpy.ptr.first = 0;
      args_cpy.ptr.second += n * Op.output_size();
      for (size_t i = 0; i < (size_t)n; i++) {
        inputs -= this->increment_pattern;
        args_cpy.ptr.second -= Op.output_size();
        Op.reverse(args_cpy);
      }
    }
    /** \brief Calculate all inputs */
    void dependencies(Args<> &args, Dependencies &dep) const {
      std::valarray<Index> inputs(input_size());
      for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
      for (size_t i = 0; i < (size_t)n; i++) {
        dep.insert(dep.end(), &inputs[0], &inputs[0] + inputs.size());
        inputs += this->increment_pattern;
      }
    }
    static const bool have_dependencies = true;
    void forward(ForwardArgs<Writer> &args) {
      std::valarray<Index> inputs(Op.input_size());
      for (size_t i = 0; i < (size_t)Op.input_size(); i++)
        inputs[i] = args.input(i);
      std::valarray<Index> outputs(Op.output_size());
      for (size_t i = 0; i < (size_t)Op.output_size(); i++)
        outputs[i] = args.output(i);
      Writer w;
      int ninp = Op.input_size();
      int nout = Op.output_size();

      w << "for (int count = 0, " << "i[" << ninp << "]=" << inputs << ", "
        << "di[" << ninp << "]=" << increment_pattern << ", " << "o[" << nout
        << "]=" << outputs << "; " << "count < " << n << "; count++) {\n";

      w << "    ";
      ForwardArgs<Writer> args_cpy = args;
      args_cpy.set_indirect();
      Op.forward(args_cpy);
      w << "\n";

      w << "    ";
      w << "for (int k=0; k<" << ninp << "; k++) i[k] += di[k];\n";
      w << "    ";
      w << "for (int k=0; k<" << nout << "; k++) o[k] += " << nout << ";\n";

      w << "  ";
      w << "}";
    }
    void reverse(ReverseArgs<Writer> &args) {
      std::valarray<Index> inputs(Op.input_size());
      for (size_t i = 0; i < (size_t)Op.input_size(); i++)
        inputs[i] = args.input(i);
      inputs += n * increment_pattern;
      std::valarray<Index> outputs(Op.output_size());
      for (size_t i = 0; i < (size_t)Op.output_size(); i++)
        outputs[i] = args.output(i);
      outputs += n * Op.output_size();
      Writer w;
      int ninp = Op.input_size();
      int nout = Op.output_size();

      w << "for (int count = 0, " << "i[" << ninp << "]=" << inputs << ", "
        << "di[" << ninp << "]=" << increment_pattern << ", " << "o[" << nout
        << "]=" << outputs << "; " << "count < " << n << "; count++) {\n";

      w << "    ";
      w << "for (int k=0; k<" << ninp << "; k++) i[k] -= di[k];\n";
      w << "    ";
      w << "for (int k=0; k<" << nout << "; k++) o[k] -= " << nout << ";\n";

      w << "    ";
      ReverseArgs<Writer> args_cpy = args;
      args_cpy.set_indirect();
      Op.reverse(args_cpy);
      w << "\n";

      w << "  ";
      w << "}";
    }
    /** \brief \copydoc Operator::have_forward_incr_reverse_decr */
    static const bool have_forward_incr_reverse_decr = false;
    /** \brief \copydoc Operator::have_forward_reverse */
    static const bool have_forward_reverse = true;
    /** \brief \copydoc Operator::have_forward_mark_reverse_mark */
    static const bool have_forward_mark_reverse_mark = true;
    const char *op_name() { return "CRep"; }

    struct operator_data_t {
      OperatorPure *Op;
      Index n;
      std::valarray<Index> ip;
      operator_data_t(const RepCompress &x)
          : Op(get_glob()->getOperator<Operator1>()),
            n(x.n),
            ip(x.increment_pattern) {}
      ~operator_data_t() { Op->deallocate(); }
      bool operator==(const operator_data_t &other) {
        return (Op == other.Op) && (ip.size() == other.ip.size()) &&
               ((ip - other.ip).min() == 0);
      }
    };
    void *operator_data() { return new operator_data_t(*this); }
    OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
      if (this->op_name() == other->op_name()) {
        operator_data_t *p1 =
            static_cast<operator_data_t *>(self->operator_data());
        operator_data_t *p2 =
            static_cast<operator_data_t *>(other->operator_data());
        bool match = (*p1 == *p2);
        int other_n = p2->n;
        delete p1;
        delete p2;
        if (match) {
          std::vector<Index> &inputs = get_glob()->inputs;
          size_t reduction = increment_pattern.size();
          inputs.resize(inputs.size() - reduction);
          this->n += other_n;
          other->deallocate();
          return self;
        }
      }
      return NULL;
    }
  };

  /** \brief Operator auto-completion.

      Generate the mandatory members required by `OperatorPure` based on a given
     `Operator`.
  */
  template <class OperatorBase>
  struct Complete : OperatorPure {
    typename CPL<OperatorBase>::type Op;
    INHERIT_CTOR(Complete, Op)
    ~Complete() {}
    void forward(ForwardArgs<Scalar> &args) { Op.forward(args); }
    void reverse(ReverseArgs<Scalar> &args) { Op.reverse(args); }
    void forward_incr(ForwardArgs<Scalar> &args) { Op.forward_incr(args); }
    void reverse_decr(ReverseArgs<Scalar> &args) { Op.reverse_decr(args); }

    void forward(ForwardArgs<Replay> &args) {
      if (Op.add_forward_replay_copy)
        forward_replay_copy(args);
      else
        Op.forward(args);
    }
    void reverse(ReverseArgs<Replay> &args) { Op.reverse(args); }
    void forward_incr(ForwardArgs<Replay> &args) {
      if (Op.add_forward_replay_copy) {
        forward_replay_copy(args);
        increment(args.ptr);
      } else
        Op.forward_incr(args);
    }
    void reverse_decr(ReverseArgs<Replay> &args) { Op.reverse_decr(args); }

    void forward(ForwardArgs<bool> &args) { Op.forward(args); }
    void reverse(ReverseArgs<bool> &args) { Op.reverse(args); }
    void forward_incr(ForwardArgs<bool> &args) { Op.forward_incr(args); }
    void reverse_decr(ReverseArgs<bool> &args) { Op.reverse_decr(args); }
    void forward_incr_mark_dense(ForwardArgs<bool> &args) {
      args.mark_dense(Op);
      Op.increment(args.ptr);
    };

    void forward(ForwardArgs<Writer> &args) { Op.forward(args); }
    void reverse(ReverseArgs<Writer> &args) { Op.reverse(args); }
    void forward_incr(ForwardArgs<Writer> &args) { Op.forward_incr(args); }
    void reverse_decr(ReverseArgs<Writer> &args) { Op.reverse_decr(args); }
    /** \brief Move a stack allocated instance to the heap and let the
       `operation_stack` manage the memory \details This is only useful for
       dynamic operators and therfore disallowed for static operators.
    */
    std::vector<ad_plain> operator()(const std::vector<ad_plain> &x) {
      TMBAD_ASSERT2(OperatorBase::dynamic,
                    "Stack to heap copy only allowed for dynamic operators");
      Complete *pOp = new Complete(*this);
      return get_glob()->add_to_stack<OperatorBase>(pOp, x);
    }
    ad_segment operator()(const ad_segment &x) {
      TMBAD_ASSERT2(OperatorBase::dynamic,
                    "Stack to heap copy only allowed for dynamic operators");
      Complete *pOp = new Complete(*this);
      return get_glob()->add_to_stack<OperatorBase>(pOp, x);
    }
    ad_segment operator()(const ad_segment &x, const ad_segment &y) {
      TMBAD_ASSERT2(OperatorBase::dynamic,
                    "Stack to heap copy only allowed for dynamic operators");
      Complete *pOp = new Complete(*this);
      return get_glob()->add_to_stack<OperatorBase>(pOp, x, y);
    }
    template <class T>
    std::vector<T> operator()(const std::vector<T> &x) {
      std::vector<ad_plain> x_(x.begin(), x.end());
      std::vector<ad_plain> y_ = (*this)(x_);
      std::vector<T> y(y_.begin(), y_.end());
      return y;
    }
    void forward_replay_copy(ForwardArgs<Replay> &args) {
      std::vector<ad_plain> x(Op.input_size());
      for (size_t i = 0; i < x.size(); i++) x[i] = args.x(i);
      std::vector<ad_plain> y =
          get_glob()->add_to_stack<OperatorBase>(this->copy(), x);
      for (size_t i = 0; i < y.size(); i++) args.y(i) = y[i];
    }
    void dependencies(Args<> &args, Dependencies &dep) {
      Op.dependencies(args, dep);
    }
    void dependencies_updating(Args<> &args, Dependencies &dep) {
      Op.dependencies_updating(args, dep);
    }
    void increment(IndexPair &ptr) { Op.increment(ptr); }
    void decrement(IndexPair &ptr) { Op.decrement(ptr); }
    Index input_size() { return Op.input_size(); }
    Index output_size() { return Op.output_size(); }
    const char *op_name() { return Op.op_name(); }
    void print(print_config cfg) { Op.print(cfg); }

    template <class Operator_, int depth>
    struct SelfFuse {
      typedef Rep<Operator_> type;
      OperatorPure *operator()() {
        return get_glob()->template getOperator<type>(2);
      }
    };
    template <class Operator_>
    struct SelfFuse<Operator_, 0> {
      OperatorPure *operator()() { return NULL; }
    };
    OperatorPure *self_fuse() {
      return SelfFuse<OperatorBase, OperatorBase::max_fuse_depth>()();
    }
    OperatorPure *other_fuse(OperatorPure *other) {
      return Op.other_fuse(this, other);
    }
    OperatorPure *copy() {
      if (Op.dynamic)
        return new Complete(*this);
      else
        return this;
    }
    void deallocate() {
      if (!Op.dynamic) return;
      delete this;
    }
    op_info info() {
      op_info info(Op);
      return info;
    }
    void *identifier() {
      if (Op.add_static_identifier) {
        static void *id = new char();
        return id;
      } else
        return (void *)this;
    }
    void *operator_data() { return Op.operator_data(); }
    void *incomplete() { return &Op; }
  };

  template <class OperatorBase>
  Complete<OperatorBase> *getOperator() const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()();
  }
  template <class OperatorBase, class T1>
  Complete<OperatorBase> *getOperator(const T1 &x1) const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
        x1);
  }
  template <class OperatorBase, class T1, class T2>
  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2) const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
        x1, x2);
  }
  template <class OperatorBase, class T1, class T2, class T3>
  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2,
                                      const T3 &x3) const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
        x1, x2, x3);
  }
  template <class OperatorBase, class T1, class T2, class T3, class T4>
  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2, const T3 &x3,
                                      const T4 &x4) const {
    return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
        x1, x2, x3, x4);
  }
  struct InvOp : Operator<0> {
    static const int independent_variable = true;
    template <class Type>
    void forward(ForwardArgs<Type> &args) {}
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {}
    const char *op_name();
  };

  struct DepOp : Operator<1> {
    static const bool is_linear = true;
    static const int dependent_variable = true;
    static const bool have_eval = true;
    template <class Type>
    Type eval(Type x0) {
      return x0;
    }
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      args.dx(0) += args.dy(0);
    }
    const char *op_name();
  };

  struct ConstOp : Operator<0, 1> {
    static const bool is_linear = true;
    static const bool is_constant = true;
    template <class Type>
    void forward(ForwardArgs<Type> &args) {}
    void forward(ForwardArgs<Replay> &args);
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {}
    const char *op_name();
    void forward(ForwardArgs<Writer> &args);
  };
  struct DataOp : DynamicOutputOperator<0> {
    typedef DynamicOutputOperator<0> Base;
    static const bool is_linear = true;
    DataOp(Index n);
    template <class Type>
    void forward(ForwardArgs<Type> &args) {}
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {}
    const char *op_name();
    void forward(ForwardArgs<Writer> &args);
  };
  /** \brief Add zero allocated workspace to the tape

      Serves as a pre-allocated workspace for `Operator::forward_updating`
      operators. In particular

      - Operator outputs are not allowed to be remapped (ensured by 'dynamic')
      - Operator persists during forward replay
  */
  struct AllocOp : DynamicOutputOperator<0> {
    typedef DynamicOutputOperator<0> Base;
    static const bool add_forward_replay_copy = true;
    AllocOp(Index n);
    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      for (Index i = 0; i < Base::noutput; i++) args.y(i) = Type(0);
    }
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {}
    const char *op_name();
    void forward(ForwardArgs<Writer> &args);
    /** \brief Override a consequtive set of variables by a zero allocated
     * workspace */
    void operator()(Replay *x, Index n);
  };
  /** \brief Set allocated workspace to zero */
  struct ZeroOp : DynamicOperator<1, 0> {
    static const bool forward_updating = true;
    static const bool reverse_updating = true;
    static const bool add_forward_replay_copy = true;
    static const bool have_dependencies = true;
    static const bool implicit_dependencies = true;
    static const bool allow_remap = false;
    Index n;
    ZeroOp(Index n);
    void forward(ForwardArgs<Scalar> &args);
    void reverse(ReverseArgs<Scalar> &args);
    template <class Type>
    void forward(ForwardArgs<Type> &args) {
      TMBAD_ASSERT2(false, "Unsupported 'ZeroOp' method");
    }
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      TMBAD_ASSERT2(false, "Unsupported 'ZeroOp' method");
    }
    const char *op_name();
    void dependencies(Args<> &args, Dependencies &dep) const;
    void dependencies_updating(Args<> &args, Dependencies &dep) const;
    /** \brief Set allocated updatable workspace to zero */
    void operator()(Replay *x, Index n);
  };

  /** \brief Empty operator **without** inputs or outputs */
  struct NullOp : Operator<0, 0> {
    NullOp();
    const char *op_name();
    template <class T>
    void forward(ForwardArgs<T> &args) {}
    template <class T>
    void reverse(ReverseArgs<T> &args) {}
  };
  /** \brief Empty operator **with** inputs and outputs */
  struct NullOp2 : DynamicInputOutputOperator {
    NullOp2(Index ninput, Index noutput);
    const char *op_name();
    template <class T>
    void forward(ForwardArgs<T> &args) {}
    template <class T>
    void reverse(ReverseArgs<T> &args) {}
  };
  /** \brief Reference a variable on another tape

      \details References to other tapes are a handy alternative to
      independent variables, but there are caveats. One must note that
      these references are absolute pointers into an other tape's
      value space. If the other tape is optimized the value indices
      might get remapped and the absolute references would no longer
      be valid!
      It follows that we must have some strict limitations on their use:

      - References to other tapes are only allowed to exist
        temporarily, during tape construction.
      - If a child tape has references to a parent tape the parent
        tape is not complete until all references are 'resolved',
        i.e. the child tape has been replayed on top of the parent.
      - Atomic functions can never be allowed to contain references
        because atomic function replay does not change whats inside
        the atomic function. In other words, references would persist
        during replay and never get resolved.
  */
  struct RefOp : DynamicOperator<0, 1> {
    static const bool dynamic = true;
    global *glob;
    Index i;
    RefOp(global *glob, Index i);
    /** \brief RefOp copies value from other tape */
    void forward(ForwardArgs<Scalar> &args);
    /** \brief When replayed it 'resolves' references if applicable */
    void forward(ForwardArgs<Replay> &args);
    /** \brief Reverse mode updates are forbidden until all references are
     * resolved */
    template <class Type>
    void reverse(ReverseArgs<Type> &args) {
      TMBAD_ASSERT2(false,
                    "Reverse mode updates are forbidden until all references "
                    "are resolved");
    }
    /** \brief Reverse mode updates are allowed in replay mode */
    void reverse(ReverseArgs<Replay> &args);
    const char *op_name();
  };

  typedef Operator<1> UnaryOperator;
  typedef Operator<2> BinaryOperator;

  OperatorPure *Fuse(OperatorPure *Op1, OperatorPure *Op2);

  static bool fuse;

  /** \brief Enable/disable operator fusion \details This function is
      applicable if TMBad has been compiled with the preprocessor flag
      `-DFUSE`. The function sets/unsets the *global* `fuse` flag.
  */
  void set_fuse(bool flag);

  /** \brief Add `OperatorPure` to stack and trigger operator fusion
      if enabled */
  void add_to_opstack(OperatorPure *pOp);
  /** \brief Add nullary operator to the stack based on its **result**  */
  template <class OperatorBase>
  ad_plain add_to_stack(Scalar result = 0) {
    ad_plain ans;
    ans.index = this->values.size();

    this->values.push_back(result);

    Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
    add_to_opstack(pOp);

    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
    return ans;
  }
  /** \brief Add unary operator to the stack based on its **argument** */
  template <class OperatorBase>
  ad_plain add_to_stack(const ad_plain &x) {
    ad_plain ans;
    ans.index = this->values.size();

    this->values.push_back(OperatorBase().eval(x.Value()));

    this->inputs.push_back(x.index);

    Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
    add_to_opstack(pOp);

    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
    return ans;
  }
  /** \brief Add binary operator to the stack based on its two **arguments** */
  template <class OperatorBase>
  ad_plain add_to_stack(const ad_plain &x, const ad_plain &y) {
    ad_plain ans;
    ans.index = this->values.size();

    this->values.push_back(OperatorBase().eval(x.Value(), y.Value()));

    this->inputs.push_back(x.index);
    this->inputs.push_back(y.index);

    Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
    add_to_opstack(pOp);

    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
    return ans;
  }
  template <class OperatorBase>
  ad_segment add_to_stack(ad_segment lhs, ad_segment rhs,
                          ad_segment more = ad_segment()) {
    IndexPair ptr((Index)inputs.size(), (Index)values.size());
    Complete<OperatorBase> *pOp =
        this->template getOperator<OperatorBase>(lhs, rhs);
    size_t n = pOp->output_size();
    ad_segment ans(values.size(), n);
    inputs.push_back(lhs.index());
    inputs.push_back(rhs.index());
    if (more.size() > 0) inputs.push_back(more.index());
    opstack.push_back(pOp);
    values.resize(values.size() + n);
    ForwardArgs<Scalar> args(inputs, values, this);
    args.ptr = ptr;
    pOp->forward(args);

    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
    return ans;
  }

  template <class OperatorBase>
  ad_segment add_to_stack(Complete<OperatorBase> *pOp, ad_segment lhs,
                          ad_segment rhs = ad_segment()) {
    static_assert(
        OperatorBase::dynamic,
        "Unlikely that you want to use this method for static operators?");
    static_assert(
        OperatorBase::ninput == 0 || OperatorBase::implicit_dependencies,
        "Operators with pointer inputs should always implement "
        "'implicit_dependencies'");

    IndexPair ptr((Index)inputs.size(), (Index)values.size());
    size_t n = pOp->output_size();
    ad_segment ans(values.size(), n);
    TMBAD_ASSERT((Index)(lhs.size() > 0) + (Index)(rhs.size() > 0) ==
                 pOp->input_size());
    if (lhs.size() > 0) inputs.push_back(lhs.index());
    if (rhs.size() > 0) inputs.push_back(rhs.index());
    opstack.push_back(pOp);
    values.resize(values.size() + n);
    ForwardArgs<Scalar> args(inputs, values, this);
    args.ptr = ptr;
    pOp->forward(args);

    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
    return ans;
  }
  /** \brief Add vector operator to the stack based on its **vector argument**
   */
  template <class OperatorBase>
  std::vector<ad_plain> add_to_stack(OperatorPure *pOp,
                                     const std::vector<ad_plain> &x) {
    IndexPair ptr((Index)inputs.size(), (Index)values.size());
    size_t m = pOp->input_size();
    size_t n = pOp->output_size();
    ad_segment ans(values.size(), n);
    for (size_t i = 0; i < m; i++) inputs.push_back(x[i].index);
    opstack.push_back(pOp);
    values.resize(values.size() + n);
    ForwardArgs<Scalar> args(inputs, values, this);
    args.ptr = ptr;
    pOp->forward(args);

    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
    TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
    std::vector<ad_plain> out(n);
    for (size_t i = 0; i < n; i++) out[i].index = ans.index() + i;
    return out;
  }

  struct ad_plain {
    Index index;
    static const Index NA = (Index)-1;
    bool initialized() const;
    bool on_some_tape() const;
    /** \brief Compatibiltiy with ad_aug */
    void addToTape() const;
    /** \brief Get the tape of this ad_plain */
    global *glob() const;
    /** \brief Override this ad_plain
        \details Ignored
    */
    void override_by(const ad_plain &x) const;

    /** \brief Default CTOR \details Default constuction, along with
        copy construction and copy assignment, are **the only
        `ad_plain` operations that do not modify the tape**.
    */
    ad_plain();

    /** \brief CTOR from the `Scalar` */
    ad_plain(Scalar x);
    /** \brief CTOR from `global::ad_aug` */
    ad_plain(ad_aug x);

    /** \brief Copy operation */
    struct CopyOp : Operator<1> {
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0) {
        return x0;
      }
      Replay eval(Replay x0);
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        args.dx(0) += args.dy(0);
      }
      const char *op_name();
    };
    /** \brief Deep copy existing ad_plain. Result will be at the end
        of the active sequence.
        \details In contrast to the default copy CTOR this function
        puts an actual copy instruction on the tape. This is mainly
        useful to ensure a contiguous memory layout of selected
        variables.
    */
    ad_plain copy() const;
    /** \brief Copy value and set derivative to zero

        \details The purpose of this operator is to allow using
        temporaries without tracking their derivatives. The ValOp has
        the following properties:
        - Its reverse mode derivatives are zero.
        - Its ingoing edges are reported to be empty. It follows that sparse
       derivatives are zero.
        - Otherwise, it behaves like the copy operator `CopyOp`.
    */
    struct ValOp : Operator<1> {
      static const bool have_dependencies = true;
      static const bool have_eval = true;
      /** \brief Evaluation is as the CopyOp except in replay mode */
      template <class Type>
      Type eval(Type x0) {
        return x0;
      }
      Replay eval(Replay x0);
      /** \brief Derivatives in the dense case are zero */
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {}
      /** \brief Derivatives in the sparse case are untracked, i.e. zero.
          \note Inputs to `ValOp` are **not** marked for removal by
          the tape optimizer despite the empty `dependencies` (because
          the tape optimizer uses the boolean pattern which has the
          default `mark_dense` implementation).
      */
      void dependencies(Args<> &args, Dependencies &dep) const;
      const char *op_name();
    };
    /** \brief Deep copy existing ad_plain and set derivative to
        zero. Result will be at the end of the active sequence.
    */
    ad_plain copy0() const;

    template <bool left_var, bool right_var>
    struct AddOp_ : BinaryOperator {
      static const bool is_linear = true;
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0, Type x1) {
        return x0 + x1;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        if (left_var) args.dx(0) += args.dy(0);
        if (right_var) args.dx(1) += args.dy(0);
      }
      const char *op_name() { return "AddOp"; }
      OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
        if (other == get_glob()->getOperator<MulOp>()) {
          return get_glob()->getOperator<Fused<AddOp_, MulOp> >();
        }
        return NULL;
      }
    };
    typedef AddOp_<true, true> AddOp;
    ad_plain operator+(const ad_plain &other) const;

    template <bool left_var, bool right_var>
    struct SubOp_ : BinaryOperator {
      static const bool is_linear = true;
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0, Type x1) {
        return x0 - x1;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        if (left_var) args.dx(0) += args.dy(0);
        if (right_var) args.dx(1) -= args.dy(0);
      }
      const char *op_name() { return "SubOp"; }
    };
    typedef SubOp_<true, true> SubOp;
    ad_plain operator-(const ad_plain &other) const;

    template <bool left_var, bool right_var>
    struct MulOp_ : BinaryOperator {
      static const bool have_eval = true;
      static const bool is_linear = !left_var || !right_var;
      template <class Type>
      Type eval(Type x0, Type x1) {
        return x0 * x1;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        if (left_var) args.dx(0) += args.x(1) * args.dy(0);
        if (right_var) args.dx(1) += args.x(0) * args.dy(0);
      }
      const char *op_name() { return "MulOp"; }
    };
    typedef MulOp_<true, true> MulOp;
    ad_plain operator*(const ad_plain &other) const;
    ad_plain operator*(const Scalar &other) const;

    template <bool left_var, bool right_var>
    struct DivOp_ : BinaryOperator {
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0, Type x1) {
        return x0 / x1;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        Type tmp0 = args.dy(0) / args.x(1);
        if (left_var) args.dx(0) += tmp0;
        if (right_var) args.dx(1) -= args.y(0) * tmp0;
      }
      const char *op_name() { return "DivOp"; }
    };
    typedef DivOp_<true, true> DivOp;
    ad_plain operator/(const ad_plain &other) const;

    struct NegOp : UnaryOperator {
      static const bool is_linear = true;
      static const bool have_eval = true;
      template <class Type>
      Type eval(Type x0) {
        return -x0;
      }
      template <class Type>
      void reverse(ReverseArgs<Type> &args) {
        args.dx(0) -= args.dy(0);
      }
      const char *op_name();
    };
    ad_plain operator-() const;

    ad_plain &operator+=(const ad_plain &other);
    ad_plain &operator-=(const ad_plain &other);
    ad_plain &operator*=(const ad_plain &other);
    ad_plain &operator/=(const ad_plain &other);

    void Dependent();

    void Independent();
    Scalar &Value();
    Scalar Value() const;
    Scalar Value(global *glob) const;
    Scalar &Deriv();
  };
  /** \brief Previous ad context to be restored then this context ends
      \details The value `NULL` signifies that either
      - this glob is not in use **or**
      - this glob is the first in the context stack
  */
  global *parent_glob;
  /** \brief Is this glob present in the context stack? */
  bool in_use;
  /** \brief Enable ad calulations to be piped to this glob
      \note It is not allowed to start the same ad context several times
  */
  void ad_start();
  /** \brief Stop ad calculations from being piped to this glob. */
  void ad_stop();
  void Independent(std::vector<ad_plain> &x);
  /** \brief Contiguous set of variables on the *current* tape

      This class can represent vectors (or matrices) in a compact way
      assuming that numerical values are available on the tape as a
      contiguous *segment*. The class stores the offset and the size
      (or dimension) of the segment.
  */
  struct ad_segment {
    ad_plain x;
    size_t n;
    size_t c;
    /** \brief Construct empty object */
    ad_segment();
    /** \brief Construct vector like object */
    ad_segment(ad_plain x, size_t n);
    /** \brief Construct length one object (variable) */
    ad_segment(ad_aug x);
    /** \brief Construct length one object (constant) */
    ad_segment(Scalar x);
    /** \brief Construction from offset (index) and size */
    ad_segment(Index idx, size_t n);
    /** \brief Construct matrix like object */
    ad_segment(ad_plain x, size_t r, size_t c);
    /** \brief Construction based on values that might *not* be on the
        tape or might *not* satisfy the storage requirement. */
    ad_segment(Replay *x, size_t n, bool zero_check = false);
    bool identicalZero();
    bool all_on_active_tape(Replay *x, size_t n);
    bool is_contiguous(Replay *x, size_t n);
    bool all_zero(Replay *x, size_t n);
    bool all_constant(Replay *x, size_t n);
    size_t size() const;
    size_t rows() const;
    size_t cols() const;

    ad_plain operator[](size_t i) const;
    ad_plain offset() const;
    Index index() const;
  };
  /** \brief Augmented AD type

      \details `ad_aug` is an 'augmented' AD type which, in contrast
      to `ad_plain`, knows on which tape it belongs. This facilitates
      some extra features:

      Nested AD: An `ad_aug` may refer to other variables on
      different tapes as long as they can be found in the 'active
      context stack' (defined here: `TMBad::global`).

      In addition, `ad_aug` can keep tapes small by only adding
      operations when strictly necessary. For instance, it detects
      whether a variable is constant or identical zero to see if a
      reduction is possible to avoid adding the variable to the
      tape.

      The downside of `ad_aug` is that is takes up slightly more
      memory than `ad_plain`.
  */
  struct ad_aug {
    /** \brief If `taped_value` **is** initialized (see
        `ad_plain::initialize`) this is the value of `ad_aug`. */
    mutable ad_plain taped_value;
    /** \brief If `taped_value` is **not** initialized then this is
        the value of `ad_aug`. Otherwise, `value` is un-defined, and
        `glob` is the tape on which this `ad_aug` resides. */
    TMBAD_UNION_OR_STRUCT {
      Scalar value;
      mutable global *glob;
    }
    data;
    /** \brief Is this object on *some* (not necessarily active) tape? */
    bool on_some_tape() const;
    /** \brief Is this object on the current active tape? */
    bool on_active_tape() const;
    /** \brief Alias for `on_some_tape()` (for backward compatibility only) */
    bool ontape() const;
    /** \brief Is this object guarantied to be a constant?
        \note If `false` the object *may* or *may not* be constant.
    */
    bool constant() const;
    Index index() const;
    /** \brief Get the tape of this ad_aug
        \return Returns the tape address of this variable **if**
        variable belongs to *some* tape.  Otherwise `NULL` is
        returned.
    */
    global *glob() const;
    /** \brief Return the underlying scalar value of this `ad_aug`. */
    Scalar Value() const;
    /** \brief Default CTOR \details Leaves `taped_value` in
        uninitialized state to signify that this variable is
        constant. */
    ad_aug();
    /** \brief CTOR from scalar \details Leaves `taped_value` in
        uninitialized state to signify that this variable is
        constant. */
    ad_aug(Scalar x);
    /** \brief CTOR from `ad_plain` \details Leaves `value` uninitialized. */
    ad_aug(ad_plain x);
    /** \brief Force this variable to be put on the tape. \details If
        variable already is on the tape nothing happens. In particular
        the variable cannot be assumed to be the most recently
        added. */
    void addToTape() const;
    /** \brief Override this ad_plain and set glob to `get_glob()`
        \warning Internal use only
    */
    void override_by(const ad_plain &x) const;
    /** \brief Check if 'glob' exists in the active context stack */
    bool in_context_stack(global *glob) const;
    /** \brief Deep copy existing ad_aug Result will be last value of
        the current tape. */
    ad_aug copy() const;
    /** \brief Deep copy existing ad_aug *wihout derivatives*. */
    ad_aug copy0() const;
    /** \brief If `true` this variable is identical zero. If `false`
        nothing can be concluded. */
    bool identicalZero() const;
    /** \brief If `true` this variable is identical one. If `false`
        nothing can be concluded. */
    bool identicalOne() const;
    /** \brief If `true` 'this' and 'other' are both constants. If
        `false` nothing can be concluded (they might be constants on
        the tape) */
    bool bothConstant(const ad_aug &other) const;
    /** \brief If `true` 'this' and 'other' are identical. If `false`
        nothing can be concluded (they might be equal, but different,
        expressions on the tape) */
    bool identical(const ad_aug &other) const;
    static const Index updbit = Index(1) << (sizeof(Index) * 8 - 1);
    /** \brief Set / unset the 'updatable bit' of this `ad_aug` */
    void setUpdatable(bool flag);
    /** \brief Is this `ad_aug` updatable? */
    bool updatable() const;
    /** \brief Addition \details Included reductions:
        - 0 + x = x
        - x + 0 = x
    */
    ad_aug operator+(const ad_aug &other) const;
    /** \brief Subtraction \details Included reductions:
        - x - 0 = x
        - 0 - x = -x
        - x - x = 0
    */
    ad_aug operator-(const ad_aug &other) const;
    /** \brief Negation */
    ad_aug operator-() const;
    /** \brief Multiplication \details Included reductions:
        - 0 * x = 0
        - x * 0 = 0
        - 1 * x = x
        - x * 1 = x
    */
    ad_aug operator*(const ad_aug &other) const;
    /** \brief Division \details Included reductions:
        - 0 / x = 0
        - x / 1 = x
    */
    ad_aug operator/(const ad_aug &other) const;
    /** \brief Compound assignment taking advantage of `operator+`
        reductions */
    ad_aug &operator+=(const ad_aug &other);
    /** \brief Compound assignment taking advantage of `operator-`
        reductions */
    ad_aug &operator-=(const ad_aug &other);
    /** \brief Compound assignment taking advantage of `operator*`
        reductions */
    ad_aug &operator*=(const ad_aug &other);
    /** \brief Compound assignment taking advantage of `operator/`
        reductions */
    ad_aug &operator/=(const ad_aug &other);
    /** \brief Set this `ad_aug` as dependent */
    void Dependent();
    /** \brief Set this `ad_aug` as independent */
    void Independent();
    Scalar &Value();
    Scalar &Deriv();
  };
  void Independent(std::vector<ad_aug> &x);
};

typedef global::ad_aug ad_aug;

template <class T>
inline T &UpdatingAccess<T>::operator+=(const T &other) {
  x += other;
  return x;
}
template <class T>
inline T &UpdatingAccess<T>::operator-=(const T &other) {
  x -= other;
  return x;
}

template <bool accumulate>
struct AccOp : global::Operator<2, 0> {
  static const bool is_linear = true;
  static const bool forward_updating = true;

  static const bool reverse_updating = false;
  static const bool have_dependencies = true;
  template <class T>
  void forward(ForwardArgs<T> &args) {
    T *x = args.x_ptr(0);
    T *y = args.x_ptr(1);
    accumulate ? x[0] += y[0] : x[0] -= y[0];
  }
  template <class T>
  void reverse(ReverseArgs<T> &args) {
    T *dx = args.dx_ptr(0);
    T *dy = args.dx_ptr(1);
    accumulate ? dy[0] += dx[0] : dy[0] -= dx[0];
  }
  void forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
  void reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
  const char *op_name() { return (accumulate ? "AccOp" : "DecOp"); }
  void operator()(ad_aug &x, const ad_aug &y) {
    typedef global::ad_plain ad_plain;
    global *glob = get_glob();

    Index i0 = ad_plain(x).index;
    Index i1 = ad_plain(y).index;

    glob->inputs.push_back(i0);
    glob->inputs.push_back(i1);

    global::Complete<AccOp> *pOp = glob->template getOperator<AccOp>();
    glob->add_to_opstack(pOp);
    accumulate ? glob->values[i0] += glob->values[i1]
               : glob->values[i0] -= glob->values[i1];
  }
  void dependencies(Args<> &args, Dependencies &dep) const {
    dep.push_back(args.input(1));
  }
  void dependencies_updating(Args<> &args, Dependencies &dep) const {
    dep.push_back(args.input(0));
  }
};

template <>
inline ad_aug &UpdatingAccess<ad_aug>::operator+=(const ad_aug &other) {
  if (this->x.updatable()) {
    AccOp<true>()(this->x, global::ad_plain(other));
  } else {
    this->x += other;
  }
  return this->x;
}
template <>
inline ad_aug &UpdatingAccess<ad_aug>::operator-=(const ad_aug &other) {
  if (this->x.updatable()) {
    AccOp<false>()(this->x, global::ad_plain(other));
  } else {
    this->x -= other;
  }
  return this->x;
}

template <class S, class T>
std::ostream &operator<<(std::ostream &os, const std::pair<S, T> &x) {
  os << "(" << x.first << ", " << x.second << ")";
  return os;
}

std::ostream &operator<<(std::ostream &os, const global::ad_plain &x);
std::ostream &operator<<(std::ostream &os, const global::ad_aug &x);

/** \brief Enable weak comparison operators of an ad type
    \details By weak comparison we mean that these comparisons are
    performed on the numerical values and *not* on expression
    trees. One must in general be careful using comparison operators
    with ad types because
    - Comparison (branching) can ruin differentiability.
    - Branching requires retaping. That is, we must re-build the
    expression tree for each function evaluation to be sure the
    branching is correctly differentiated.
*/
template <class T>
struct adaptive : T {
  INHERIT_CTOR(adaptive, T)
  bool operator==(const T &other) const {
    return this->Value() == other.Value();
  }
  bool operator!=(const T &other) const {
    return this->Value() != other.Value();
  }
  bool operator>=(const T &other) const {
    return this->Value() >= other.Value();
  }
  bool operator<=(const T &other) const {
    return this->Value() <= other.Value();
  }
  bool operator<(const T &other) const { return this->Value() < other.Value(); }
  bool operator>(const T &other) const { return this->Value() > other.Value(); }

  adaptive operator+(const T &other) const {
    return adaptive(T(*this) + other);
  }
  adaptive operator-(const T &other) const {
    return adaptive(T(*this) - other);
  }
  adaptive operator*(const T &other) const {
    return adaptive(T(*this) * other);
  }
  adaptive operator/(const T &other) const {
    return adaptive(T(*this) / other);
  }

  adaptive operator-() const { return adaptive(-(T(*this))); }
};

typedef global::ad_plain ad_plain;
typedef global::Replay Replay;
typedef adaptive<ad_aug> ad_adapt;
typedef global::OperatorPure OperatorPure;
typedef global::operation_stack operation_stack;
typedef global::print_config print_config;
/** \brief Construct `ad_plain` from index
    \details `ad_plain` cannot have CTOR from `Index` because it would
    conflict with `Scalar` CTOR. The class `ad_plain_index` is needed to
    allow CTOR from `Index`.
*/
struct ad_plain_index : ad_plain {
  ad_plain_index(const Index &i);
  ad_plain_index(const ad_plain &x);
};
struct ad_aug_index : ad_aug {
  ad_aug_index(const Index &i);
  ad_aug_index(const ad_aug &x);
  ad_aug_index(const ad_plain &x);
};

template <class T>
void Independent(std::vector<T> &x) {
  for (size_t i = 0; i < x.size(); i++) x[i].Independent();
}
template <class T>
void Dependent(std::vector<T> &x) {
  for (size_t i = 0; i < x.size(); i++) x[i].Dependent();
}
template <class T>
Scalar Value(T x) {
  return x.Value();
}
Scalar Value(Scalar x);

/** \brief Is this ad vector available as a contiguous block on the tape?
    \details Template type 'V::value_type' can be
    - ad_plain
    - ad_aug
    - ad_adapt
*/
template <class V>
bool isContiguous(V &x) {
  bool ok = true;
  Index j_previous;
  for (size_t i = 0; i < (size_t)x.size(); i++) {
    if (!x[i].on_some_tape()) {
      ok = false;
      break;
    }
    Index j = ad_plain(x[i]).index;
    if (i > 0) {
      if (j != j_previous + 1) {
        ok = false;
        break;
      }
    }
    j_previous = j;
  }
  return ok;
}
/** \brief Get contiguous (deep) copy of this vector
    \details Template type 'V::value_type' can be
    - ad_plain
    - ad_aug
    - ad_adapt
*/
template <class V>
V getContiguous(const V &x) {
  V y(x.size());
  for (size_t i = 0; i < (size_t)x.size(); i++) y[i] = x[i].copy();
  return y;
}
/** \brief Make contiguous ad vector
    \details Template type 'V::value_type' can be
    - ad_plain
    - ad_aug
    - ad_adapt
*/
template <class V>
void forceContiguous(V &x) {
  if (!isContiguous(x)) x = getContiguous(x);
}
ad_aug operator+(const double &x, const ad_aug &y);
ad_aug operator-(const double &x, const ad_aug &y);
ad_aug operator*(const double &x, const ad_aug &y);
ad_aug operator/(const double &x, const ad_aug &y);

bool operator<(const double &x, const ad_adapt &y);
bool operator<=(const double &x, const ad_adapt &y);
bool operator>(const double &x, const ad_adapt &y);
bool operator>=(const double &x, const ad_adapt &y);
bool operator==(const double &x, const ad_adapt &y);
bool operator!=(const double &x, const ad_adapt &y);
using ::round;
using ::trunc;
using std::ceil;
using std::floor;
Writer floor(const Writer &x);
struct FloorOp : global::UnaryOperator {
  static const bool have_eval = true;
  static const bool is_zero_deriv = true;
  template <class Type>
  Type eval(Type x) {
    return floor(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain floor(const ad_plain &x);
ad_aug floor(const ad_aug &x);
Writer ceil(const Writer &x);
struct CeilOp : global::UnaryOperator {
  static const bool have_eval = true;
  static const bool is_zero_deriv = true;
  template <class Type>
  Type eval(Type x) {
    return ceil(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain ceil(const ad_plain &x);
ad_aug ceil(const ad_aug &x);
Writer trunc(const Writer &x);
struct TruncOp : global::UnaryOperator {
  static const bool have_eval = true;
  static const bool is_zero_deriv = true;
  template <class Type>
  Type eval(Type x) {
    return trunc(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain trunc(const ad_plain &x);
ad_aug trunc(const ad_aug &x);
Writer round(const Writer &x);
struct RoundOp : global::UnaryOperator {
  static const bool have_eval = true;
  static const bool is_zero_deriv = true;
  template <class Type>
  Type eval(Type x) {
    return round(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain round(const ad_plain &x);
ad_aug round(const ad_aug &x);

double sign(const double &x);
Writer sign(const Writer &x);
struct SignOp : global::UnaryOperator {
  static const bool have_eval = true;
  static const bool is_zero_deriv = true;
  template <class Type>
  Type eval(Type x) {
    return sign(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain sign(const ad_plain &x);
ad_aug sign(const ad_aug &x);

double ge0(const double &x);
double lt0(const double &x);
Writer ge0(const Writer &x);
struct Ge0Op : global::UnaryOperator {
  static const bool have_eval = true;
  static const bool is_zero_deriv = true;
  template <class Type>
  Type eval(Type x) {
    return ge0(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain ge0(const ad_plain &x);
ad_aug ge0(const ad_aug &x);
Writer lt0(const Writer &x);
struct Lt0Op : global::UnaryOperator {
  static const bool have_eval = true;
  static const bool is_zero_deriv = true;
  template <class Type>
  Type eval(Type x) {
    return lt0(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain lt0(const ad_plain &x);
ad_aug lt0(const ad_aug &x);

template <class T>
T zeroDeriv(T x) {
  return x;
}
Writer zeroDeriv(const Writer &x);
struct ZderivOp : global::UnaryOperator {
  static const bool have_eval = true;
  static const bool is_zero_deriv = true;
  template <class Type>
  Type eval(Type x) {
    return zeroDeriv(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  const char *op_name();
};
ad_plain zeroDeriv(const ad_plain &x);
ad_aug zeroDeriv(const ad_aug &x);

struct SderivOp : global::ad_plain::CopyOp {
  static const bool is_zero_deriv = true;
  const char *op_name();
};
ad_plain sparseDeriv(const ad_plain &x);
double sparseDeriv(const double &x);
using ::expm1;
using ::fabs;
using ::log1p;
using std::acos;
using std::acosh;
using std::asin;
using std::asinh;
using std::atan;
using std::atanh;
using std::cos;
using std::cosh;
using std::exp;
using std::log;
using std::sin;
using std::sinh;
using std::sqrt;
using std::tan;
using std::tanh;

Writer fabs(const Writer &x);
struct AbsOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return fabs(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * sign(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain fabs(const ad_plain &x);
ad_aug fabs(const ad_aug &x);
ad_adapt fabs(const ad_adapt &x);
Writer cos(const Writer &x);
ad_aug cos(const ad_aug &x);
Writer sin(const Writer &x);
struct SinOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return sin(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * cos(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain sin(const ad_plain &x);
ad_aug sin(const ad_aug &x);
ad_adapt sin(const ad_adapt &x);
Writer cos(const Writer &x);
struct CosOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return cos(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * -sin(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain cos(const ad_plain &x);
ad_aug cos(const ad_aug &x);
ad_adapt cos(const ad_adapt &x);
Writer exp(const Writer &x);
struct ExpOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return exp(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * args.y(0);
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain exp(const ad_plain &x);
ad_aug exp(const ad_aug &x);
ad_adapt exp(const ad_adapt &x);
Writer log(const Writer &x);
struct LogOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return log(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / args.x(0);
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain log(const ad_plain &x);
ad_aug log(const ad_aug &x);
ad_adapt log(const ad_adapt &x);
Writer sqrt(const Writer &x);
struct SqrtOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return sqrt(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(0.5) / args.y(0);
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain sqrt(const ad_plain &x);
ad_aug sqrt(const ad_aug &x);
ad_adapt sqrt(const ad_adapt &x);
Writer tan(const Writer &x);
struct TanOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return tan(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (cos(args.x(0)) * cos(args.x(0)));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain tan(const ad_plain &x);
ad_aug tan(const ad_aug &x);
ad_adapt tan(const ad_adapt &x);
Writer cosh(const Writer &x);
ad_aug cosh(const ad_aug &x);
Writer sinh(const Writer &x);
struct SinhOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return sinh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * cosh(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain sinh(const ad_plain &x);
ad_aug sinh(const ad_aug &x);
ad_adapt sinh(const ad_adapt &x);
Writer cosh(const Writer &x);
struct CoshOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return cosh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * sinh(args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain cosh(const ad_plain &x);
ad_aug cosh(const ad_aug &x);
ad_adapt cosh(const ad_adapt &x);
Writer tanh(const Writer &x);
struct TanhOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return tanh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (cosh(args.x(0)) * cosh(args.x(0)));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain tanh(const ad_plain &x);
ad_aug tanh(const ad_aug &x);
ad_adapt tanh(const ad_adapt &x);
Writer expm1(const Writer &x);
struct Expm1 : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return expm1(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * args.y(0) + Type(1.);
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain expm1(const ad_plain &x);
ad_aug expm1(const ad_aug &x);
ad_adapt expm1(const ad_adapt &x);
Writer log1p(const Writer &x);
struct Log1p : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return log1p(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (args.x(0) + Type(1.));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain log1p(const ad_plain &x);
ad_aug log1p(const ad_aug &x);
ad_adapt log1p(const ad_adapt &x);
Writer asin(const Writer &x);
struct AsinOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return asin(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) +=
        args.dy(0) * Type(1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain asin(const ad_plain &x);
ad_aug asin(const ad_aug &x);
ad_adapt asin(const ad_adapt &x);
Writer acos(const Writer &x);
struct AcosOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return acos(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) +=
        args.dy(0) * Type(-1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain acos(const ad_plain &x);
ad_aug acos(const ad_aug &x);
ad_adapt acos(const ad_adapt &x);
Writer atan(const Writer &x);
struct AtanOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return atan(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (Type(1.) + args.x(0) * args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain atan(const ad_plain &x);
ad_aug atan(const ad_aug &x);
ad_adapt atan(const ad_adapt &x);
Writer asinh(const Writer &x);
struct AsinhOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return asinh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) +=
        args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) + Type(1.));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain asinh(const ad_plain &x);
ad_aug asinh(const ad_aug &x);
ad_adapt asinh(const ad_adapt &x);
Writer acosh(const Writer &x);
struct AcoshOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return acosh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) +=
        args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) - Type(1.));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain acosh(const ad_plain &x);
ad_aug acosh(const ad_aug &x);
ad_adapt acosh(const ad_adapt &x);
Writer atanh(const Writer &x);
struct AtanhOp : global::UnaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x) {
    return atanh(x);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * Type(1.) / (Type(1) - args.x(0) * args.x(0));
  }
  void reverse(ReverseArgs<Scalar> &args);
  const char *op_name();
};
ad_plain atanh(const ad_plain &x);
ad_aug atanh(const ad_aug &x);
ad_adapt atanh(const ad_adapt &x);

template <class T>
T abs(const T &x) {
  return fabs(x);
}
using std::atan2;
Writer atan2(const Writer &x1, const Writer &x2);
struct Atan2 : global::BinaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x1, Type x2) {
    return atan2(x1, x2);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * args.x(1) /
                  (args.x(0) * args.x(0) + args.x(1) * args.x(1));
    args.dx(1) += args.dy(0) * -args.x(0) /
                  (args.x(0) * args.x(0) + args.x(1) * args.x(1));
  }
  const char *op_name();
};
ad_plain atan2(const ad_plain &x1, const ad_plain &x2);
ad_aug atan2(const ad_aug &x1, const ad_aug &x2);
ad_adapt atan2(const ad_adapt &x1, const ad_adapt &x2);
using std::max;
Writer max(const Writer &x1, const Writer &x2);
struct MaxOp : global::BinaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x1, Type x2) {
    return max(x1, x2);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * ge0(args.x(0) - args.x(1));
    args.dx(1) += args.dy(0) * lt0(args.x(0) - args.x(1));
  }
  const char *op_name();
};
ad_plain max(const ad_plain &x1, const ad_plain &x2);
ad_aug max(const ad_aug &x1, const ad_aug &x2);
ad_adapt max(const ad_adapt &x1, const ad_adapt &x2);

using std::min;
Writer min(const Writer &x1, const Writer &x2);
struct MinOp : global::BinaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x1, Type x2) {
    return min(x1, x2);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0) * ge0(args.x(1) - args.x(0));
    args.dx(1) += args.dy(0) * lt0(args.x(1) - args.x(0));
  }
  const char *op_name();
};
ad_plain min(const ad_plain &x1, const ad_plain &x2);
ad_aug min(const ad_aug &x1, const ad_aug &x2);
ad_adapt min(const ad_adapt &x1, const ad_adapt &x2);
using std::pow;

template <class T>
T asConstant(T x) {
  return x;
}
ad_aug asConstant(ad_aug x);
Writer pow(const Writer &x1, const Writer &x2);
template <bool left_var = true, bool right_var = true>
struct PowOp : global::BinaryOperator {
  static const bool have_eval = true;
  template <class Type>
  Type eval(Type x1, Type x2) {
    if (!left_var) x1 = asConstant(x1);
    if (!right_var) x2 = asConstant(x2);
    return pow(x1, x2);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    Type X1 = args.x(0);
    Type X2 = args.x(1);
    Type Y = args.y(0);
    if (!left_var) X1 = asConstant(X1);
    if (!right_var) X2 = asConstant(X2);
    if (left_var) args.dx(0) += args.dy(0) * (X2 * pow(X1, X2 - Type(1.)));
    if (right_var) args.dx(1) += args.dy(0) * (Y * log(X1));
  }
  const char *op_name() { return "PowOp"; }
  ad_plain operator()(const ad_plain &x1, const ad_plain &x2) {
    return get_glob()->add_to_stack<PowOp>(x1, x2);
  }
};
ad_aug pow(const ad_aug &x1, const ad_aug &x2);
ad_adapt F(const ad_adapt &x1, const ad_adapt &x2);
Replay CondExpEq(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpEqOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpEq(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpEq(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpEq(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpNe(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpNeOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpNe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpNe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpNe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpGt(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpGtOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpGt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpGt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpGt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpLt(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpLtOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpLt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpLt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpLt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpGe(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpGeOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpGe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpGe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpGe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);
Replay CondExpLe(const Replay &x0, const Replay &x1, const Replay &x2,
                 const Replay &x3);
struct CondExpLeOp : global::Operator<4, 1> {
  void forward(ForwardArgs<Scalar> &args);
  void reverse(ReverseArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  void reverse(ReverseArgs<Replay> &args);
  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    TMBAD_ASSERT(false);
  }
  const char *op_name();
};
Scalar CondExpLe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
                 const Scalar &x3);
ad_plain CondExpLe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
                   const ad_plain &x3);
ad_aug CondExpLe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
                 const ad_aug &x3);

template <class Info>
struct InfoOp : global::DynamicOperator<-1, 0> {
  Index n;
  Info info;
  InfoOp(Index n, Info info) : n(n), info(info) {}
  static const bool elimination_protected = true;
  static const bool add_forward_replay_copy = true;
  static const bool have_input_size_output_size = true;
  template <class Type>
  void forward(ForwardArgs<Type> &args) {}
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {}
  Index input_size() const { return n; }
  Index output_size() const { return 0; }
  const char *op_name() { return "InfoOp"; }
  void print(global::print_config cfg) {
    Rcout << cfg.prefix << info << std::endl;
  }
  void *operator_data() { return &info; }
};
template <class Info>
void addInfo(const std::vector<ad_aug> &x, const Info &info) {
  global::Complete<InfoOp<Info> >(x.size(), info)(x);
}
template <class Info>
void addInfo(const std::vector<double> &x, const Info &info) {}

struct SumOp : global::DynamicOperator<-1, 1> {
  static const bool is_linear = true;
  static const bool have_input_size_output_size = true;
  static const bool add_forward_replay_copy = true;
  size_t n;
  Index input_size() const;
  Index output_size() const;
  SumOp(size_t n);
  template <class Type>
  void forward(ForwardArgs<Type> &args) {
    args.y(0) = 0;
    for (size_t i = 0; i < n; i++) {
      args.y(0) += args.x(i);
    }
  }
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    for (size_t i = 0; i < n; i++) {
      args.dx(i) += args.dy(0);
    }
  }
  const char *op_name();
};
template <class T>
T sum(const std::vector<T> &x) {
  return global::Complete<SumOp>(x.size())(x)[0];
}

ad_plain logspace_sum(const std::vector<ad_plain> &x);
struct LogSpaceSumOp : global::DynamicOperator<-1, 1> {
  size_t n;
  static const bool have_input_size_output_size = true;
  Index input_size() const;
  Index output_size() const;
  LogSpaceSumOp(size_t n);
  void forward(ForwardArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    for (size_t i = 0; i < n; i++) {
      args.dx(i) += exp(args.x(i) - args.y(0)) * args.dy(0);
    }
  }
  const char *op_name();
};
ad_plain logspace_sum(const std::vector<ad_plain> &x);
template <class T>
T logspace_sum(const std::vector<T> &x_) {
  std::vector<ad_plain> x(x_.begin(), x_.end());
  return logspace_sum(x);
}

ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
                             const std::vector<Index> &stride, size_t n);
struct LogSpaceSumStrideOp : global::DynamicOperator<-1, 1> {
  std::vector<Index> stride;
  size_t n;
  static const bool have_input_size_output_size = true;

  Index number_of_terms() const;
  template <class Type>
  Type &entry(Type **px, size_t i, size_t j) const {
    return px[j][0 + i * stride[j]];
  }
  template <class Type>
  Type rowsum(Type **px, size_t i) const {
    size_t m = stride.size();
    Type s = (Scalar)(0);
    for (size_t j = 0; j < m; j++) {
      s += entry(px, i, j);
    }
    return s;
  }
  Index input_size() const;
  Index output_size() const;
  LogSpaceSumStrideOp(std::vector<Index> stride, size_t n);
  void forward(ForwardArgs<Scalar> &args);
  void forward(ForwardArgs<Replay> &args);
  template <class Type>
  void reverse(ReverseArgs<Type> &args) {
    size_t m = stride.size();
    std::vector<Type *> wrk1(m);
    std::vector<Type *> wrk2(m);
    Type **px = &(wrk1[0]);
    Type **pdx = &(wrk2[0]);
    for (size_t i = 0; i < m; i++) {
      px[i] = args.x_ptr(i);
      pdx[i] = args.dx_ptr(i);
    }
    for (size_t i = 0; i < n; i++) {
      Type s = rowsum(px, i);
      Type tmp = exp(s - args.y(0)) * args.dy(0);
      for (size_t j = 0; j < m; j++) {
        entry(pdx, i, j) += tmp;
      }
    }
  }
  /** \brief Dependencies (including indirect)
      \details Stride based operations assume a certain memory layout
      and therefore have *indirect dependencies*.
  */
  void dependencies(Args<> &args, Dependencies &dep) const;
  /** \brief `dependencies` has been implemented */
  static const bool have_dependencies = true;
  /** \brief Add marker routines based on `dependencies` */
  static const bool implicit_dependencies = true;
  /** \brief It is **not* safe to remap the inputs of this operator */
  static const bool allow_remap = false;
  const char *op_name();

  void forward(ForwardArgs<Writer> &args);
  void reverse(ReverseArgs<Writer> &args);
};
ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
                             const std::vector<Index> &stride, size_t n);
template <class T>
T logspace_sum_stride(const std::vector<T> &x_,
                      const std::vector<Index> &stride, size_t n) {
  std::vector<ad_plain> x(x_.begin(), x_.end());
  return logspace_sum_stride(x, stride, n);
}
}  // namespace TMBad
#endif  // HAVE_GLOBAL_HPP
