GAMSによって配布されている CONOPTは GRG の確立された実装のようですが、無料ではありません (ただし、デモで十分かもしれません)。
Alglibには非線形レーベンバーグ・マルカート アルゴリズムが実装されており、GPL / 商用ライセンスが付与されています。
以下の alglib を使用したサンプル コード:
/*
 * Simple optimiser example
 *
 * nl_opt.cpp
 *
 * Compile with eg 'g++ -I../tools/alglib/src ../tools/alglib/src/ap.cpp ../tools/alglib/src/alglibinternal.cpp ../tools/alglib/src/linalg.cpp ../tools/alglib/src/alglibmisc.cpp ../tools/alglib/src/solvers.cpp ../tools/alglib/src/optimization.cpp nl_opt.cpp -o opt'
 *
 */
#include "stdafx.h"
#include <iostream>
#include <cmath>
#include "optimization.h"
using namespace std;
double fn(double a1, double a2, double a3, double x, double A, double B)
{
    return A * exp(-x*(a1*B*B+a2*B+a3));
}
struct problem
{
    double *m_a1s;
    double *m_a2s;
    double *m_a3s;
    double *m_xs;
    double *m_ys;
    int m_n;
    problem(double *a1s, double *a2s, double *a3s, double *xs, double *ys, int n) 
        : m_a1s(a1s), m_a2s(a2s), m_a3s(a3s), m_xs(xs), m_ys(ys), m_n(n)
    {
    }
    void fn_vec(const alglib::real_1d_array &c_var, alglib::real_1d_array &fi, void *ptr)
    {
        double sum = 0.0;
        for(int i = 0; i < m_n; ++i)
        {
            double yhat = fn(m_a1s[i], m_a2s[i], m_a3s[i], m_xs[i], c_var[0], c_var[1]);
            double err_sq = (m_ys[i] - yhat) * (m_ys[i] - yhat);
            sum += err_sq;
        }
        fi[0] = sum;
    }
};
problem *g_p;
void fn_vec(const alglib::real_1d_array &c_var, alglib::real_1d_array &fi, void *ptr)
{
    g_p->fn_vec(c_var, fi, ptr);
}
int main()
{
    cout << "Testing non-linear optimizer..." << endl;
    int n = 5;
    double a1s[] = {2.42, 4.78, 7.25, 9.55, 11.54};
    double a2s[] = {4.25, 5.27, 6.33, 7.32, 8.18};
    double a3s[] = {3.94, 4.05, 4.17, 4.28, 4.37};
    double xs[] = {0.024, 0.036, 0.048, 0.06, 0.072};
    double ys[] = {80, 70, 50, 40, 45};
    double initial[] = {150, 1.75};
    double ss_init = 0.0;
    cout << "Initial problem:" << endl;
    for(int i = 0; i < n; ++i)
    {
        double yhat = fn(a1s[i], a2s[i], a3s[i], xs[i], initial[0], initial[1]);
        double err_sq = (ys[i] - yhat) * (ys[i] - yhat);
        ss_init += err_sq;
        cout << a1s[i] << "\t" << a2s[i] << "\t" << a3s[i] << "\t" 
            << xs[i] << "\t" << ys[i] << "\t" << yhat << "\t" << err_sq << endl;
    }
    cout << "Error: " << ss_init << endl;
    // create problem
    problem p(a1s, a2s, a3s, xs, ys, n);
    g_p = &p;
    // setup solver
    alglib::real_1d_array x = "[150.0, 1.75]";
    double epsg = 0.00000001;
    double epsf = 0;
    double epsx = 0;
    alglib::ae_int_t maxits = 0;
    alglib::minlmstate state;
    alglib::minlmreport report;
    alglib::minlmcreatev(2, x, 0.0001, state);
    alglib::minlmsetcond(state, epsg, epsf, epsx, maxits);
    // optimize
    alglib::minlmoptimize(state, fn_vec);
    alglib::minlmresults(state, x, report);
    cout << "Results:" << endl;
    cout << report.terminationtype << endl;
    cout << x.tostring(2).c_str() << endl;
    double ss_end = 0.0;
    for(int i = 0; i < n; ++i)
    {
        double yhat = fn(a1s[i], a2s[i], a3s[i], xs[i], x[0], x[1]);
        double err_sq = (ys[i] - yhat) * (ys[i] - yhat);
        ss_end += err_sq;
        cout << a1s[i] << "\t" << a2s[i] << "\t" << a3s[i] << "\t"
             << xs[i] << "\t" << ys[i] << "\t" << yhat << "\t" << err_sq << endl;
    }
    cout << "Error: " << ss_end << endl;
    return 0;
}
これにより、サンプル出力が得られます。
./opt 
Testing non-linear optimizer...
Initial problem:
2.42    4.25    3.94    0.024   80  95.5553 241.968
4.78    5.27    4.05    0.036   70  54.9174 227.485
7.25    6.33    4.17    0.048   50  24.8537 632.338
9.55    7.32    4.28    0.06    40  9.3038  942.257
11.54   8.18    4.37    0.072   45  3.06714 1758.36
Error: 3802.41
Results:
2
[92.22,0.57]
2.42    4.25    3.94    0.024   80  77.6579 5.48528
4.78    5.27    4.05    0.036   70  67.599  5.76475
7.25    6.33    4.17    0.048   50  56.6216 43.8456
9.55    7.32    4.28    0.06    40  46.0026 36.0314
11.54   8.18    4.37    0.072   45  36.6279 70.0922
Error: 161.219