program rdfrnt
*     *************************************************************
*
*     A second-order code for the 2-D R-D equation with convection.
*   
*     u_t + v dot grad u = eps div ( a grad u ) + 1/eps f(u)
*
*     where f is the kpp reaction
*
*           f(u) = u*(1-u)
*
*     and the velocity field v is given and satisfies
*
*           div v = 0.
*
*     The domain is [0,L]x[-1,1] and we can prescribe both the Dirichlet
*     and mixed (Dirichlet at x=0,L and periodic in y) by specifying
*     the integer type to be:
*  
*     1   Dirichlet: channel flow (bubble), u=0 on boundary;
*     2   mixed: channel flow (almost planar front),
*            u=1, at x=0, and u=0 at x=L, periodic in y.
*
*     *************************************************************
      character*1 cont
      integer nu,liw,kp,ku,mmax,mf,mdif,iopt,ipflag,
     +        lenk,iterm,lrw,nk,k,mx,my,mxy,nx,ny,nxy,
     +        np,type
      parameter( mx=600,my=300, mxy=mx*my )
      parameter( nu=2*mxy, liw=30, mmax=10 )
      parameter( lenk=236+14*mxy, lrw=4+4*mxy+lenk )
      integer iwork(liw)
      double precision uf(nu),savf(mxy),su(mxy),rwork(lrw),
     +                 pf(0:mx+1,0:my+1),pex(mx,my),
     +                 ue(0:mx,my),ve(mx,0:my),
     +                 uc(mx,my),  vc(mx,my),
     +                 pv(0:mx,my),ph(mx,0:my),
     +                 px(mx,my+1),py(mx,my+1),
     +                 a,b,c,xlam,xeta,eps,epsmac,ftol,stptol,h,dt,
     +                 atotal,hinv,res,t,rms,dth,uvm,cfl,pi,
     +                 tend,rini,thl,cut,frto,frtn,spe,alpha,amp
      parameter( type=2)
      parameter( epsmac=0.1D-8, cut=0.5D0 )
      common/epscal/xlam,xeta
      common/ijsize/nx,ny
      common/posini/rini,thl
      common/valupi/pi
      common/meshsz/h,hinv
      common/meanfd/amp
      external fnonld,fnonlm,jacobd,jacobm,pset,psol
      data su/mxy*1.D0/, t,ftol,stptol/3*0.D0/,
     +     mf,iopt,mdif,ipflag/3,0,1,0/, ku/10/
      cont = 'n'
*
*     read input data:
      read *, nx,ny
      read *, eps,alpha,rini,thl,cfl
      read *, amp,rms
      read *, tend,np
      read(5,'(a)') cont
*      open(7,file='area_t',status='unknown')
      pi = 4.D0*datan(1.D0)
      nxy = nx*ny
      h = 2.D0/dfloat(ny)
      hinv = 1.D0/h
*
*     initialize and plot the solution
      call initia(pf,ue,ve,uc,vc,nx,ny)
      call posfrt(pf,frto,nx,ny)
      call output(pf,nx,ny,ku)
*
*
*     read the velocity field
      uvm=2.D0
      call readuf(ue,ve,uc,vc,uvm,nx,ny,rms,t,alpha,eps,h) 
*     call readuf(ue,ve,uc,vc,uvm,nx,ny,rms)
c      call ushift(ue,uc,nx,ny)
c      call shearf(ue,ve,uc,vc,uvm,nx,ny,rms)
*
      dt = dmin1(0.5D0*h,cfl*h/uvm)
      nk = idint(tend/dt)
      kp = nk/np
      print 105, dt
      dth = dt/h
      xlam = 0.5D0*eps*dt/h/h
      xeta = 0.5D0*dt/eps
*
*     main loop to solve the equation in time:
*     first, form the rhs of the equations; then, use nksol to solve
*     the nonlinear system; after this solution is updated by renewp.
      do 50 k=1,nk
*        calculate the rhs; first the diffusion and reaction term
         call knowtm(pf,pex,nx,ny,type)
*         if( dabs(rms) .le. epsmac ) goto 51
*        then the convection,
         if( type .eq. 1) then
            call conved(ue,ve,uc,vc,pf,pex,px,py,pv,ph,dth,nx,ny)
         else
            call convem(ue,ve,uc,vc,pf,pex,px,py,pv,ph,dth,nx,ny)
         endif
 51      call feqrhs(nxy,nu,uf,pf,pex,nx,ny)
*        solve the nonlinear system by nksol,
         if( type .eq. 1) then
         call nksol(nxy, nu, uf, savf, fnonld, jacobd, su, su, ftol,
     +              stptol, rwork, lrw, iwork, liw, iopt, iterm,
     +              pset, psol, mf, mdif, ipflag)
         else
         call nksol(nxy, nu, uf, savf, fnonlm, jacobm, su, su, ftol,
     +              stptol, rwork, lrw, iwork, liw, iopt, iterm,
     +              pset, psol, mf, mdif, ipflag)
         endif
*        update the solution and enforce the boundary condition.
         call renewp(uf,pf,nu,nx,ny,type)
         call posfrt(pf,frtn,nx,ny)
         spe = (frtn - frto)/dt
         frto = frtn
         write(21,101) t,frtn
         write(22,101) t,spe
*        compute the area bounded by the level set u=cut (0 < cut < 1)
*         call pharea(pf,cut,nx,ny,atotal)
*         write (7,101) t,atotal
         t = t + dt
         call readuf(ue,ve,uc,vc,uvm,nx,ny,rms,t,alpha,eps,h) 
         if(mod(k,kp) .eq. 0) then
            call residu(savf,nxy,h,res)
            ku = ku+1
            print 102,t,ku,res
            call output(pf,nx,ny,ku)
         endif
 50   continue
 101  format(f12.8,2x,f12.8)
 102  format(' t=',f8.4,' write to unit ',i3,' res= ',e12.6)
 105  format(' dt = ',f10.8)
      end

      subroutine fnonld(n, nu, u, savf)
*     ***************************************************
*     establish the nonlinear system of equations for nksol
*     to solve.
*     ***************************************************
      integer n,nu,nx,ny,i,j,k
      double precision u(nu),savf(n)
      double precision xlam,xeta,fret
      common/epscal/xlam,xeta
      common/ijsize/nx,ny
      savf(1) = (1.D0 + 4.D0*xlam)*u(1) - 
     +          xlam*( u(ny+1) + u(2) ) - 
     +          xeta*fret(u(1)) - 
     +          u(n+1)
      do 10 j=2,ny-1
         savf(j) = (1.D0 + 4.D0*xlam)*u(j) -
     +             xlam*( u(ny+j) + u(j-1) + u(j+1) ) -
     +             xeta*fret(u(j)) -
     +             u(n+j)
 10   continue
      savf(ny) = (1.D0 + 4.D0*xlam)*u(ny) - 
     +           xlam*( u(2*ny) + u(ny-1) ) -
     +           xeta*fret(u(ny)) -
     +           u(n+ny)
      k=ny
      do 20 i=2,nx-1
         k=k+1
         savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +             xlam*( u(k-ny) + u(k+ny) + u(k+1) ) -
     +             xeta*fret(u(k)) -
     +             u(n+k)
      do 25 j=2,ny-1
         k=k+1
         savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +             xlam*( u(k-ny) +
     +                    u(k+ny) +
     +                    u(k-1) +
     +                    u(k+1) ) -
     +             xeta*fret(u(k)) -
     +             u(n+k)
 25   continue
         k=k+1
         savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +             xlam*( u(k-ny) + u(k+ny) + u(k-1) ) -
     +             xeta*fret(u(k)) -
     +             u(n+k)
 20   continue
      k=k+1
      savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +          xlam*( u(k-ny) + u(k+1) ) - 
     +          xeta*fret(u(k)) - 
     +          u(n+k)
      do 30 j=2,ny-1
         k=k+1
         savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +             xlam*( u(k-ny) + u(k-1) + u(k+1) ) -
     +             xeta*fret(u(k)) -
     +             u(n+k)
 30   continue
      k=k+1
      savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +          xlam*( u(k-ny) + u(k-1) ) -
     +           xeta*fret(u(k)) -
     +           u(n+k)
      end


      subroutine jacobd(n, nu, u, savf, v, z)
*     ***************************************************
*     optional routine to provide the jacobian to the nksol.
*     z(k) = sum_j df_k/du_j (v)
*     ***************************************************
      integer n,nu,i,j,k,nx,ny
      double precision u(nu), savf(n), v(n), z(n),
     +                 xlam,xeta,fretd
      common/epscal/xlam,xeta
      common/ijsize/nx,ny
      z(1) = (1.D0 + 4.D0*xlam)*v(1) -
     +          xlam*( v(ny+1) + v(2) ) - 
     +          xeta*fretd(u(1))*v(1)
      do 10 j=2,ny-1
         z(j) = (1.D0 + 4.D0*xlam)*v(j) -
     +          xlam*( v(ny+j) + v(j-1) + v(j+1) ) -
     +          xeta*fretd(u(j))*v(j)
 10   continue
      z(ny) = (1.D0 + 4.D0*xlam)*v(ny) -
     +        xlam*( v(2*ny) + v(ny-1) ) -
     +           xeta*fretd(u(ny))*v(ny)
      k=ny
      do 20 i=2,nx-1
         k=k+1
         z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +          xlam*( v(k-ny) + v(k+ny) + v(k+1) ) -
     +          xeta*fretd(u(k))*v(k)
      do 25 j=2,ny-1
         k=k+1
         z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +          xlam*( v(k-ny) + v(k+ny) +
     +                 v(k-1) + v(k+1) ) -
     +          xeta*fretd(u(k))*v(k)
 25   continue
         k=k+1
         z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +          xlam*( v(k-ny) + v(k+ny) + v(k-1) ) -
     +          xeta*fretd(u(k))*v(k)
 20   continue
      k=k+1
      z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +       xlam*( v(k-ny) + v(k+1) ) - 
     +       xeta*fretd(u(k))*v(k)
      do 30 j=2,ny-1
         k=k+1
         z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +          xlam*( v(k-ny) + v(k-1) + v(k+1) ) -
     +          xeta*fretd(u(k))*v(k)
 30   continue
      k=k+1
      z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +       xlam*( v(k-ny) + v(k-1) ) -
     +       xeta*fretd(u(k))*v(k)
      end
      subroutine fnonlm(n, nu, u, savf)
*     ***************************************************
*     establish the nonlinear system of equations for nksol
*     to solve.
*     ***************************************************
      integer n,nu,nx,ny,i,j,k
      double precision u(nu),savf(n)
      double precision xlam,xeta,fret
      common/epscal/xlam,xeta
      common/ijsize/nx,ny
*     first column
      savf(1) = (1.D0 + 4.D0*xlam)*u(1) - 
     +          xlam*( u(ny+1) + u(2) + u(ny) + 1.D0 ) - 
     +          xeta*fret(u(1)) - 
     +          u(n+1)
      do 10 j=2,ny-1
         savf(j) = (1.D0 + 4.D0*xlam)*u(j) -
     +             xlam*( u(ny+j) + u(j-1) + 
     +                    u(j+1) +1.D0 ) -
     +             xeta*fret(u(j)) -
     +             u(n+j)
 10   continue
      savf(ny) = (1.D0 + 4.D0*xlam)*u(ny) - 
     +           xlam*( u(2*ny) + u(ny-1) + 
     +                  u(1) + 1.D0 ) -
     +           xeta*fret(u(ny)) -
     +           u(n+ny)
*     2nd through the next to the last columns.
      k=ny
      do 20 i=2,nx-1
         k=k+1
         savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +             xlam*( u(k-ny) + u(k+ny) + 
     +                    u(k+1) + u(k+ny-1) ) -
     +             xeta*fret(u(k)) -
     +             u(n+k)
      do 25 j=2,ny-1
         k=k+1
         savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +             xlam*( u(k-ny) +
     +                    u(k+ny) +
     +                    u(k-1) +
     +                    u(k+1) ) -
     +             xeta*fret(u(k)) -
     +             u(n+k)
 25   continue
         k=k+1
         savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +             xlam*( u(k-ny) + u(k+ny) + 
     +                    u(k-1) + u(k-ny+1) ) -
     +             xeta*fret(u(k)) -
     +             u(n+k)
 20   continue
*     last column
      k=k+1
      savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +          xlam*( u(k-ny) + u(k+1) + u(k+ny-1) ) - 
     +          xeta*fret(u(k)) - 
     +          u(n+k)
      do 30 j=2,ny-1
         k=k+1
         savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +             xlam*( u(k-ny) + u(k-1) + u(k+1) ) -
     +             xeta*fret(u(k)) -
     +             u(n+k)
 30   continue
      k=k+1
      savf(k) = (1.D0 + 4.D0*xlam)*u(k) -
     +          xlam*( u(k-ny) + u(k-1) + u(k-ny+1) ) -
     +           xeta*fret(u(k)) -
     +           u(n+k)
      end


      subroutine knowtm(uold,ukw,nx,ny,type)
*     *********************************************************
*     calculate the diffusion and reaction terms at the previous
*     time step and add them up, multiply by dt/2.
*     This term contributes to the rhs of the Crank-Nicholson
*     and also to the Taylor expansion in the flux calcuation.
*     *********************************************************
      integer i,j,nx,ny,type
      double precision uold(0:nx+1,0:ny+1),ukw(nx,ny),
     +                 xlam,xeta,fret
      common/epscal/xlam,xeta
      call boundy(uold,nx,ny,type)
      do 20 j=1,ny
      do 20 i=1,nx
         ukw(i,j) = xlam*( uold(i-1,j) +
     +                     uold(i+1,j) +
     +                     uold(i,j-1) +
     +                     uold(i,j+1) -
     +                     4.D0*uold(i,j) ) +
     +              xeta*fret(uold(i,j))
 20   continue
      end

      subroutine feqrhs(n,nu,u,uold,pex,nx,ny)
*     ****                    ****                    ****
*     give an initial guess and form the forcing term in the equation.
*     ****                    ****                    ****
      integer n,nu,i,j,k,nx,ny
      double precision u(nu),uold(0:nx+1,0:ny+1),pex(nx,ny)
      k = 0
      do 5 i=1,nx
      do 5 j=1,ny
         k = k+1
         u(k) = uold(i,j)
         u(n+k) = uold(i,j) + pex(i,j)
 5    continue
      end

      subroutine renewp(uf,pf,nu,nx,ny,type)
*     ****                    ****                    ****
*     update the solution and enforce the periodic condition.
*     ****                    ****                    ****
      integer nu,nx,ny,i,j,k,type
      double precision uf(nu),pf(0:nx+1,0:ny+1)
      k = 0
      do 10 i=1,nx
      do 10 j=1,ny
         k = k+1
         pf(i,j) = uf(k)
 10   continue
      call boundy(pf,nx,ny,type)
      end

      subroutine residu(savf,n,h,res)
*     ****                    ****                    ****
*     calculate the residual of the nonlinear equations.
*     ****                    ****                    ****
      integer n,j
      double precision savf(n),h,res
      res = 0.D0
      do 10 j=1,n
         res = res + savf(j)*savf(j)
 10   continue
      res = h*dsqrt(res)
      end

      subroutine output(pf,nx,ny,ku)
      integer ku,nx,ny,i,j
      double precision pf(0:nx+1,0:ny+1)
      write(ku,103) ((pf(i,j),i=1,nx),j=1,ny)
 103  format(e14.8)
      end

      subroutine contin(pf,nx,ny)
      integer nx,ny,i,j
      double precision pf(0:nx+1,0:ny+1)
      read(7,103) ((pf(i,j),i=1,nx),j=1,ny)
      do 10 j=0,ny+1
         pf(0,j) = 0.D0
         pf(nx+1,j) = 1.D0
 10   continue
      do 20 i=1,nx
         pf(i,0) = pf(i,ny)
         pf(i,ny+1) = pf(i,1)
 20   continue
 103  format(e14.8)
      end

      function fret(u)
      double precision fret,u
      fret = u*(1.D0-u)
      end

      function fretd(u)
      double precision fretd,u
      fretd = 1.D0 - 2.D0*u
      end

      subroutine conved(ue,ve,uc,vc,pf,pex,px,py,pv,ph,dth,nx,ny)
*     **********************************************************
*
*     nonsplit second-order upwind scheme to calculate the flux term
*
*     on input, pex is 1/2( grad^2 u + 1/f(u)), and on output
*          pex = pex - (u cdot pf)^{n+1/2}
*
*     velocity field given on a staggered grid:
*
*                --- ve(i,j) --           --- ph(i,j) ---
*               |              |         |               |
*               |              |         |               |
*             ue   u(v)c(i,j) ue(i,j)   pv    pf(i,j)  pv(i,j)
*               |              |         |               |
*               |              |         |               |
*                ----- ve -----           ----- ph ------
*
*     **********************************************************
      integer i,j,nx,ny
      double precision dth,slimit
      double precision ue(0:nx,ny),ve(nx,0:ny),
     +                 uc(nx,ny),  vc(nx,ny),
     +                 pv(0:nx,ny),ph(nx,0:ny),
     +                 px(nx,ny),py(nx,ny),
     +                 pf(0:nx+1,0:ny+1),pex(nx,ny)
*     construct 1st derivatives with limiter in the x (normal) direction 
*     and upwind in the y (transverse) direction
      do 5 j=1,ny
      do 5 i=1,nx
         px(i,j) = slimit(pf(i,j)-pf(i-1,j),pf(i+1,j)-pf(i,j))
         if ( vc(i,j) .gt. 0.D0) then
            py(i,j) = pf(i,j) - pf(i,j-1)
         else
            py(i,j) = pf(i,j+1) - pf(i,j)
         end if
 5    continue
*     approximate vertical edge values.
      do 10 j=1,ny
      do 10 i=1,nx-1
         if (ue(i,j) .gt. 0.D0) then
             pv(i,j) = pf(i,j)   + 0.5D0*( (1.D0 -
     +                 dth*uc(i,j) )*px(i,j) -
     +                 dth*vc(i,j)  *py(i,j) ) +
     +                 pex(i,j)
         else
             pv(i,j) = pf(i+1,j) - 0.5D0*( (1.D0 +
     +                 dth*uc(i+1,j) )*px(i+1,j) +
     +                 dth*vc(i+1,j)  *py(i+1,j) ) +
     +                 pex(i+1,j)
         end if
   10 continue
      do 12 j=1,ny
          pv(0,j) = 0.D0
          pv(nx,j) = 0.D0
 12   continue
*     construct 1st derivatives with limiter in the y (normal) direction 
*     and upwind in the x (transverse) direction
      do 15 j=1,ny
      do 15 i=1,nx   
*     approximate horizontal edge values.
         py(i,j) = slimit(pf(i,j)-pf(i,j-1),pf(i,j+1)-pf(i,j))
         if (uc(i,j) .gt. 0.D0) then
             px(i,j) = pf(i,j) - pf(i-1,j)
         else
             px(i,j) = pf(i+1,j) - pf(i,j)
         end if
 15   continue
      do 20 j=1,ny-1
      do 20 i=1,nx
         if (ve(i,j) .gt. 0.D0) then
             ph(i,j) = pf(i,j)   + 0.5D0*( (1.D0 -
     +                 dth*vc(i,j) )*py(i,j) -
     +                 dth*uc(i,j)  *px(i,j) ) +
     +                 pex(i,j)
         else
             ph(i,j) = pf(i,j+1) - 0.5D0*( (1.D0 +
     +                 dth*vc(i,j+1) )*py(i,j+1) +
     +                 dth*uc(i,j+1)  *px(i,j+1) ) +
     +                 pex(i,j+1)
         end if
   20 continue
      do 25 i=1,nx
         ph(i,0) = 0.D0
         ph(i,ny) = 0.D0
 25   continue
*     calculate fluxes and subtract it from the forcing term.
      do 30 j=1,ny
      do 30 i=1,nx
         pex(i,j) = pex(i,j) - dth*( 
     +              ue(i,j)*pv(i,j) - ue(i-1,j)*pv(i-1,j) +
     +              ve(i,j)*ph(i,j) - ve(i,j-1)*ph(i,j-1) )
   30 continue
      end
      subroutine convem(ue,ve,uc,vc,pf,pex,px,py,pv,ph,dth,nx,ny)
*     **********************************************************
*
*     nonsplit second-order upwind scheme to calculate the flux term
*
*     on input, pex is dt/2( eps grad^2 u + 1/eps f(u)), and on output
*          pex = pex - (u cdot pf)^{n+1/2}
*
*     velocity field given on a staggered grid:
*
*                --- ve(i,j) --           --- ph(i,j) ---
*               |              |         |               |
*               |              |         |               |
*             ue   u(v)c(i,j) ue(i,j)   pv    pf(i,j)  pv(i,j)
*               |              |         |               |
*               |              |         |               |
*                ----- ve -----           ----- ph ------
*
*     Dirichlet in x, periodic in y
*     **********************************************************
      integer i,j,nx,ny
      double precision dth,slimit
      double precision ue(0:nx,ny),ve(nx,0:ny),
     +                 uc(nx,ny),  vc(nx,ny),
     +                 pv(0:nx,ny),ph(nx,0:ny),
     +                 px(nx,ny),py(nx,ny),
     +                 pf(0:nx+1,0:ny+1),pex(nx,ny)
*     construct 1st derivatives with limiter in the x (normal) direction 
*     and upwind in the y (transverse) direction
      do 5 j=1,ny
      do 5 i=1,nx
         px(i,j) = slimit(pf(i,j)-pf(i-1,j),pf(i+1,j)-pf(i,j))
         if ( vc(i,j) .gt. 0.D0) then
            py(i,j) = pf(i,j) - pf(i,j-1)
         else
            py(i,j) = pf(i,j+1) - pf(i,j)
         end if
 5    continue
*     approximate vertical edge values.
      do 10 j=1,ny
      do 10 i=1,nx-1
         if (ue(i,j) .gt. 0.D0) then
             pv(i,j) = pf(i,j)   + 0.5D0*( (1.D0 -
     +                 dth*uc(i,j) )*px(i,j) -
     +                 dth*vc(i,j)  *py(i,j) ) +
     +                 pex(i,j)
         else
             pv(i,j) = pf(i+1,j) - 0.5D0*( (1.D0 +
     +                 dth*uc(i+1,j) )*px(i+1,j) +
     +                 dth*vc(i+1,j)  *py(i+1,j) ) +
     +                 pex(i+1,j)
         end if
   10 continue
      do 12 j=1,ny
          pv(0,j) = 1.D0
          pv(nx,j) = 0.D0
 12   continue
*     construct 1st derivatives with limiter in the y (normal) direction 
*     and upwind in the x (transverse) direction
      do 15 j=1,ny
      do 15 i=1,nx   
*     approximate horizontal edge values.
         py(i,j) = slimit(pf(i,j)-pf(i,j-1),pf(i,j+1)-pf(i,j))
         if (uc(i,j) .gt. 0.D0) then
             px(i,j) = pf(i,j) - pf(i-1,j)
         else
             px(i,j) = pf(i+1,j) - pf(i,j)
         end if
 15   continue
      do 20 j=1,ny-1
      do 20 i=1,nx
         if (ve(i,j) .gt. 0.D0) then
             ph(i,j) = pf(i,j)   + 0.5D0*( (1.D0 -
     +                 dth*vc(i,j) )*py(i,j) -
     +                 dth*uc(i,j)  *px(i,j) ) +
     +                 pex(i,j)
         else
             ph(i,j) = pf(i,j+1) - 0.5D0*( (1.D0 +
     +                 dth*vc(i,j+1) )*py(i,j+1) +
     +                 dth*uc(i,j+1)  *px(i,j+1) ) +
     +                 pex(i,j+1)
         end if
   20 continue
      j=ny
      do 21 i=1,nx
         if (ve(i,j) .gt. 0.D0) then
             ph(i,j) = pf(i,j)   + 0.5D0*( (1.D0 -
     +                 dth*vc(i,j) )*py(i,j) -
     +                 dth*uc(i,j)  *px(i,j) ) +
     +                 pex(i,j)
         else
             ph(i,j) = pf(i,1) - 0.5D0*( (1.D0 +
     +                 dth*vc(i,1) )*py(i,1) +
     +                 dth*uc(i,1)  *px(i,1) ) +
     +                 pex(i,1)
         end if
   21 continue
      do 25 i=1,nx
         ph(i,0) = ph(i,ny)
 25   continue
*     calculate fluxes and subtract it from the forcing term.
      do 30 j=1,ny
      do 30 i=1,nx
         pex(i,j) = pex(i,j) - dth*( 
     +              ue(i,j)*pv(i,j) - ue(i-1,j)*pv(i-1,j) +
     +              ve(i,j)*ph(i,j) - ve(i,j-1)*ph(i,j-1) )
   30 continue
      end

      function slimit(ul,ur)
*     ***********************************************************
*
*     limiter definition in the interior
*
*     ul,ur         ->         values on the left and right sides
*     slimit        <-         resolved value
*
*     ***********************************************************
      double precision ul,ur,dut,slimit
      if ( ul*ur .le. 0.D0 ) then
         slimit = 0.D0
      else
         dut = 0.5D0*(ul+ur)
         slimit = dsign( dmin1( 
     +                   dabs(dut),
     +                   2.D0*dabs(ul),
     +                   2.D0*dabs(ur) ),
     +            dut )
      end if
      end

      subroutine initia(pf,ue,ve,uc,vc,nx,ny)
      integer i,j,nx,ny
      double precision pf(0:nx+1,0:ny+1),ue(0:nx,ny),ve(nx,0:ny),
     +                 uc(nx,ny),vc(nx,ny)
      double precision rini,xl,yl,x,y,r,frtini,pi,h,hinv
      common/meshsz/h,hinv
      common/valupi/pi
      do 30 i=0,nx+1
         x = (dble(i)-0.5D0)*h
      do 30 j=0,ny+1
         y = -1.D0 + (dble(j)-0.5D0)*h
c         pf(i,j) = frtini(x-0.5*dsin(pi*y))
         pf(i,j) = frtini(x)
 30   continue
      do 40 j=1,ny
      do 40 i=1,nx
         uc(i,j) = 0.D0
         vc(i,j) = 0.D0
 40   continue
      do 50 j=1,ny
      do 50 i=0,nx
         ue(i,j) = 0.D0
 50   continue
      do 60 j=0,ny
      do 60 i=1,nx
         ve(i,j) = 0.D0
 60   continue
      end

      function frtini(r)
*     ************************************************************
*     This function gives the initial front profile by connecting
*     0 to 1 by half period of sin(x).
*         rini:  initial front position,
*         th:    initial front thickness.
*     ************************************************************
      double precision r,rini,thl,frtini,hdel,pi
      common/posini/rini,thl
      common/valupi/pi
      hdel = 0.5D0*thl
      if ( r-rini .ge. hdel) then
         frtini = 0.D0
      elseif ( r-rini .le. -hdel ) then
         frtini = 1.D0
      else
         frtini = 0.5D0*(1.D0 - dsin((r-rini)*pi/thl))
      endif
      end
      subroutine shearf(ue,ve,uc,vc,uvm,nx,ny,rms)
*     ***************************************************
*     read the velocity field data
*     ***************************************************
      integer nx,ny,i,j,n_peri,k
      double precision ue(0:nx,ny),ve(nx,0:ny),
     +                 uc(nx,ny),vc(nx,ny)
      double precision uvm,rms,uy,pih,piy,ucoef
      uvm = 0.D0
      pih = 8.D0*datan(1.D0)/dfloat(ny)
      ucoef = dsqrt(2.D0)*rms
      do 10 j=1,ny
         piy = (dfloat(j)-0.5D0)*pih
         uy = ucoef*dsin(piy)
         uvm = dmax1(dabs(uy),uvm)
      do 10 i=0,2*ny
         ue(i,j) = uy
 10   continue
      do 20 j=0,ny
      do 20 i=1,2*ny
         ve(i,j) = 0.D0
 20   continue
      do  5 k=1,n_peri-1
      do 30 j=1,ny
      do 30 i=1,2*ny
         ue(2*k*ny+i,j) = ue(i,j)
 30   continue
      do 40 j=0,ny
      do 40 i=1,2*ny
         ve(2*k*ny+i,j) = 0.D0
 40   continue
 5    continue
      do 60 j=1,ny
      do 60 i=1,nx
         uc(i,j) = ue(i,j)
         vc(i,j) = 0.D0
 60   continue
      return
      end


      subroutine readuf(ue,ve,uc,vc,uvm,nx,ny,rms,t,alpha,eps,h)
*     ***************************************************
*     read the velocity field data
*     ***************************************************
      integer nx,ny,i,j,n_peri,k,kv_cell
      double precision ue(0:nx,ny),ve(nx,0:ny),
     +                 uc(nx,ny),vc(nx,ny)
      double precision uvm,rms,u,v,t,eps,alpha,h,fr,yy,ym,
     +                 amp
      common/meanfd/amp
      uvm = 0.D0
      fr= 16.D0*datan(1.D0)
      yy= -1.D0 + 0.5D0*h
      do 15 j=1,ny
        ym = yy + amp*t
      do 10 i=0,nx
c       ue(i,j) = ue(i,j) + dsin(eps**(-alpha) * t)
        ue(i,j) = -2.D0 + rms*dsin(fr*ym)
 10   continue
      yy=yy+h
 15   continue
      uvm = 2.D0 + rms
      do 25 j=0,ny
      do 20 i=1,nx
          ve(i,j) = 0.
 20   continue
 25   continue
      do 65 j=1,ny
      do 60 i=1,nx
         uc(i,j) = 0.5D0*(ue(i-1,j)+ue(i,j))
         vc(i,j) = 0.5D0*(ve(i,j-1)+ve(i,j))
 60   continue
 65   continue
      end
      subroutine posfrt(pf,frt,nx,ny)
*     use the moment to calculate the front position
      integer nx,ny,i,jj,nms
      double precision pf(0:nx+1,0:ny+1)
      double precision frt,ax,a,h,hinv,x,du
      common/meshsz/h,hinv
      frt = 0.D0
      nms = 0
      do 5 jj=1,ny
         if(mod(jj,5) .eq. 0) then
            nms = nms + 1
            a = pf(1,jj)-1.D0
            ax = 0.D0
            do 10 i=1,nx-1
               du = pf(i+1,jj) - pf(i,jj)
               x = dfloat(i)*h
               a = a + du
               ax = ax + du*x
 10         continue
            du = - pf(nx,jj)
            a = a + du
            ax = ax + du*dfloat(nx)*h
            frt = frt + ax/a
        endif
 5    continue
      frt = frt/dfloat(nms)
      end


      subroutine ushift(ue,uc,nx,ny)
*     add the constant velocity (2,0) to work in the moving coordinates.
      integer nx,ny,i,j
      double precision ue(0:nx,ny),uc(nx,ny)
      do 10 j=1,ny
      do 12 i=1,nx
         uc(i,j) = uc(i,j) - 2.D0
 12   continue
      do 14 i=0,nx
         ue(i,j) = ue(i,j) - 2.D0
 14   continue
 10   continue
      end


      subroutine nksol (n, nu, u, savf, f, jac, su, sf, ftol, stptol,
     *                  rwork, lrw, iwork, liw, iopt, iterm, pset,
     *                  psol, mf, mdif, ipflag)
c-----------------------------------------------------------------------
c this is the february 26, 1988 version of
c nksol.. a nonlinear krylov solver for nonlinear systems of
c         equations of the form f(u) = 0.
c
c this version is in double precision.
c
c authors..
c
c         peter n. brown
c         computing and mathematics research division
c         lawrence livermore national laboratory
c         livermore, ca  94550
c and
c         youcef saad
c         center for supercomputer research and development
c         university of illinois
c         urbana, il  61801
c
c references..
c 1. peter n. brown and youcef saad
c    hybrid krylov methods for nonlinear systems of equations
c    llnl report ucrl-97645, november 1987.
c
c 2. john e. dennis and robert b. schnabel
c    numerical methods for unconstrained optimization and nonlinear
c    equations, prentice-hall, englewood cliffs, nj, 1983.
c    (this reference contains the basic algorithms for the
c    stopping tests, dogleg and linesearch strategies used
c    in nksol.)
c
c-----------------------------------------------------------------------
c Modifications:  
c
c [29-Mar-1993]  Removal of implicit data typing.
c                This version should now compile with the fortran
c                options -u -C.  The call lists now have one additional
c                integer, nu following n.  
c
c                nu - length of u array
c                -u is the explicit data typing option
c                -C is the array bound checking
c
c [12-May-1993]  Addition of two stopping criterion for the linear
c                krylov iteration.  The previously unused input variable 
c                iwork(9) now holds the option (described below).
c
c                Author - Jeff McGough
c
c                This code compiles on:  DEC station 5000
c                                        SPARC station 1
c                                        SGI  Indigo
c                Author - Jeff McGough
c
c-----------------------------------------------------------------------
c introduction.
c
c nksol solves nonlinear systems f(u)=0 rewritten in the form
c
c              sf*f((su-inverse)*ubar) = 0, ubar = su*u,
c
c where f and u are n-vectors, and sf and su are diagonal scaling
c matrices with positive diagonal entries.  nksol uses an inexact
c newton method as the basic nonlinear iteration, where the newton
c equations are solved only approximately by a linear krylov iteration,
c coupled with either a linesearch or dogleg global strategy.  the
c user may optionally choose either arnoldi-s method (with linesearch
c backtracking) or the generalized minimum residual method (gmres)
c (with either the linesearch or dogleg strategy) as the krylov
c iteration technique, with or without preconditioning.
c
c the scaling matrices su and sf should be chosen so that the vector
c su*u has all its components with roughly the same magnitude when u
c is close to a root of f, and similarly for sf*f(u) when u is not too
c near a root of f.
c
c
c nksol generates a sequence of approximations u(k) to a root of f.
c the stopping criteria for the nonlinear iteration are the
c following..
c
c 1.  maxnorm( sf*f(u(k)) ) .le. ftol,
c
c where maxnorm() is the maximum norm function and ftol is a
c user-supplied stopping tolerance.
c
c 2.  max(abs(u(k,i)-u(k-1,i))/max(abs(u(k,i)),1/su(i))) .le. stptol,
c    i=1,...,n
c
c where u(j,i) is the i-th component of u(j) (j=k,k-1) , su(i) is
c the i-th diagonal element of the matrix su, and stptol is a user-
c supplied stopping tolerance.  the above test measures the length
c of the scaled distance between the last two iterates.
c
c
c the scaled/preconditioned newton equations are of the form
c
c     (sf*j*(p-inverse)*(su-inverse))*(su*p*x) = sf*b,
c
c where j = j(u) = df/du is the system jacobian matrix evaluated at the
c current iterate u, b = -f(u), and p is a preconditioner matrix
c (applied on the right).  the solution x is the approximate newton
c step.
c
c the linear krylov iteration generates a sequence x(m) of
c approximate solutions to the newton equations within an m-dimensional
c krylov subspace (m = 1,2,...).  the stopping test for the linear
c krylov iteration is
c
c     norm(sf*(b - j*x(m))) .le. etak*norm(sf*b),
c
c where norm() is the euclidean norm, b = -f(u(k)), j = j(u(k)),
c and by default etak = 0.5**k (or user choosen through iwork(9)). 
c the counter ncfl counts the number of times this condition 
c is not met within mmax linear iterations.  (see the optional inputs 
c and outputs sections below for the definitions of mmax and ncfl.)
c
c-----------------------------------------------------------------------
c full description of user interface to nksol.
c
c the user interface to nksol consists of the following parts.
c
c i.   the call sequence to subroutine nksol, which is a driver
c      routine for the solver.  this includes descriptions of both
c      the call sequence arguments and of user-supplied routines.
c      following these descriptions is a description of
c      optional inputs available through the call sequence, and then
c      a description of optional outputs (in the work arrays).
c
c ii.  description of two routines in the nksol package which
c      the user may replace with his own versions, if desired.
c      these relate to the above stopping criteria.
c
c-----------------------------------------------------------------------
c part i.  call sequence.
c
c the call sequence parameters used for input only are
c  n, nu, f, jac, su, sf, ftol, stptol, lrw, liw, iopt, pset, psol, mf,
c  mdif, ipflag,
c those used for both input and output are
c  u,
c and those used only for output are
c  savf, iterm.
c the work arrays rwork and iwork are also used for conditional and
c optional inputs and optional outputs.  (the term output here refers
c to the return from subroutine nksol to the user-s calling program.)
c
c the legality of input parameters will be thoroughly checked on each
c call to nksol.
c
c the descriptions of the call arguments are as follows.
c
c n      = integer scalar containing the size of the nonlinear system,
c          and the length of arrays savf, su and sf.
c
c nu     = integer scalar containing the length of array u.
c
c u      = real array of length nu containing the initial 
c          guess to a root of f(u) = 0 on input, and the approximate 
c          solution on return.
c
c          this array is passed as the u argument in all calls to
c          f, jac, pset and psol.  hence its length may exceed n, and
c          locations u(n+1),... may be used to store other real data
c          and pass it to f (or jac), pset, and/or psol.
c
c savf   = real array of length n containing f(u) on return from
c          nksol, and undefined on input.
c
c f      = the name of the user-supplied subroutine for defining the
c          nonlinear system f(u) = 0.  f is a vector-valued function
c          of the vector u.  subroutine f is to compute the function
c          f.  it is to have the form
c
c           subroutine f(n, nu, u, savf)
c           dimension u(nu), savf(n)
c
c          where n, nu, u are input and the array savf = f(u) is output.
c          subroutine f should not alter u.  f must be declared
c          external in the calling program.
c
c          subroutine f may access user-defined quantities in
c          u(n+1),... if u has length exceeding n (nu) in the calling
c          program.  see description of u above.
c
c jac    = the name of the optional user-supplied subroutine for
c          calculating j(u)*v, where j(u) is the jacobian matrix of
c          f evaluated at u.  subroutine jac returns j(u)*v for a
c          given vector v.  it is to have the form
c
c           subroutine jac (n, nu, u, savf, v, z)
c           dimension u(nu), savf(n), v(n), z(n)
c
c          where n, nu, u, savf and v are input, and z = j(u)*v as
c          output. the array u contains the last newton iterate and
c          savf = f(u).  subroutine jac should not alter u or savf.
c          jac must be declared external in the calling program, and
c          is only called when mdif=1.  when mdif=0, a dummy routine
c          may be supplied for jac.
c
c          subroutine jac may access user-defined quantities in
c          u(n+1),... if u has length exceeding n in the calling
c          program.  see description of u above.
c
c su     = real array of length n containing scale factors for the
c          solution vector u.
c
c sf     = real array of length n containing scale factors for the
c          function value f(u).
c
c          arrays su and sf can be the same array in the user-s
c          calling program.  if the user does not want to use scaling,
c          then su(i) and sf(i) should be loaded with 1 for all i.
c
c ftol   = real scalar containing the stopping tolerance on
c          maxnorm(sf*f(u)).  if ftol=zero on input, then
c          a default value of epsmch**(1/3) will be used, where
c          epsmch is the machine epsilon (or unit roundoff).
c
c stptol = real scalar containing the stopping tolerance on the
c          minimum scaled step u(k) - u(k-1).  if stptol=zero on
c          input, then a default value of epsmch**(2/3) will be
c          used, where epsmch is the machine epsilon (or unit
c          roundoff).
c
c mf     = integer scalar containing the nonlinear iteration
c          method flag.
c          mf = 1 means the dogleg global strategy with gmres is used.
c          mf = 2 means the linesearch backtracking with descent
c                 direction chosen by arnoldi is used.
c          mf = 3 means the linesearch backtracking with descent
c                 direction chosen by gmres is used.
c
c rwork  = a real working array (double precision).
c          the length of rwork must be at least
c                4 + 4*n + lenk + lenwmp,
c          where
c           lenk = 4 + (4+mmax)*n + mmax**2,               if mf=2,
c           lenk = 6 + 4*n + (n+1)*mmax + 2*mmax*(mmax+1), if mf=1 or 3.
c          lenwmp is the length of the user-defined workspace for the
c          preconditioner routines pset and psol.  for the default
c          value of mmax(=10),
c           lenk = 104 + 14*n,  if mf = 2, and
c           lenk = 236 + 14*n,  if mf = 1 or 3.
c
c lrw    = integer scalar containing the length of the array rwork,
c          as declared by the user.  (this will be checked by the
c          solver).
c
c iwork  = an integer work array.  the length of iwork must be at least
c                20 + mmax + lenimp,
c          where lenimp is the length of the user-defined integer work
c          space for the preconditioner routines pset and psol.
c          for the default value of mmax(=10), the length of iwork
c          must be at least 30 + lenimp.
c
c liw    = integer scalar containing the length of the array iwork,
c          as declared by the user.  (this will be checked by the
c          solver.)
c
c iopt   = an integer flag to specify whether or not any optional
c          inputs are being used.
c          iopt=0 means there are no optional inputs.
c                 default values will be used in all cases.
c          iopt .ne. 0 means one or more optional inputs are being
c                      used.  see the optional inputs section below
c                      for more information.
c
c iterm  = output flag.
c          iterm= 1 means maxnorm(sf*f(u)) .le. ftol, where
c                   maxnorm() is the maximum norm function.  u
c                   is probably an approximate root of f.
c          iterm= 2 means the scaled distance between the last two
c                   steps is less than stptol.  u may be an
c                   approximate root of f, but it is also possible
c                   that the algorithm is making very slow progress
c                   and is not near a root, or that stptol is too
c                   large.
c          iterm= 3 means iret=1 was returned from either the dogdrv
c                   or lnsrch module, and that the last global step
c                   failed to reduce norm(f) sufficiently.  either u
c                   is close to a root of f and no more accuracy is
c                   possible, or the finite-difference approximation
c                   to j*v is inaccurate, or stptol is too large.
c                   if the ncfl optional output value (see below) is
c                   close to the nni value, it may be the case that
c                   the krylov iteration is converging very slowly.
c                   in this case, the user may want to use
c                   preconditioning and/or increase the mmax
c                   value (i.e., increase the maximum dimension of the
c                   krylov subspace.)
c          iterm= 4 means that the maximum allowable number of
c                   nonlinear iterations has been reached.  this
c                   is currently set at 200, but may be changed as
c                   an optional input.  see below.
c          iterm= 5 means 5 consecutive steps of length stepmx (the
c                   maximum stepsize limit) have been taken.  either
c                   norm(f) asymptotes from above to a finite value
c                   in some direction, or stepmx is too small.  stepmx
c                   is computed internally as
c                      stepmx = 1000*max(norm(su*u0),norm(su)),
c                   where u0 is the initial guess, and norm() is the
c                   euclidean norm.  norm(su) here means the euclidean
c                   norm of the n-dimensional array su.  stepmx may
c                   also be set as an optional input.  see below.
c          iterm= 6 means that more than 10 failures occurred when
c                   trying to satisfy the beta-condition in the
c                   linesearch algorithm.  it is likely that the
c                   iteration is making poor progress.
c          iterm= 7 means that there was a breakdown in the krylov
c                   iteration.  this will likely only occur when
c                   the jacobian matrix j or j*(p-inverse) is ill-
c                   conditioned.  if this error return occurs with
c                   mf=2, try either mf=1 or mf=3 instead.
c          iterm= 8 means there was a nonrecoverable error in pset
c                   causing the iteration to halt.
c          iterm= 9 means there was a nonrecoverable error in psol
c                   causing the iteration to halt.
c          iterm=-1 means the mf value was illegal.  the allowable
c                   mf values are 1, 2 or 3.
c          iterm=-2 means the mdif value was illegal.  the allowable
c                   mdif values are 0 and 1.
c          iterm=-3 means the ipflag value was illegal.  the allowable
c                   ipflag values are 0 and 1.
c          iterm=-4 means that an optional input value in iwork was
c                   negative.  only nonnegative values are allowed.
c          iterm=-5 means that the stepmx optional input value was
c                   negative.  stepmx must be .ge. 0.
c          iterm=-6 means that the eta optional input value was
c                   negative.  eta must be .ge. 0.
c          iterm=-7 means that the tau optional input value was
c                   negative.  tau must be .ge. 0.
c          iterm=-8 means that there was insufficient length declared
c                   for the rwork array.  see the lenrw optional output
c                   value for the minimum needed length of rwork.
c          iterm=-9 means that there was insufficient length declared
c                   for the iwork array.  see the leniw optional output
c                   value for the minimum needed length of iwork.
c
c pset   = the name of the optional user-supplied subroutine for
c          calculating any matrix data associated with the
c          preconditioner p.  it is to have the form
c
c         subroutine pset (n, nu, u, savf, su, sf, wk, f, wmp, iwmp, ier)
c         dimension u(nu), savf(n), su(n), sf(n), wmp(1), iwmp(1)
c
c          where n, nu, u, and savf are input.  f is the name of the
c          user-supplied subroutine defining f(u).  any matrix data
c          loaded by this routine must be stored in the real and integer
c          arrays wmp and iwmp.  these arrays are passed unaltered
c          to subroutine psol.  the array u contains the last newton
c          iterate and savf = f(u).  wk is a work array of length n
c          for use in this routine.  subroutine pset should not alter
c          u or savf, and is called infrequently in an effort to use
c          preconditioner data evaluated at an earlier newton iterate.
c          pset must be declared external in the calling program,
c          and is only called when ipflag=1.  when ipflag=0, a
c          dummy routine may be supplied for pset.  on return pset
c          should set the error flag as follows..
c             ier = 0 if pset was successful.
c             ier.ne.0 if an unrecoverable error occurred.  for
c                      example, the preconditioner may be singular.
c                      in this case the nonlinear iteration is halted.
c          note: the lengths of arrays wmp and iwmp must be supplied
c                as optional inputs in the iwork array.  see the
c                optional inputs section below.
c
c          subroutine pset may access user-defined quantities in
c          u(n+1),... if u has length exceeding n in the calling
c          program.  see description of u above.
c
c psol   = the name of the optional user-supplied subroutine for solving
c          the linear system p*x = c.  it uses matrix data loaded
c          in subroutine pset and passed via the arrays wmp and iwmp.
c          it is to have the form
c
c         subroutine psol (n, nu, u, savf, f, jac, wk, wmp, iwmp, x, ier)
c         dimension u(nu), savf(n), wk(n), wmp(1), iwmp(1)
c
c          where n, nu, u, savf, wmp and iwmp are input.  x contains the
c          right hand side on input and the solution on exit.  f is the
c          name of the user-supplied routine defining f(u), and jac is
c          the name of the user-supplied routine for calculating j(u)*v.
c          wk is a real work array of length n available for use in
c          psol.  psol should not alter u or savf, and is only called
c          when ipflag=1.  when ipflag=0, a dummy routine may be
c          supplied for psol.  on return, psol should set the error flag
c          ier as follows..
c             ier = 0 if psol was successful.
c             ier.lt.0 if an unrecoverable error occurred.  in this
c                      case the nonlinear iteration is halted.
c             ier.gt.0 if a recoverable error occurred, possibly caused
c                      by the preconditioner being out of date.  a call
c                      to pset is done, and the solution of the current
c                      linear system is retried.
c          note: the lengths of arrays wmp and iwmp must be supplied
c                as optional inputs in the iwork array.  see the
c                optional inputs section below.
c
c          subroutine psol may access user-defined quantities in
c          u(n+1),... if u has length exceeding n in the calling
c          program.  see description of u above.
c
c mdif   = integer scalar containing the method flag for the
c          j*v calculation.
c          mdif=1 means a user-supplied routine jac is used for
c                 calculating the product j*v in the krylov iteration.
c          mdif=0 means an internally generated j*v is used in the
c                 krylov iteration.  this is done by using an f
c                 evaluation and a difference-quotient.
c
c ipflag = integer scalar indicating if preconditioning is being used.
c          ipflag=0 means no preconditioning is used.
c          ipflag=1 means preconditioning is done (on the right only).
c
c-----------------------------------------------------------------------
c optional inputs..
c
c the following is a list of the optional inputs provided for in the
c call sequence.  for each such input variable, this table lists its
c name as used in this documentation, its location in the call sequence,
c its meaning, and the default value.  the use of any of these inputs
c requires iopt .ne. 0, and in that case all of these inputs are
c examined.  a value of zero for any of these optional inputs will
c cause the default value to be used.  thus to use a subset of the
c optional inputs, simply preload locations 1 to 8 in iwork to 0, and
c locations 1 to 3 in rwork to 0.0.  then set those of interest
c to nonzero values.
c
c name    location      meaning and default value
c
c mmax    iwork(1)  maximum dimension of krylov subspace in the linear
c                   iteration.  the default value of mmax is 10.
c
c         iwork(2)  not used
c
c lenwmp  iwork(3)  length of the real work array wmp for preconditioner
c                   matrix data storage used in subroutines pset and
c                   psol.  the default value of lenwmp is 0.
c
c lenimp  iwork(4)  length of the integer work array iwmp for
c                   preconditioner matrix data storage used in
c                   subroutines pset and psol.  the default value
c                   of lenimp is 0.
c
c iprint  iwork(5)  flag indicating whether optional statistics are
c                   desired.
c                   iprint=0 means no statistics are printed.
c                            this is the default.
c                   iprint=1 means the nonlinear iteration count,
c                            norm(sf*f(u)) and number of calls to f are
c                            printed for each nonlinear iterate.
c                   iprint=2 includes the statistics for iprint=1, plus
c                            additional statistics from the krylov
c                            iteration and dogleg/lnsrch strategies.
c                            for the dogleg statistics, tau is the
c                            trust region size, cpl is the length of
c                            of the step to the cauchy point, and gml
c                            is the length of the gmres step.
c                            for the lnsrch statistics, f1 refers to
c                            0.5*norm(sf*f(u))**2 and f1new refers to
c                            0.5*norm(sf*f(unew))**2.
c
c iunit   iwork(6)  i/o unit number for optional statistics and error
c                   messages.  the default value of iunit is 6.
c
c iermsg  iwork(7)  flag indicating whether or not error messages are
c                   desired.  iermsg .ne. 0 turns off the printing
c                   of error messages.  the default value of iermsg
c                   is 0.
c
c itermx  iwork(8)  maximum allowable number of nonlinear iterations.
c                   the default value of itermx is 200.
c
c linerr  iwork(9)  linear solver error tolerance scheme.  the default
c                   value for linerr is 1.  
c                   linerr=1  etak = 0.5**k (the default).
c                   linerr=2  etak = min{||F(x_k)||,1/(k+1)}.
c                   linerr=3  etak = min{etak~,.99} where
c                             etak~ = |(||F(x_k)|| - res)|/||F(x_k-1)||
c                             res = residual for previous linear solve
c
c stepmx  rwork(1)  maximum allowable length of a newton step.  the
c                   default value of stepmx is
c                     stepmx = 1000*max(norm(su*u0),norm(su)),
c                   where u0 is the initial guess, and norm() is the
c                   euclidean norm.  norm(su) here means the euclidean
c                   norm of the n-dimensional array su.
c
c eta     rwork(2)  the relative error in computing f(u).  this
c                   value is used in the difference-quotient option
c                   for calculating j*v.  if the user-s f routine has
c                   large errors associated with the calculation of
c                   f(u), then supplying an accurate eta value will
c                   enhance the accuracy of the difference-quotient.
c                   the default value of eta is the machine epsilon.
c
c tau     rwork(3)  real scalar containing the initial trust region
c                   size for the dogleg strategy.  tau is the radius
c                   of the m-dimensional ball (m = dimension of the
c                   krylov subspace) in which the nonlinear function
c                       f1(x) = 0.5*norm( sf*f(u+x) )**2
c                   is well approximated by the local quadratic model
c                       g(x) = 0.5*norm( sf*(f(u) + j(u)*x) )**2
c                   for x in the krylov subspace, norm() the
c                   euclidean norm, and u the current newton iterate.
c                   if tau=zero on input, then an initial value for
c                   it will be chosen internally.
c                   (tau is not used with the linesearch strategy).
c
c-----------------------------------------------------------------------
c optional outputs.
c
c as optional additional output from nksol, the variables listed
c below are quantities related to the performance of nksol
c which are available to the user.  these are communicated by way of
c the work arrays, but also have internal mnemonic names as shown.
c except where stated otherwise, all of these outputs are defined
c on return from nksol.  on an illegal input return (iterm .lt. 0)
c they will be unchanged from their existing values (if any),
c except possibly for lenrw and leniw, and the integer counter values
c will have been reset to 0.
c
c name    location      meaning
c
c nni     iwork(10) the total number of nonlinear iterations.
c
c nli     iwork(11) the total number of linear krylov iterations.
c
c nfe     iwork(12) the total number of calls to the f routine.
c
c nje     iwork(13) the total number of calls to the jac routine.
c
c npe     iwork(14) the total number of calls to pset.
c
c nps     iwork(15) the total number of calls to psol.
c
c ncfl    iwork(16) the total number of times the error tolerance
c                   for the linear krylov iteration was not met
c                   in mmax iterations.
c
c nbcf    iwork(17) the total number of times the beta condition
c                   could not be met in the linesearch algorithm.
c                   the nonlinear iteration is halted if this value
c                   ever exceeds 10.
c
c nb      iwork(18) the total number of backtracks in the linesearch
c                   algorithm, and the number of extra f evaluations
c                   used by the dogleg strategy. nb is calculated as
c                   nb = nfe - nni - (nli - nje) - 1.
c
c lenrw   iwork(19) required minimum length of the rwork array.
c
c leniw   iwork(20) required minimum length of the iwork array.
c
c stepmx  rwork(1)  maximum allowable length of a newton step.  the
c                   default value of stepmx is
c                     stepmx = 1000*max(norm(su*u0),norm(su)),
c                   where u0 is the initial guess, and norm() is the
c                   euclidean norm.  if a value of stepmx .gt. 0 was
c                   input, it is not changed on output.
c
c fnrm    rwork(2)  the scaled norm norm(sf*f(u(k))) for the final
c                   computed iterate u(k).
c
c tau     rwork(3)  real scalar containing the last used trust region
c                   size for the dogleg strategy.
c
c-----------------------------------------------------------------------
c part ii.  optionally replaceable solver routines.
c
c below are descriptions of two routines in the nksol package which
c relate to the stopping criteria for the nonlinear iteration.  either
c routine may be replaced by a user-supplied version.  however, since
c such a replacement may have a major impact on performance, it should
c be done only when absolutely necessary, and only with great caution.
c (note.. the means by which the package version of a routine is
c superseded by the user-s version may be system-dependent.)
c
c (a) snrmf.
c the following subroutine is called to compute the scaled norm of
c f(u) at the current iterate u..
c     subroutine snrmf(n, savf, sf, fnorm)
c     dimension savf(n), sf(n)
c where n and sf are as in the nksol call sequence, savf contains
c f(u) with u the current iterate, and fnorm = norm(sf*f(u)) where
c norm() is the euclidean norm.  the package version of this routines
c uses the scaled norm of sf*f(u) defined in stopping criterion 1 above.
c
c if the user supplies this subroutine, it must return in fnorm
c the user-defined scaled norm of f(u).  if the user-s scaled norm
c of sf*f(u) is simply norm(sf*f(u)), then there is nothing for the
c new routine to compute since fnorm already has the desired value
c on input.
c
c (b) slngth.
c the following subroutine is called to compute the relative scaled
c length of the current step from u to unew = u + s..
c     subroutine slngth(n, nu, u, s, su, rlngth)
c     dimension u(nu), s(n), su(n)
c where n, nu, u and su are as in the nksol call sequence, u is the
c current newton iterate, and s contains the current step from u
c to unew.  rlngth is undefined on input.  the package version of
c this routine computes the relative scaled length defined in stopping
c criterion 2 above.
c
c if the user supplies this subroutine, it must return in rlngth
c the user-defined relative scaled length of the step s.
c
c-----------------------------------------------------------------------
c other routines in the package..
c
c nkstop  stopping routine for the nonlinear iteration.
c nkstp0  stopping routine for the initial guess.
c slngth  computes the relative scaled length of a step in maxnorm.
c snrmf   computes the maxnorm of sf*f(u).
c model   driver for approximate solution of newton equations.
c solpk   interfaces to either spiom or spigmr.
c spiom   contains the main iteration loop for arnoldi.
c spigmr  contains the main iteration loop for gmres.
c atv     calculates j*v for a given vector v.
c orthog  orthogonalizes a new vector against older basis vectors.
c dhefa   computes an lu decomposition of a hessenberg matrix.
c dhesl   solves a hessenberg linear system, using lu factors.
c dheqr   computes a qr decomposition of a hessenberg matrix.
c dhels   solves a hessenberg least-squares system, using qr factors.
c d1mach  computes machine epsilon in double precision.
c vnorm   computes a scaled euclidean norm of a vector.
c dogdrv  driver for the dogleg strategy.
c dogstp  computes the dogleg step for the current trust region.
c trgupd  decides if step is successfull, and updates trust region size.
c lnsrch  driver for linesearch backtracking strategy.
c errgen  outputs error messages.
c dnrm2   computes the l2 norm of a vector.
c dcopy   copies a vector to another vector.
c dswap   swaps two vectors.
c ddot    computes the dot product of two vectors.
c daxpy   adds a scalar times a vector to a vector.
c dscal   scales a vector by a scalar.
c idamax  finds the element of largest magnitude in a vector.
c note.. dnrm2, dcopy, dswap, ddot, daxpy, dscal, and idamax are from
c the blas collection (basic linear algebra modules).
c intrinsic fortran routines.. dabs, dsqrt, dmin1, dmax1, dfloat, dsign.
c-----------------------------------------------------------------------

      external f
      external jac
      external pset
      external psol

c passed variables

      integer iprint
      integer iunit
      integer iermsg
      integer n
      integer nu
      integer lrw
      integer iwork
      integer liw
      integer iopt
      integer iterm
      integer mf
      integer mdif
      integer ipflag
      integer ierr
      integer lenwmp
      integer lenimp
      integer itermx
      integer i

      double precision savf
      double precision u
      double precision rwork
      double precision su
      double precision sf
      double precision stptol
      double precision ftol

c local variables

      integer locwmp
      integer locimp
      integer iersl
      integer kmp
      integer mmax
      integer methn
      integer methk
      integer ipflg
      integer iret
      integer lup
      integer lx
      integer luprv
      integer lfprv
      integer lwm
      integer lenk
      integer lenwm
      integer lenrw
      integer leniw
      integer liwm
      integer leniwm
      integer nbcfmx
      integer iter
      integer ncscmx
      integer mfdif
      integer nfe
      integer nje
      integer nni
      integer nli
      integer npe
      integer nps
      integer ncfl
      integer nbcf
      integer linerr

      double precision epsmch
      double precision fnrm
      double precision fnrmold
      double precision f1nrm
      double precision f1nrmp
      double precision unrm
      double precision stepmx
      double precision d1mach
      double precision vnorm
      double precision epsfac
      double precision epsfac1
      double precision sunrm
      double precision dnrm2
      double precision tau

      dimension savf(n)
      dimension u(nu)
      dimension rwork(lrw)
      dimension su(n)
      dimension sf(n)
      dimension iwork(liw)

      double precision zero
      double precision one
      double precision two
      double precision three

      logical mxtkn

c-----------------------------------------------------------------------
c     nks001 common block.
c-----------------------------------------------------------------------
      double precision eps, sqteta, rhom
      common /nks001/ eps, rhom, sqteta, locwmp, locimp, iersl, kmp,
     *                mmax, methn, methk, ipflg, mfdif, nfe, nje, nni,
     *                nli,  npe, nps, ncfl, nbcf
c-----------------------------------------------------------------------
c     nks002 common block.
c-----------------------------------------------------------------------
      common /nks002/ iprint, iunit, iermsg
c-----------------------------------------------------------------------
c     nks003 common block.
c-----------------------------------------------------------------------
      double precision pthrsh
      common /nks003/ pthrsh
c
      save
      data zero/0.0d0/,one/1.0d0/,two/2.0d0/,three/3.0d0/
c-----------------------------------------------------------------------
c zero counters in common block nks001.
c-----------------------------------------------------------------------
      nni = 0
      nli = 0
      npe = 0
      nps = 0
      ncfl = 0
      nbcf = 0
      nfe = 0
      nje = 0
c-----------------------------------------------------------------------
c load values in nks002 common block.
c-----------------------------------------------------------------------
      iprint = 0
      iunit = 6
      iermsg = 0
c-----------------------------------------------------------------------
c load pthrsh value in nks003 common block.
c-----------------------------------------------------------------------
      pthrsh = two
c=======================================================================
c block a: initialization section.
c=======================================================================
c-----------------------------------------------------------------------
c     compute machine epsilon.
c-----------------------------------------------------------------------
      epsmch = d1mach(4)
c-----------------------------------------------------------------------
c     initialize parameters.  check for illegal input.
c-----------------------------------------------------------------------
      if (nu.lt.n) then
         iterm = -1
         ierr = 130
         call errgen(ierr,zero,zero,0,0)
        go to 500
      endif

      if ( (mf .lt. 1) .or. (mf .gt. 3) ) then
c illegal value of method flag mf.
        iterm = -1
        ierr = 10
        call errgen(ierr,zero,zero,0,0)
        go to 500
      endif
c-----------------------------------------------------------------------
c load common variables methn and methk.
c methn =  1, dogleg global strategy used.
c          2, linesearch backtracking global strategy used.
c methk =  1, arnoldi algorithm used.
c          2, gmres algorithm used.
c-----------------------------------------------------------------------
      if (mf .eq. 1) then
          methn = 1
          methk = 2
        else
          methn = 2
          methk = mf - 1
       endif
      if ( (mdif .lt. 0) .or. (mdif .gt. 1) ) then
c illegal value of method flag mdif.
        iterm = -2
        ierr = 20
        call errgen(ierr,zero,zero,0,0)
        go to 500
        endif
c load common variable mfdif.
      mfdif = 2 - mdif
      if ( (ipflag .lt. 0) .or. (ipflag .gt. 1) ) then
c illegal value of ipflag.
        iterm = -3
        ierr = 30
        call errgen(ierr,zero,zero,0,0)
        go to 500
        endif
c load common variable ipflg.
      ipflg = ipflag
c
      linerr = 1
      lenwmp = 0
      lenimp = 0
      mmax = 10
      kmp = mmax
      stepmx = zero
      itermx = 0
      sqteta = zero
      tau = zero
c check for optional inputs.
      if (iopt .ne. 0) then
c first check for illegal values.
        do 10 i = 1,9
           if (iwork(i) .lt. 0) then
              iterm = -4
              ierr = 40
              call errgen(ierr,zero,zero,i,iwork(i))
              go to 500
           endif
 10     continue
        do 20 i = 1,3
           if (rwork(i) .lt. zero) then
              iterm = -(i+4)
              ierr = 50
              call errgen(ierr,rwork(i),zero,i,0)
              go to 500
           endif
 20     continue
        if (iwork(1) .gt. 0) mmax = iwork(1)
        if (iwork(2) .gt. 0) kmp  = iwork(2)
        if (iwork(3) .gt. 0) lenwmp = iwork(3)
        if (iwork(4) .gt. 0) lenimp = iwork(4)
        if (iwork(5) .gt. 0) iprint = iwork(5)
        if (iwork(6) .gt. 0) iunit = iwork(6)
        if (iwork(7) .gt. 0) iermsg = iwork(7)
        if (iwork(8) .gt. 0) itermx = iwork(8)
        if (iwork(9) .gt. 0) linerr = iwork(9)
        if (rwork(1) .gt. zero) stepmx = rwork(1)
        if (rwork(2) .gt. zero) sqteta = dsqrt(rwork(2))
        if (rwork(3) .gt. zero) tau = rwork(3)
      endif
      mmax = min0(mmax,n)
      kmp = mmax
      lup = 4
      lx = lup + n
      luprv = lx + n
      lfprv = luprv + n
      lwm = lfprv + n
      if (methk .eq. 1) then
          lenk = 1 + 3*n + 3 + n*mmax + n + mmax**2
        else if (methk .eq. 2) then
          lenk = 1 + 3*n + 3 + n*mmax + n + 2 + 2*mmax*(mmax+1) + mmax
        endif
      lenwm = lenk + lenwmp
      if (lenwmp .ne. 0) then
          locwmp = lenk + 1
        else
          locwmp = lenk
        endif
      lenrw = lwm + lenwm
      if (lrw .lt. lenrw) then
c insufficient storage in rwork.
        iterm = -8
        ierr = 100
        call errgen(ierr,zero,zero,lrw,lenrw)
        go to 500
        endif
c
      leniw = 20 + mmax + lenimp
      if (liw .lt. leniw) then
c insufficient storage in iwork
        iterm = -9
        ierr = 110
        call errgen(ierr,zero,zero,liw,leniw)
        go to 500
        endif
      liwm = 21
      leniwm = mmax + lenimp
      if (lenimp .ne. 0) then
          locimp = mmax + 1
        else
          locimp = mmax
        endif
      if (itermx .eq. 0) itermx = 200
      nbcfmx = 10
      if (stptol .eq. zero) stptol = epsmch**(two/three)
      if (stepmx .eq. zero) then
        unrm = vnorm(n,u,su)
        sunrm = dnrm2(n,su,1)
        stepmx = 1.d3*dmax1(unrm,sunrm)
        endif
      if (ftol .eq. zero) ftol = epsmch**(one/three)
      if (sqteta .eq. zero) sqteta = dsqrt(epsmch)
c load initial trust region size tau.
      if (tau .le. zero) then
        tau = -one
        endif
      iter = 0
c-----------------------------------------------------------------------
c     call subroutine f to evaluate f at the initial guess.
c-----------------------------------------------------------------------
      call f(n, nu, u, savf)
      nfe = nfe + 1
      fnrm = vnorm(n,savf,sf)
      f1nrm = fnrm*fnrm/two
      fnrmold = fnrm
c-----------------------------------------------------------------------
c     test to see if initial conditions satisfy stopping criterion.
c-----------------------------------------------------------------------
      call nkstp0(n, savf, sf, ftol, fnrm, iterm)
      if (iterm .ne. 0) go to 500
      ncscmx = 0
c=======================================================================
c block b: iteration section.
c=======================================================================
 100  continue
      iter = iter + 1
c-----------------------------------------------------------------------
c     call model to calculate the approximate newton step using the
c     appropriate krylov algorithm.
c-----------------------------------------------------------------------
c     modification [12-May-1993] error tol set to h. walker criterion
c-----------------------------------------------------------------------
c
c     original criterion - superlinear convergence
c
      if(linerr.eq.1) then
         epsfac = 0.5d0**(iter)
      endif
c
c     ds83 criterion - quadratic convergence   
c
      if(linerr.eq.2) then
         epsfac1 = 1.0d0/dble(iter+1)
         epsfac = dmin1(epsfac1,fnrm)
      endif
c
c     walker criterion - ?? convergence
c
      if(linerr.eq.3) then
         if(iter.eq.1) then
            epsfac = 0.5d0
         else
            epsfac1 = (fnrm - dabs(rhom))/fnrmold
            epsfac = dmin1(epsfac1,0.99d0)
         endif
      endif
      eps = (epsmch + epsfac)*fnrm
      call model(n,nu,rwork(lwm),lenwm,iwork(liwm),leniwm,u,savf,
     *            rwork(lx),f,jac,su,sf,pset,psol)
      if (iersl .ne. 0) then
        iterm = iersl
        go to 500
        endif
c-----------------------------------------------------------------------
c     call dogdrv or lnsrch to get an acceptable step x.
c-----------------------------------------------------------------------
      if (methn .eq. 1) then
          call dogdrv (n,nu,rwork(lwm),lenwm,iwork(liwm),leniwm,u,
     *                savf,f1nrm,rwork(lx),su,sf,stepmx,stptol,tau,iret,
     *                rwork(luprv),rwork(lfprv),rwork(lup),f1nrmp,
     *                mxtkn,f,jac,psol)
          if (iersl .ne. 0) then
            iterm = iersl
            go to 500
            endif
        else
          call lnsrch(n,nu,u,savf,f1nrm,rwork(lx),su,sf,stepmx,
     *                stptol,iret,rwork(lup),f1nrmp,mxtkn,f,jac)
          if (nbcf .gt. nbcfmx) then
            iterm = 6
            ierr = 120
            call errgen(ierr,zero,zero,nbcf,nbcfmx)
            go to 500
            endif
        endif
c-----------------------------------------------------------------------
c     call nkstop to check if tolerances are met.
c-----------------------------------------------------------------------
      fnrmold = fnrm
      fnrm = dsqrt(two*f1nrmp)
      call nkstop(n,nu,u,rwork(lup),savf,fnrm,su,sf,stptol,
     *       rwork(lx),ftol,iret,iter,itermx,mxtkn,ncscmx,iterm)
c
      do 300 i = 1,n
        u(i) = rwork(i+lup-1)
 300    continue
      f1nrm = f1nrmp
      if (iprint .ge. 1) write(iunit,400) iter,fnrm,nfe
 400  format(' iter= ',i4,' fnrm= ',g26.16,' nfe= ',i6)
      if (iterm .eq. 0) go to 100
c-----------------------------------------------------------------------
c     load optional outputs into iwork array and return.
c-----------------------------------------------------------------------
 500  continue
      iwork(10) = nni
      iwork(11) = nli
      iwork(12) = nfe
      iwork(13) = nje
      iwork(14) = npe
      iwork(15) = nps
      iwork(16) = ncfl
      iwork(17) = nbcf
      iwork(18) = nfe - nni - (nli - nje) - 1
      iwork(19) = lenrw
      iwork(20) = leniw
      rwork(1) = stepmx
      rwork(2) = fnrm
      rwork(3) = tau
      return
c----------------------- end of subroutine nksol -----------------------
      end
      subroutine nkstop(n, nu, u, unew, savf, fnorm, su, sf, stptol,
     *    wk, ftol, iret, iter, itermx, mxtkn, ncscmx, iterm)
c-----------------------------------------------------------------------
c this routine decides whether to terminate nksol based on one of
c several stopping criteria described below.  (see description of
c iterm.)
c
c on entry
c
c   n      = size of the nonlinear system the length of arrays
c            unew, savf, su and sf.
c
c   nu     = length of array u.
c
c   u      = real array containing the previous approximate root.
c
c   unew   = real array containing the current approximate root.
c
c   savf   = real array containing the n-vector f(unew).
c
c   fnorm  = real scalar containing norm(sf*f(unew)), where norm()
c            denotes the euclidean norm.  (currently, not used, but
c            available if the user supplies his own stopping routine.)
c
c   su     = real array containing scaling factors for u.
c
c   sf     = real array containing scaling factors for f(u).
c
c   stptol = user-supplied tolerance on the minimum step length
c            s = unew-u.
c
c   wk     = real array of length n used for work space by nkstop.
c
c   ftol   = user-supplied tolerance on norm(sf*f(unew)).
c
c   iret   = output flag from either the dogdrv or lnsrch module.
c
c   iter   = current nonlinear iteration count.
c
c   itermx = maximum allowable number of nonlinear iterations.
c
c   mxtkn  = logical flag returned from either lnsrch or dogdrv
c            module indicating if the last step taken was of maximum
c            length stepmx.
c
c   ncscmx = number of consecutive steps on which the stepsize
c            was of maximum length stepmx.
c
c on return
c
c   ncscmx = number of consecutive steps on which the maximum
c            allowable stepsize length stepmx was taken.  if
c            mxtkn=.true., then ncscmx has been incremented by
c            one, and ncscmx is reset to zero if mxtkn=.false.
c
c   iterm  = output flag.
c            iterm=0 means no termination criteria satisfied,
c                    and the iteration continues.
c            iterm=1 means maxnorm(sf*f(unew)) .le. ftol, where
c                    maxnorm() is the maximum norm function.  unew
c                    is probably an approximate root of f.
c            iterm=2 means the scaled distance between the last two
c                    steps is less than stptol.  unew may be an
c                    approximate root of f, but it is also possible
c                    that the algorithm is making very slow progress
c                    and is not near a root, or that stptol is too
c                    large.
c            iterm=3 means iret=1 was returned from either the dogdrv
c                    or lnsrch module, and that the last global step
c                    failed to reduce norm(f) sufficiently.  either u
c                    is close to a root of f and no more accuracy is
c                    possible, or the finite-difference approximation
c                    to j*v is inaccurate, or stptol is too large.
c            iterm=4 means that the maximum allowable number of
c                    nonlinear iterations has been exceeded.
c            iterm=5 means 5 consecutive steps of length stepmx (the
c                    maximum stepsize limit) have been taken.  either
c                    norm(f) asymptotes from above to a finite value
c                    in some direction, or stepmx is too small.
c
c-----------------------------------------------------------------------

      integer n
      integer nu
      integer iret
      integer iter
      integer itermx
      integer ncscmx
      integer iterm
      integer i

      double precision u
      double precision unew
      double precision savf
      double precision fnorm
      double precision su
      double precision sf
      double precision stptol
      double precision ftol
      double precision fmax
      double precision rlngth
      double precision wk
      double precision thsnd

      dimension u(nu)
      dimension unew(n)
      dimension savf(n)
      dimension su(n)
      dimension sf(n)
      dimension wk(n)

      logical mxtkn

c-----------------------------------------------------------------------
c     nks003 common block.
c-----------------------------------------------------------------------
      double precision pthrsh
      common /nks003/ pthrsh
      save
      data thsnd/1000.0d0/
c
      iterm = 0
c check for error return from dogdrv or lnsrch.
      if (iret .eq. 1) then
        iterm = 3
        go to 999
        endif
c check tolerance on norm(sf*f(unew)).
      fmax = fnorm
      call snrmf(n, savf, sf, fmax)
      if (fmax .le. ftol) then
        iterm = 1
        go to 999
        endif
c check for scaled distance between last two steps too small.
      do 200 i = 1,n
        wk(i) = unew(i) - u(i)
 200    continue
      call slngth(n, nu, u, wk, su, rlngth)
      if (rlngth .le. stptol) then
        iterm = 2
        go to 999
        endif
c check for maximum number of iterates exceeded.
      if (iter .ge. itermx) then
        iterm = 4
        go to 999
        endif
c check for consecutive number of steps taken of size stepmx.
c if mstkn=.false., set ncscmx = 0.
      if (mxtkn) then
          ncscmx = ncscmx + 1
        else
          ncscmx = 0
        endif
      if (ncscmx .eq. 5) iterm = 5
c load threshold for re-evaluating preconditioner.
      pthrsh = fmax/(thsnd*ftol) + rlngth
c
 999  continue
      return
c----------------------- end of subroutine nkstop ----------------------
      end

c-----------------------------------------------------------------------
      subroutine slngth(n, nu, u, s, su, rlngth)
c-----------------------------------------------------------------------
c this routine calculates the scaled steplength of the current step s.
c
c on entry
c
c   n      = size of the nonlinear system, and the length of arrays
c            u, unew, savf, su and sf.
c
c   nu     = length of array u.
c
c   u      = real array containing the previous approximate root.
c
c   s      = real array containing the current step from u to unew.
c
c   su     = real array containing scaling factors for u.
c
c on return
c
c   rlngth = real scalar containing the scaled steplength.
c-----------------------------------------------------------------------

      integer i
      integer n
      integer nu

      double precision u
      double precision s
      double precision su
      double precision one
      double precision rlngth
      double precision temp
      double precision zero

      dimension u(nu)
      dimension s(n)
      dimension su(n)
      data one/1.0d0/, zero/0.0d0/
c
      rlngth = zero
      do 10 i = 1,n
        temp = one/su(i)
        temp = dmax1(dabs(u(i)),temp)
        rlngth = dmax1(dabs(s(i))/temp,rlngth)
 10     continue
      return
c----------------------- end of subroutine slngth ----------------------
      end

c-----------------------------------------------------------------------
      subroutine snrmf(n, savf, sf, fnorm)
c-----------------------------------------------------------------------
c this routine calculates the scaled norm of f(u) for the current
c iterate u.
c
c on entry
c
c   n      = size of the nonlinear system, and the length of arrays
c            savf and sf.
c
c   savf   = real array containing the n-vector f(u).
c
c   sf     = real array containing scaling factors for f(u).
c
c   fnorm  = real scalar containing norm(sf*f(unew)), where norm()
c            denotes the euclidean norm.  (currently, not used, but
c            available if the user supplies his own version of this
c            routine.)
c
c on return
c
c   fnorm = real scalar containing the scaled norm of f(u).
c-----------------------------------------------------------------------
      integer n
      integer i

      double precision savf
      double precision sf
      double precision zero
      double precision temp
      double precision fnorm
      double precision fmax

      dimension savf(n)
      dimension sf(n)

      save
      data zero/0.0d0/
c
      fmax = zero
      do 10 i = 1,n
        temp = sf(i)*dabs(savf(i))
        fmax = dmax1(temp,fmax)
 10     continue
      fnorm = fmax
      return
c----------------------- end of subroutine snrmf -----------------------
      end

c-----------------------------------------------------------------------
      subroutine nkstp0(n, savf, sf, ftol, fnorm, iterm)
c-----------------------------------------------------------------------
c this routine decides whether to terminate nksol at iteration zero
c because u0 is an approximate root of f(u).
c
c on entry
c
c   n     = size of the nonlinear system, and the length of arrays
c           savf and sf.
c
c   savf  = real array containing the n-vector f(u0).
c
c   ftol  = real scalar containing the user-s stopping tolerance.
c
c   fnorm = real scalar containing norm(sf*f(u0)), where norm()
c           denotes the euclidean norm.  (currently, not used, but
c           available if the user supplies his own stopping routine.)
c
c on return
c
c   iterm = output flag.
c           iterm=0 means u0 is not an approximate root of f.
c           iterm=1 means u0 is an approximate root of f.
c
c-----------------------------------------------------------------------

      integer n
      integer iterm

      double precision savf
      double precision sf
      double precision ftol
      double precision fmax
      double precision p01
      double precision fnorm

      dimension savf(n)
      dimension sf(n)

      save
      data p01/1.d-2/
c
      fmax = fnorm
      call snrmf(n, savf, sf, fmax)
      iterm = 0
      if (fmax .le. p01*ftol) iterm = 1
c
      return
c----------------------- end of subroutine nkstp0 ----------------------
      end

c-----------------------------------------------------------------------
      double precision function d1mach (idum)
      integer idum
c-----------------------------------------------------------------------
c this routine computes the unit roundoff of the machine in double
c precision.  this is defined as the smallest positive machine number
c u such that  1.0d0 + u .ne. 1.0d0 (in double precision).
c-----------------------------------------------------------------------

      double precision u
      double precision comp

      u = 1.0d0
 10   u = u*0.5d0
      comp = 1.0d0 + u
      if (comp .ne. 1.0d0) go to 10
      d1mach = u*2.0d0
      return
c----------------------- end of function d1mach ------------------------
      end

      double precision function vnorm (n, v, s)
c-----------------------------------------------------------------------
c this function routine computes the scaled euclidean norm
c of the vector of length n contained in the array v, with scale factors
c contained in the array s of length n..
c   vnorm = dsqrt( sum( v(i)*s(i) )**2 )
c-----------------------------------------------------------------------
      integer n
      integer i

      double precision v
      double precision s
      double precision sum

      dimension v(n)
      dimension s(n)

      sum = 0.0d0
      do 10 i = 1,n
 10     sum = sum + (v(i)*s(i))**2
      vnorm = dsqrt(sum)
      return
c----------------------- end of function vnorm -------------------------
      end

c-----------------------------------------------------------------------
      subroutine model(n, nu, wm, lenwm, iwm, leniwm, u, savf, x, f,
     *                  jac, su, sf, pset, psol)
c-----------------------------------------------------------------------
c this routine interfaces to subroutine solpk for the approximate
c solution of the newton equations in the newton iteration.
c
c on entry
c
c   n      = size of the nonlinear system, and the length of arrays
c            unew, savf, su and sf.
c
c   nu     = length of array u.
c
c   u      = real array containing the previous approximate root.
c
c   savf   = real array containing the n-vector f(u).
c
c   su     = real array containing scaling factors for u.
c
c   sf     = real array containing scaling factors for f(u).
c
c   wm     = real work space containing data for the krylov algorithm
c            (krylov basis vectors, hessenberg matrix, user-supplied
c            work space for preconditioning, etc.).
c
c   lenwm  = length of work array wm.
c
c   iwm    = integer work space for the krylov algorithm (pivot
c            information for the lu-factorization of the hessenberg
c            matrix, user-supplied work space for preconditioning,
c            etc.).
c
c   leniwm = length of work array iwm.
c
c on return
c
c   x      = the approximate solution vector of the newton equations.
c
c   iersl  = output flag (in common)..
c            iersl = 0 means no trouble occured.
c            iersl = 7 means the krylov solver suffered a breakdown, and
c                      so the solution x is undefined.
c            iersl = 8 means there was a nonrecoverable error in pset
c                      and the nonlinear iteration is halted.
c            iersl = 9 means there was a nonrecoverable error in psol
c                      and the nonlinear iteration is halted.
c
c-----------------------------------------------------------------------
      external f
      external jac
      external pset
      external psol

      integer nu
      integer i
      integer n
      integer lenwm
      integer iwm
      integer leniwm
      integer locwmp
      integer locimp
      integer iersl
      integer kmp
      integer mmax
      integer methn
      integer methk
      integer ipflg
      integer mfdif
      integer nfe
      integer nje
      integer nni
      integer nli
      integer npe
      integer nps
      integer ncfl
      integer nbcf
      integer ipcur
      integer ier

      double precision wm
      double precision x  
      double precision su
      double precision sf
      double precision savf
      double precision u
      double precision onept5

      dimension wm(lenwm)
      dimension iwm(leniwm)
      dimension x(n)
      dimension u(nu)
      dimension savf(n)
      dimension su(n)
      dimension sf(n)

c-----------------------------------------------------------------------
c     nks001 common block.
c-----------------------------------------------------------------------
      double precision eps, sqteta, rhom
      common /nks001/ eps, rhom, sqteta, locwmp, locimp, iersl, kmp,
     *                mmax, methn, methk, ipflg, mfdif, nfe, nje, nni,
     *                nli,  npe, nps, ncfl, nbcf
c-----------------------------------------------------------------------
c     nks003 common block.
c-----------------------------------------------------------------------
      double precision pthrsh
      common /nks003/ pthrsh
c
      save
      data onept5/1.5d0/
c-----------------------------------------------------------------------
c     call user-supplied routine pset to load preconditioner matrix
c     data if threshold achieved.  set ipcur=1 if p is re-evaluated.
c-----------------------------------------------------------------------
 10   continue
      ipcur = 0
      if ( (pthrsh .gt. onept5) .and. (ipflg .ne. 0) ) then
        ier = 0
        call pset (n, nu, u, savf, su, sf, x, f, wm(locwmp), 
     *             iwm(locimp), ier)
        npe = npe + 1
        ipcur = 1
        if (ier .ne. 0) then
          iersl = 8
          return
          endif
        endif
c-----------------------------------------------------------------------
c     load x with -f(u).
c-----------------------------------------------------------------------
      do 100 i = 1,n
 100    x(i) = -savf(i)
c-----------------------------------------------------------------------
c     call solpk to solve j*x = -f using the appropriate krylov
c     algorithm.
c-----------------------------------------------------------------------
      call solpk (n,nu,wm,lenwm,iwm,leniwm,u,savf,x,su,sf,f,jac,psol)
      if (iersl .lt. 0) then
c nonrecoverable error from psol.  set iersl and return.
         iersl = 9
         return
         endif
      if ( (iersl .gt. 0) .and. (ipflg .ne. 0) ) then
        if (ipcur .eq. 0) go to 10
        endif
c iersl=1 on return from solpk means there was a breakdown in the
c krylov iteration.  set iersl=7 to halt iteration and return.
      if (iersl .eq. 1) iersl = 7
c
      return
c----------------------- end of subroutine model -----------------------
      end

c-----------------------------------------------------------------------
      subroutine solpk (n, nu, wm, lenwm, iwm, leniwm, u, savf, x,
     *                   su, sf, f, jac, psol)
      external f
      external jac
      external psol

      integer n
      integer nu
      integer lenwm
      integer iwm
      integer leniwm

      double precision wm
      double precision x
      double precision su
      double precision sf
      double precision savf
      double precision u

      dimension u(nu)
      dimension savf(n)
      dimension su(n)
      dimension sf(n)
      dimension wm(lenwm)
      dimension iwm(leniwm)
      dimension x(n)
c-----------------------------------------------------------------------
c this routine interfaces to either subroutine spiom or spigmr for
c the solution of the linear system j*x = -f arising from a newton
c iteration.
c
c on entry
c
c   n      = size of the nonlinear system, and the length of arrays
c            u, unew, savf, su and sf.
c
c   nu     = length of array u past n.
c
c   u      = real array containing the previous approximate root.
c
c   savf   = real array containing the n-vector f(u).
c
c   su     = real array containing scaling factors for u.
c
c   sf     = real array containing scaling factors for f(u).
c
c   wm     = real work space containing data for the krylov algorithm
c            (krylov basis vectors, hessenberg matrix, user-supplied
c            work space for preconditioning, etc.).
c
c   lenwm  = length of work array wm.
c
c   iwm    = integer work space for the krylov algorithm (pivot
c            information for the lu-factorization of the hessenberg
c            matrix, user-supplied work space for preconditioning,
c            etc.).
c
c   leniwm = length of work array iwm.
c
c on return
c
c   x      = the approximate solution vector of the newton equations.
c
c   iersl  = output flag (in common)..
c            iersl = 0 means no trouble occured.
c            iersl = 1 means the krylov solver suffered a breakdown, and
c                      so the solution x is undefined.
c            iersl =-1 means there was a nonrecoverable error in psol.
c                      this forces the iteration to halt.
c
c this routine also uses the common variables
c eps, locwmp, locimp, kmp, mmax, methk, nni, nli, npe, nps, ncfn, ncfl
c-----------------------------------------------------------------------
c methods used so far:
c 1. methk = 1      ------> spiom
c 2. methk = 2      ------> gmres
c-----------------------------------------------------------------------

      integer i
      integer ib
      integer iflag
      integer ihes
      integer iwk
      integer npsl
      integer iv
      integer mmaxp1
      integer miom
      integer ihsv
      integer iq
      integer mgmr


      double precision bnrm

c-----------------------------------------------------------------------
c     nks001 common block.
c-----------------------------------------------------------------------

      integer locwmp
      integer locimp
      integer iersl
      integer kmp
      integer mmax
      integer methn
      integer methk
      integer ipflg
      integer mfdif
      integer nfe
      integer nje
      integer nni
      integer nli
      integer npe
      integer nps
      integer ncfl
      integer nbcf


      double precision eps, sqteta, rhom

      common /nks001/ eps, rhom, sqteta, locwmp, locimp, iersl, kmp,
     *                mmax, methn, methk, ipflg, mfdif, nfe, nje, nni,
     *                nli,  npe, nps, ncfl, nbcf
      save
c
      iersl = 0
      go to (100,200), methk
c-----------------------------------------------------------------------
c use the spiom algorithm to solve the linear system j*x = -f.
c-----------------------------------------------------------------------
 100  continue
      iv = 3
      ib = iv + n*mmax
      ihes = ib + n
      iwk = ihes + mmax*mmax
      do 110 i = 1,n
 110    wm(i+ib-1) = x(i)
      call spiom (n, nu, u, savf, wm(ib), su, sf, mmax, kmp, eps,
     *   f, jac, psol, npsl, x, wm(iv), wm(ihes), iwm, miom,
     *   wm(locwmp), iwm(locimp), wm(iwk), ipflg, iflag, rhom)
      nni = nni + 1
      nli = nli + miom
      nps = nps + npsl
      if (iflag .ne. 0) ncfl = ncfl + 1
      if (iflag .ge. 2) iersl = 1
      if (iflag .lt. 0) iersl = -1
      return
c-----------------------------------------------------------------------
c use the spigmr algorithm to solve the linear system j*x = -f.
c-----------------------------------------------------------------------
 200  continue
      mmaxp1 = mmax + 1
      iv = 3
      ib = iv + n*mmax
      ihes = ib + n + 1
      ihsv = ihes + mmax*(mmaxp1+1) + 1
      iwk = ihsv + mmax*mmaxp1
      iq = iwk + n
      do 210 i = 1,n
 210     wm(i+ib-1) = x(i)
      call spigmr (n, nu, u, savf, wm(ib), su, sf, mmax, mmaxp1, kmp,
     *   eps, f, jac, psol, npsl, x, wm(iv), wm(ihes), wm(iq),
     *   wm(ihsv), mgmr, wm(locwmp), iwm(locimp), wm(iwk), methn,
     *   bnrm, ipflg, iflag, rhom)
      nni = nni + 1
      nli = nli + mgmr
      nps = nps + npsl
      if (iflag .ne. 0) ncfl = ncfl + 1
      if (iflag .ge. 2) iersl = 1
      if (iflag .lt. 0) iersl = -1
      if (iersl .eq. 0) then
        call dcopy (mmax, wm(ib), 1, wm(ihes), 1)
        wm(1) = bnrm
        iwm(1) = mgmr
        endif
      return
c----------------------- end of subroutine solpk -----------------------
      end

c-----------------------------------------------------------------------
      subroutine spiom (n, nu, u, savf, b, su, sf, mmax, iomp, eps,
     *            f, jac, psol, npsl, x, v, hes, ipvt, miom, wmp,
     *            iwmp, wk, ipflg, iflag, rho)

      external f
      external jac
      external psol

      integer n
      integer nu
      integer mmax
      integer iomp
      integer ipvt
      integer miom
      integer iflag
      integer iprint
      integer iunit
      integer iermsg
      integer npsl
      integer iwmp
      integer ipflg

      double precision u
      double precision savf
      double precision b
      double precision su
      double precision sf
      double precision x
      double precision eps
      double precision v
      double precision hes
      double precision wmp
      double precision wk

      dimension u(nu)
      dimension savf(n)
      dimension b(n)
      dimension su(n)
      dimension sf(n)
      dimension x(n)
      dimension v(n,mmax+1)
      dimension hes(mmax,mmax)
      dimension wmp(1)
      dimension wk(n)
      dimension iwmp(1)
      dimension ipvt(mmax)

c-----------------------------------------------------------------------
c this routine solves the linear system a * x = b using a scaled
c preconditioned version of the incomplete orthogonalization method.
c an initial guess of x = 0 is assumed.
c-----------------------------------------------------------------------
c
c on entry
c
c   n      = problem size, passed to f, jac and psol.  also, the order
c            of the matrix a, and the lengths of the vectors u, savf,
c            b, su, sf, and x.
c
c   nu     = length of array u.
c
c   u      = array containing current approximate solution to f(u) = 0.
c
c   savf   = array containing current value of f(u).
c
c   b      = the right hand side of the system a*x = b.
c            b is also used as work space when computing the
c            final approximation.
c
c   su     = array of length n containing scale factors for u.
c
c   sf     = array of length n containing scale factors for f.
c
c   mmax   = the maximum allowable order of the matrix hes.
c
c   iomp   = the number of previous vectors the new vector vnew
c            must be made orthogonal to.  iomp .le. mmax.
c
c   eps    = tolerance on residuals b-a*x in scaled euclidean norm.
c
c   wmp    = real work array used by preconditioner psol.
c
c   iwmp   = integer work array used by preconditioner psol.
c
c   ipflg  = integer flag
c            ipflg=0 means no preconditioning is done.
c            ipflg.ne.0 means preconditioning is being performed.
c
c on return
c
c   x      = the final computed approximation to the solution
c            of the system a*x = b.
c
c   rho    = the scaled euclidean norm of the residual associated
c            with x.
c
c   v      = the n x (miom+1) array containing the miom
c            orthogonal vectors v(*,1) to v(*,miom).
c
c   hes    = the lu factorization of the miom x miom upper
c            hessenberg matrix whose entries are the
c            scaled inner products of a*v(*,k) and v(*,i).
c
c   ipvt   = an integer array containg pivoting information.  it
c            is loaded in dhefa and used in dhesl.
c
c   miom   = the number of iterations performed, and current
c            order of the upper hessenberg matrix hes.
c
c   npsl   = the number of calls to psol.
c
c   iflag  = integer output flag..
c            iflag=0 means convergence in miom iterations, miom.le.mmax.
c            iflag=1 means the convergence test did not pass in mmax
c                    iterations.
c            iflag=2 means there was a recoverable error in psol
c                    caused by the preconditioner being out of date.
c            iflag=-1 means there was a nonrecoverable error in psol.
c
c note: the x array is used as work space in routines psol and atv.
c-----------------------------------------------------------------------

      integer i
      integer info
      integer j
      integer k
      integer m
      integer mm1
      integer ier

      double precision bnrm
      double precision prod
      double precision rho
      double precision snormw
      double precision dnrm2
      double precision tem

c-----------------------------------------------------------------------
c     nks002 common block.
c-----------------------------------------------------------------------
      common /nks002/ iprint, iunit, iermsg
      save
c
      iflag = 0
      miom = 0
      npsl = 0
c zero out the hes array. ----------------------------------------------
      do 15 j = 1,mmax
        do 10 i = 1,mmax
 10       hes(i,j) = 0.0d0
 15     continue
c-----------------------------------------------------------------------
c the initial residual is the vector b.  apply scaling to b, and
c then normalize as v(*,1).
c-----------------------------------------------------------------------
      do 20 i = 1,n
 20     v(i,1) = b(i)*sf(i)
      bnrm = dnrm2(n, v, 1)
      tem = 1.0d0/bnrm
      call dscal (n, tem, v(1,1), 1)
c-----------------------------------------------------------------------
c main loop to compute the vectors v(*,2) to v(*,mmax).
c the running product prod is needed for the convergence test.
c-----------------------------------------------------------------------
      prod = 1.0d0
      if (iprint .gt. 1) write(iunit,30)
 30   format(' ------ in routine spiom ------ ')
      do 90 m = 1,mmax
        miom = m
c-----------------------------------------------------------------------
c call routine atv to compute vnew = a*v(m).
c call routine orthog to orthogonalize the new vector vnew = v(*,m+1).
c call routine dhefa to update the factors of hes.
c-----------------------------------------------------------------------
        call atv (n, nu, u, savf, v(1,m), su, sf, x, f, jac, psol,
     *            v(1,m+1), wk, wmp, iwmp, ier, npsl)
        if (ier .ne. 0) go to 300
        call orthog (v(1,m+1),v,hes,n,m,mmax,mmax,iomp,snormw)
        call dhefa (hes, mmax, m, ipvt, info, m)
        mm1 = m - 1
        if (m .gt. 1 .and. ipvt(mm1) .eq. mm1) prod = prod*hes(m,mm1)
        if (info .ne. m) go to 50
c-----------------------------------------------------------------------
c the last pivot in hes was found to be zero.
c if vnew = 0 or l = mmax, take an error return with iflag = 2.
c otherwise, continue the iteration without a convergence test.
c-----------------------------------------------------------------------
        if (snormw .eq. 0.0d0) go to 120
        if (m .eq. mmax) go to 120
        go to 60
c-----------------------------------------------------------------------
c update rho, the estimate of the norm of the residual b - a*x(m).
c test for convergence.  if passed, compute approximation x(m).
c if failed and m .lt. mmax, then continue iterating.
c-----------------------------------------------------------------------
 50     continue
        rho = bnrm*snormw*dabs(prod/hes(m,m))
        if (iprint .gt. 1) write(iunit,55) m, rho, eps
 55     format(' m , res, eps ',i4,e25.16,1x,e25.16)
        if (rho .le. eps) go to 200
        if (m .eq. mmax) go to 150
c if l .lt. mmax, store hes(m+1,m) and normalize the vector v(*,m+1).
 60     continue
        hes(m+1,m) = snormw
        tem = 1.0d0/snormw
        call dscal (n, tem, v(1,m+1), 1)
 90     continue
c-----------------------------------------------------------------------
c l has reached mmax without passing the convergence test..
c compute a solution anyway and return with iflag = 1.
c otherwise, return with iflag = 2.
c-----------------------------------------------------------------------
 120  continue
      iflag = 2
      return
 150  iflag = 1
c-----------------------------------------------------------------------
c compute the approximation x(m) to the solution.
c since the vector x was used as work space in routine orthog,
c and since initial guess of the newton correction is zero, x must be
c set back to zero.
c-----------------------------------------------------------------------
 200  continue
      m = miom
      do 210 k = 1,m
 210    b(k) = 0.0d0
      b(1) = bnrm
      call dhesl(hes,mmax,m,ipvt,b)
      do 220 k = 1,n
 220    x(k) = 0.0d0
      do 230 i = 1,m
        call daxpy (n, b(i), v(1,i), 1, x, 1)
 230    continue
      do 240 i = 1,n
 240    x(i) = x(i)/su(i)
      if (ipflg .eq. 1) then
        ier = 0
        call psol (n, nu, u, savf, f, jac, wk, wmp, iwmp, x, ier)
        npsl = npsl + 1
        if (ier .ne. 0) go to 300
        endif
      return
c-----------------------------------------------------------------------
c this block handles error returns forced by routine psol.
c-----------------------------------------------------------------------
 300  continue
      if (ier .lt. 0) iflag = -1
      if (ier .gt. 0) iflag = 3
      return
c----------------------- end of subroutine spiom -----------------------
      end

c----------------------- end of subroutine spiom -----------------------
      subroutine atv (n, nu, u, savf, v, su, sf, ftem, f, jac, psol,
     *                 z, vtem, wmp, iwmp, ier, npsl)
      external f
      external jac
      external psol

      integer n
      integer nu
      integer iwmp
      integer ier
      integer npsl
      integer i

      double precision u
      double precision savf
      double precision v
      double precision su
      double precision sf
      double precision ftem
      double precision vtem
      double precision z
      double precision wmp

      dimension u(nu)
      dimension savf(n)
      dimension v(n)
      dimension su(n)
      dimension sf(n)
      dimension ftem(n)
      dimension vtem(n)
      dimension z(n)
      dimension iwmp(1)
      dimension wmp(1)

c-----------------------------------------------------------------------
c this routine computes the product
c
c        sf*j*(p-inverse)*(su-inverse*v),
c
c where su and sf are diagonal scaling matrices, j is the jacobian
c matrix, and p is a preconditioning matrix.  the product is stored
c in z.  this is computed either by a call to jac and psol, or by a
c difference quotient, which involves calls to f and psol.
c-----------------------------------------------------------------------
c
c on entry
c
c   n      = problem size, passed to f, jac and psol.
c
c   nu     = length of array u.
c
c   u      = array containing current approximate to a solution of
c            f(u) = 0.
c
c   savf   = array containing current value of f(u).
c
c   v      = real array of length n.
c
c   ftem   = work array of length n.
c
c   vtem   = work array of length n used to store the unscaled
c            version of v.
c
c   su     = array of length n containing scale factors for u.
c
c   sf     = array of length n containing scale factors for f.
c
c   wmp    = real work array used by preconditioner psol.
c
c   iwmp   = integer work array used by preconditioner psol.
c
c on return
c
c   z      = array of length n containing desired scaled
c            matrix-vector product.
c
c in addition, this routine uses the common variables
c mfdif, nfe, and nje.
c-----------------------------------------------------------------------
c-----------------------------------------------------------------------
c     nks001 common block.
c-----------------------------------------------------------------------

      double precision delt
      double precision sqteta
      double precision rhom

      integer locwmp
      integer locimp
      integer iersl
      integer kmp
      integer mmax
      integer methn
      integer methk
      integer ipflg
      integer mfdif
      integer nfe
      integer nje
      integer nni
      integer nli
      integer npe
      integer nps
      integer ncfl
      integer nbcf

      common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp,
     *                mmax, methn, methk, ipflg, mfdif, nfe, nje, nni,
     *                nli,  npe, nps, ncfl, nbcf
c
      double precision sigma,tmp,utv,vtv,suitv,ddot,zero
      save
      data zero/0.0d0/
c
      if (mfdif .eq. 2) go to 200
c-----------------------------------------------------------------------
c mfdif = 1.  call user-supplied routine for computing j*v.
c-----------------------------------------------------------------------
      do 110 i = 1,n
 110    vtem(i) = v(i)/su(i)
      if (ipflg .eq. 1) then
        ier = 0
        call psol (n, nu, u, savf, f, jac, z, wmp, iwmp, vtem, ier)
        npsl = npsl + 1
        if (ier .ne. 0) return
        endif
      call jac (n, nu, u, savf, vtem, z)
      nje = nje + 1
      do 120 i = 1,n
 120    z(i) = z(i)*sf(i)
      return
c-----------------------------------------------------------------------
c mfdif = 2.  internally generated vector j*v.
c-----------------------------------------------------------------------
 200  continue
c set vtem = (su-inverse) * v.
      do 210 i = 1,n
        vtem(i) = v(i)/su(i)
 210    continue
      if (ipflg .eq. 0) then
c ipflg = 0.  save u in z and increment u by sigma*vtem.
          do 215 i = 1,n
 215        z(i) = u(i)*su(i)
          utv = ddot(n, z, 1, v, 1)
          suitv = zero
          do 220 i = 1,n
 220        suitv = suitv + dabs(v(i))
c vtv = 1 in this case.
          sigma = sqteta*dmax1(dabs(utv),suitv)
          sigma = dsign(sigma,utv)
          do 230 i = 1,n
 230        z(i) = u(i)
          do 240 i = 1,n
 240        u(i) = z(i) + sigma*vtem(i)
        else
c ipflg = 1. apply inverse of right preconditioner
          ier = 0
          call psol (n, nu, u, savf, f, jac, ftem, wmp, iwmp, vtem, ier)
          npsl = npsl + 1
          if (ier .ne. 0) return
          vtv = zero
          suitv = zero
          do 260 i = 1,n
            tmp = vtem(i)*su(i)
            z(i) = tmp*su(i)
            vtv = vtv + tmp*tmp
 260        suitv = suitv + dabs(tmp)
          utv = ddot(n, u, 1, z, 1)
          sigma = sqteta*dmax1(dabs(utv),suitv)/vtv
          sigma = dsign(sigma,utv)
          do 270 i = 1,n
 270        z(i) = u(i)
          do 280 i = 1,n
 280        u(i) = z(i) + sigma*vtem(i)
        endif
      call f(n, nu, u, ftem)
      nfe = nfe + 1
      do 290 i = 1,n
 290    u(i) = z(i)
      do 300 i = 1,n
 300    z(i) = (ftem(i) - savf(i))/sigma
      do 310 i = 1,n
 310    z(i) = z(i)*sf(i)
      return
c----------------------- end of subroutine atv -------------------------
      end

c-----------------------------------------------------------------------
      subroutine orthog (vnew, v, hes, n, m, mmax, ldhes, iomp, snormw)

      integer n
      integer m
      integer mmax
      integer iomp
      integer ldhes

      double precision vnew
      double precision v
      double precision hes
      double precision snormw

      dimension vnew(n)
      dimension v(n,mmax)
      dimension hes(ldhes,mmax)
c-----------------------------------------------------------------------
c this routine orthogonalizes the vector vnew against the previous
c iomp vectors in the v array.  it uses a modified gram-schmidt
c orthogonalization procedure with conditional reorthogonalization.
c this is the version of 25 september 1985.
c-----------------------------------------------------------------------
c
c   npsl   = the number of calls to psol.
c on entry
c
c   vnew   = the vector of length n containing a scaled product
c            of the jacobian and the vector v(*,m).
c
c   v      = the n x m array containing the previous m
c            orthogonal vectors v(*,1) to v(*,m).
c
c   hes    = an m x m upper hessenberg matrix containing,
c            in hes(i,k), k.lt.m, scaled inner products of
c            a*v(*,k) and v(*,i).
c
c   ldhes  = the leading dimension of the hes array.
c
c   n      = the order of the matrix a, and the length of vnew.
c
c   m      = the current order of the matrix hes.
c
c   mmax   = the maximum allowable order of the matrix hes.
c
c   iomp   = the number of previous vectors the new vector vnew
c            must be made orthogonal to (iomp .le. mmax).
c
c on return
c
c   vnew   = the new vector orthogonal to v(*,1) to v(*,m).
c
c   hes    = upper hessenberg matrix with column m filled in with
c            scaled inner products of a*v(*,m) and v(*,i).
c
c   snormw = l-2 norm of vnew.
c
c-----------------------------------------------------------------------
      integer i, i0
      double precision arg, ddot, dnrm2, sumdsq, tem, vnrm
      save
c get norm of unaltered vnew for later use. ----------------------------
      vnrm = dnrm2 (n, vnew, 1)
c-----------------------------------------------------------------------
c do modified gram-schmidt on vnew = a*v(m).
c scaled inner products give new column of hes.
c projections of earlier vectors are subtracted from vnew.
c-----------------------------------------------------------------------
      i0 = max0(1,m-iomp+1)
      do 10 i = i0,m
        hes(i,m) = ddot(n, v(1,i), 1, vnew, 1)
        tem = -hes(i,m)
        call daxpy (n, tem, v(1,i), 1, vnew, 1)
 10     continue
c-----------------------------------------------------------------------
c compute snormw = norm of vnew.
c if vnew is small compared to its input value (in norm), then
c reorthogonalize vnew to v(*,1) through v(*,m).
c correct if relative correction exceeds 1000*(unit roundoff).
c finally, correct snormw using the dot products involved.
c-----------------------------------------------------------------------
      snormw = dnrm2 (n, vnew, 1)
      if (vnrm + 0.001d0*snormw .ne. vnrm) return
      sumdsq = 0.0d0
      do 30 i = i0,m
        tem = -ddot(n, v(1,i), 1, vnew, 1)
        if (hes(i,m) + 0.001d0*tem .eq. hes(i,m)) go to 30
        hes(i,m) = hes(i,m) - tem
        call daxpy (n, tem, v(1,i), 1, vnew, 1)
        sumdsq = sumdsq + tem**2
 30     continue
      if (sumdsq .eq. 0.0d0) return
      arg = dmax1(0.0d0,snormw**2 - sumdsq)
      snormw = dsqrt(arg)
c
      return
c----------------------- end of subroutine orthog ----------------------
      end
      subroutine dhefa (a, lda, n, ipvt, info, job)
      integer lda, n, ipvt(n), info, job
      double precision a(lda,n)
c-----------------------------------------------------------------------
c     this routine is a modification of the linpack routine sgefa and
c     performs an lu decomposition of an upper hessenberg matrix a.
c     there are two options available..
c
c          (1)  performing a fresh factorization
c          (2)  updating the lu factors by adding a row and a
c               column to the matrix a.
c-----------------------------------------------------------------------
c     dhefa factors an upper hessenberg matrix by elimination.
c
c     on entry
c
c        a       real(lda, n)
c                the matrix to be factored.
c
c        lda     integer
c                the leading dimension of the array  a .
c
c        n       integer
c                the order of the matrix  a .
c
c        job    integer
c                job = 1    means that a fresh factorization of the
c                           matrix a is desired.
c                job .ge. 2 means that the current factorization of a
c                           will be updated by the addition of a row
c                           and a column.
c     on return
c
c        a       an upper triangular matrix and the multipliers
c                which were used to obtain it.
c                the factorization can be written  a = l*u  where
c                l  is a product of permutation and unit lower
c                triangular matrices and  u  is upper triangular.
c
c        ipvt    integer(n)
c                an integer vector of pivot indices.
c
c        info    integer
c                = 0  normal value.
c                = k  if  u(k,k) .eq. 0.0 .  this is not an error
c                     condition for this subroutine, but it does
c                     indicate that dhesl will divide by zero if called.
c
c     modification of linpack. this version dated 07/20/83 .
c     peter brown, university of houston, lawrence livermore  lab.
c
c     subroutines and functions
c
c     blas daxpy,idamax
c
c-----------------------------------------------------------------------
c     internal variables
c
      double precision t
      integer idamax, j, k, km1, kp1, l, nm1
      save
c
      if (job .gt. 1) go to 80
c-----------------------------------------------------------------------
c a new facorization is desired.  this is essentially the linpack
c code with the exception that we know there is only one nonzero
c element below the main diagonal.
c-----------------------------------------------------------------------
c
c     gaussian elimination with partial pivoting
c
      info = 0
      nm1 = n - 1
      if (nm1 .lt. 1) go to 70
      do 60 k = 1, nm1
         kp1 = k + 1
c
c        find l = pivot index
c
         l = idamax(2,a(k,k),1) + k - 1
         ipvt(k) = l
c
c        zero pivot implies this column already triangularized
c
         if (a(l,k) .eq. 0.0d0) go to 40
c
c           interchange if necessary
c
            if (l .eq. k) go to 10
               t = a(l,k)
               a(l,k) = a(k,k)
               a(k,k) = t
   10       continue
c
c           compute multipliers
c
            t = -1.0d0/a(k,k)
            a(k+1,k) = a(k+1,k)*t
c
c           row elimination with column indexing
c
            do 30 j = kp1, n
               t = a(l,j)
               if (l .eq. k) go to 20
                  a(l,j) = a(k,j)
                  a(k,j) = t
   20          continue
               call daxpy(n-k,t,a(k+1,k),1,a(k+1,j),1)
   30       continue
         go to 50
   40    continue
            info = k
   50    continue
   60 continue
   70 continue
      ipvt(n) = n
      if (a(n,n) .eq. 0.0d0) info = n
      return
c-----------------------------------------------------------------------
c the old factorization of a will be updated.  a row and a column
c has been added to the matrix a.
c n-1 is now the old order of the matrix.
c-----------------------------------------------------------------------
  80  continue
      nm1 = n - 1
c-----------------------------------------------------------------------
c perform row interchanges on the elements of the new column, and
c perform elimination operations on the elements using the multipliers.
c-----------------------------------------------------------------------
      if (nm1 .le. 1) go to 105
      do 100 k = 2,nm1
        km1 = k - 1
        l = ipvt(km1)
        t = a(l,n)
        if (l .eq. km1) go to 90
          a(l,n) = a(km1,n)
          a(km1,n) = t
  90    continue
        a(k,n) = a(k,n) + a(k,km1)*t
 100    continue
 105  continue
c-----------------------------------------------------------------------
c complete update of factorization by decomposing last 2x2 block.
c-----------------------------------------------------------------------
      info = 0
c
c        find l = pivot index
c
         l = idamax(2,a(nm1,nm1),1) + nm1 - 1
         ipvt(nm1) = l
c
c        zero pivot implies this column already triangularized
c
         if (a(l,nm1) .eq. 0.0d0) go to 140
c
c           interchange if necessary
c
            if (l .eq. nm1) go to 110
               t = a(l,nm1)
               a(l,nm1) = a(nm1,nm1)
               a(nm1,nm1) = t
  110       continue
c
c           compute multipliers
c
            t = -1.0d0/a(nm1,nm1)
            a(n,nm1) = a(n,nm1)*t
c
c           row elimination with column indexing
c
               t = a(l,n)
               if (l .eq. nm1) go to 120
                  a(l,n) = a(nm1,n)
                  a(nm1,n) = t
  120          continue
               a(n,n) = a(n,n) + t*a(n,nm1)
         go to 150
  140    continue
            info = nm1
  150    continue
      ipvt(n) = n
      if (a(n,n) .eq. 0.0d0) info = n
      return
c----------------------- end of subroutine dhefa -----------------------
      end
      subroutine dhesl(a,lda,n,ipvt,b)
      integer lda, n, ipvt(n)
      double precision a(lda,n), b(n)
c-----------------------------------------------------------------------
c this is essentially the linpack routine sgesl except for changes
c due to the fact that a is an upper hessenberg matrix.
c-----------------------------------------------------------------------
c     dhesl solves the real system a * x = b
c     using the factors computed by dhefa.
c
c     on entry
c
c        a       real(lda, n)
c                the output from dhefa.
c
c        lda     integer
c                the leading dimension of the array  a .
c
c        n       integer
c                the order of the matrix  a .
c
c        ipvt    integer(n)
c                the pivot vector from dhefa.
c
c        b       real(n)
c                the right hand side vector.
c
c
c     on return
c
c        b       the solution vector  x .
c
c
c     modification of linpack. this version dated 07/20/83 .
c     peter brown, university of houston, lawrence livermore  lab.
c
c     subroutines and functions
c
c     blas daxpy
c-----------------------------------------------------------------------
c     internal variables
c
      double precision t
      integer k,kb,l,nm1
      save
c
      nm1 = n - 1
c
c         solve  a * x = b
c        first solve  l*y = b
c
         if (nm1 .lt. 1) go to 30
         do 20 k = 1, nm1
            l = ipvt(k)
            t = b(l)
            if (l .eq. k) go to 10
               b(l) = b(k)
               b(k) = t
   10       continue
            b(k+1) = b(k+1) + t*a(k+1,k)
   20    continue
   30    continue
c
c        now solve  u*x = y
c
         do 40 kb = 1, n
            k = n + 1 - kb
            b(k) = b(k)/a(k,k)
            t = -b(k)
            call daxpy(k-1,t,a(1,k),1,b(1),1)
   40    continue
      return
c----------------------- end of subroutine dhesl -----------------------
      end
      subroutine spigmr (n, nu, u, savf, b, su, sf, mmax, mmaxp1, igmp,
     *                   eps, f, jac, psol, npsl, x, v, hes, q, hessv,
     *                   mgmr, wmp, iwmp, wk, methn, bnrm, ipflg, iflag,
     *                   rho)
      external f
      external jac
      external psol
c-----------------------------------------------------------------------
c this routine solves the linear system a * x = b using a scaled
c preconditioned version of the generalized minimum residual method.
c-----------------------------------------------------------------------
c
c on entry
c
c   n      = problem size, passed to f, jac and psol.  also, the order
c            of the matrix a, and the lengths of the vectors savf,
c            b, su, sf, and x.
c
c   nu     = length of array u.
c
c   u      = array containing current approximate solution to
c            f(u) = 0.
c
c   savf   = array containing current value of f(u).
c
c   b      = the right hand side of the system a*x = b.
c            b is also used as work space when computing the
c            final approximation.
c
c   su     = array of length n containing scale factors for u.
c
c   sf     = array of length n containing scale factors for f.
c
c   x      = the initial guess of the solution of the system
c            a*x = b.
c
c   eps    = tolerance on residuals b-a*x in scaled euclidean norm.
c
c   mmax   = the maximum allowable order of the matrix h.
c
c   igmp   = the number of previous vectors the new vector vnew
c            must be made orthogonal to.  igmp .le. mmax.
c
c   wmp    = real work array used by preconditioner psol.
c
c   iwmp   = integer work array used by preconditioner psol.
c
c   methn  = integer flag indicating which global strategy is
c            being used (=1 dogleg, =2 linesearch backtrack).
c            when methn=1, the x array is computed in dogstp, and
c            only the ygm value is returned in b.  otherwise, x
c            contains the gmres solution.
c
c   ipflg  = integer flag
c            ipflg=0 means no preconditioning is done.
c            ipflg.ne.0 means preconditioning is being performed.
c
c on return
c
c   x      = the final computed approximation to the solution
c            of the system a*x = b (when methn = 2).
c
c   rho    = the scaled euclidean norm of the residual associated
c            with x.
c
c   b      = the ygm value (when methn = 1).
c
c   bnrm   = the scaled euclidean norm of the right hand
c            side in a*x = b.
c
c   mgmr   = the number of iterations performed and
c            the current order of the upper hessenberg
c            matrix hes.
c
c   npsl   = the number of calls to psol.
c
c   v      = the n x (mgmr+1) array containing the mgmr
c            orthogonal vectors v(*,1) to v(*,mgmr).
c
c   hes    = the upper triangular factor of the qr decomposition
c            of the (mgmr+1) x mgmr upper hessenberg matrix whose
c            entries are the scaled inner-products of a*v(*,i)
c            and v(*,k).
c
c   hessv  = the (mgmr+1) x mgmr upper hessenberg matrix whose
c            entries are the scaled inner-products of a*v(*,i)
c            and v(*,k).
c
c   q      = a real array containing the components of the givens
c            rotations used in the qr decomposition of hes.  it
c            is loaded in dheqr and used in dhels.
c
c   iflag  = integer output flag..
c            iflag=0 means convergence in mgmr iterations, mgmr.le.mmax.
c            iflag=1 means the convergence test did not pass in mmax
c                    iterations.
c            iflag=2 means there was a recoverable error in psol
c                    caused by the preconditioner being out of date.
c            iflag=-1 means there was a nonrecoverable error in psol.
c
c note: the x array is used as work space in routines psol and atv.
c-----------------------------------------------------------------------

      double precision v
      double precision hes
      double precision hessv
      double precision su
      double precision sf
      double precision q
      double precision b
      double precision savf
      double precision x
      double precision u
      double precision wk
      double precision wmp
      double precision dnrm2

      integer n
      integer nu
      integer iwmp
      integer mmax
      integer mmaxp1
      integer iprint
      integer iunit
      integer iermsg
      integer ier
      integer npsl
      integer methn
      integer ipflg
      integer j
      integer i
      integer iflag
      integer info
      integer igmp
      integer k
      integer mgmr
      integer m
      integer mp1


      dimension v(n,mmaxp1)
      dimension hes(mmaxp1,mmax)
      dimension hessv(mmaxp1,mmax)
      dimension su(n)
      dimension sf(n)
      dimension q(2*mmax)
      dimension b(n)
      dimension savf(n)
      dimension x(n)
      dimension u(nu)
      dimension wmp(1)
      dimension wk(n)
      dimension iwmp(1)

      double precision bnrm
      double precision eps
      double precision prod
      double precision rho
      double precision snormw
      double precision tem

c-----------------------------------------------------------------------
c     nks002 common block.
c-----------------------------------------------------------------------
      common /nks002/ iprint, iunit, iermsg
      save
c
      iflag = 0
      mgmr = 0
      npsl = 0
c zero out the hes and hessv arrays ------------------------------------
      do 15 j = 1,mmax
        do 10 i = 1,mmaxp1
          hes(i,j) = 0.0d0
          hessv(i,j) = 0.0d0
 10       continue
 15     continue
c-----------------------------------------------------------------------
c the initial residual is the vector b.  apply scaling to b, and
c then normalize as v(*,1).
c-----------------------------------------------------------------------
      do 20 i = 1,n
 20     v(i,1) = b(i)*sf(i)
      bnrm = dnrm2(n, v, 1)
      tem = 1.0d0/bnrm
      call dscal (n, tem, v(1,1), 1)
c-----------------------------------------------------------------------
c main loop to compute the vectors v(*,2) to v(*,mmax).
c the running product prod is needed for the convergence test.
c-----------------------------------------------------------------------
      prod = 1.0d0
      if (iprint .gt. 1) write(iunit,30)
 30   format(' ------ in routine spigmr ------')
      do 100 m = 1,mmax
        mgmr = m
c-----------------------------------------------------------------------
c call routine atv to compute vnew = a*v(m).
c call routine orthog to orthogonalize the new vector vnew = v(*,m+1).
c call routine dheqr to update the factors of hes.
c-----------------------------------------------------------------------
        call atv (n, nu, u, savf, v(1,m), su, sf, x, f, jac, psol,
     *            v(1,m+1), wk, wmp, iwmp, ier, npsl)
        if (ier .ne. 0) go to 300
        call orthog (v(1,m+1),v,hes,n,m,mmax,mmaxp1,igmp,snormw)
        hes(m+1,m) = snormw
        hessv(m+1,m) = snormw
        do 60 i = 1,mgmr
          hessv(i,m) = hes(i,m)
 60       continue
        call dheqr(hes,mmaxp1,m,q,info,m)
        if (info .eq. m) go to 105
c-----------------------------------------------------------------------
c update rho, the estimate of the norm of the residual b - a*xl.
c-----------------------------------------------------------------------
        prod = prod*q(2*m)
        rho = dabs(prod*bnrm)
c-----------------------------------------------------------------------
c test for convergence.  if passed, compute approximation xl.
c if failed and m .lt. mmax continue iterating.
c-----------------------------------------------------------------------
        if (iprint .gt. 1) write(iunit,65) m, rho, eps
 65     format(' m , res, eps ',i4,e25.16,1x,e25.16)
        if (rho .le. eps) go to 200
        if (m .eq. mmax) go to 110
c-----------------------------------------------------------------------
c rescale so that the norm of v(1,m+1) is one.
c-----------------------------------------------------------------------
        tem = 1.0d0/snormw
        call dscal (n, tem, v(1,m+1), 1)
 100    continue
 105  continue
      iflag = 2
      return
 110  iflag = 1
c-----------------------------------------------------------------------
c compute the approximation xl to the solution.
c since the vector x was used as work space in routine orthog and since
c the initial guess of the newton correction is zero, x must be set
c back to zero.
c-----------------------------------------------------------------------
 200  continue
      m = mgmr
      mp1 = m + 1
      do 210 k = 1,mp1
 210    b(k) = 0.0d0
      b(1) = bnrm
      call dhels(hes,mmaxp1,m,q,b)
      if (methn .eq. 2) then
        do 220 k = 1,n
 220      x(k) = 0.0d0
        do 230 i = 1,m
          call daxpy(n,b(i),v(1,i),1,x,1)
 230      continue
        do 240 i = 1,n
 240      x(i) = x(i)/su(i)
        if (ipflg .eq. 1) then
          ier = 0
          call psol (n, nu, u, savf, f, jac, wk, wmp, iwmp, x, ier)
          npsl = npsl + 1
          if (ier .ne. 0) go to 300
          endif
        endif
      return
c-----------------------------------------------------------------------
c this block handles error returns forced by routine psol.
c-----------------------------------------------------------------------
 300  continue
      if (ier .lt. 0) iflag = -1
      if (ier .gt. 0) iflag = 3
c
      return
c----------------------- end of subroutine spigmr ----------------------
      end

c-----------------------------------------------------------------------
      subroutine dheqr(a, lda, n, q, info, ijob)
c-----------------------------------------------------------------------
c     this routine performs a qr decomposition of an upper
c     hessenberg matrix a.  there are two options available:
c
c          (1)  performing a fresh decomposition
c          (2)  updating the qr factors by adding a row and a
c               column to the matrix a.
c-----------------------------------------------------------------------
c     dheqr decomposes an upper hessenberg matrix by using givens
c     rotations.
c
c     on entry
c
c        a       real(lda, n)
c                the matrix to be decomposed.
c
c        lda     integer
c                the leading dimension of the array  a .
c
c        n       integer
c                a is an (n+1) by n hessenberg matrix.
c
c        ijob    integer
c                = 1     means that a fresh decomposition of the
c                        matrix a is desired.
c                .ge. 2  means that the current decomposition of a
c                        will be updated by the addition of a row
c                        and a column.
c     on return
c
c        a       the upper triangular matrix r.
c                the factorization can be written q*a = r, where
c                q is a product of givens rotations and r is upper
c                triangular.
c
c        q       real(2*n)
c                the factors c and s of each givens rotation used
c                in decomposing a.
c
c        info    integer
c                = 0  normal value.
c                = k  if  a(k,k) .eq. 0.0 .  this is not an error
c                     condition for this subroutine, but it does
c                     indicate that dhels will divide by zero
c                     if called.
c
c     this version dated 1/13/86.
c     peter brown, university of houston, lawrence livermore  lab.
c
c     subroutines and functions
c
c     dabs, dsqrt
c
c-----------------------------------------------------------------------
      integer lda
      integer n
      integer info
      integer ijob
      double precision a(lda,n)
      double precision q(2*n)
      double precision c
      double precision s
      double precision t
      double precision t1
      double precision t2
      integer i
      integer j
      integer k
      integer kp1
      integer iq
      integer nm1
      integer km1
      save
c
      if (ijob .gt. 1) go to 70
c-----------------------------------------------------------------------
c a new facorization is desired.
c-----------------------------------------------------------------------
c
c     qr decomposition without pivoting
c
      info = 0
      do 60 k = 1, n
         km1 = k - 1
         kp1 = k + 1
c
c           compute kth column of r.
c           first, multiply the kth column of a by the previous
c           k-1 givens rotations.
c
            if (km1 .lt. 1) go to 20
            do 10 j = 1, km1
              i = 2*(j-1) + 1
              t1 = a(j,k)
              t2 = a(j+1,k)
              c = q(i)
              s = q(i+1)
              a(j,k) = c*t1 - s*t2
              a(j+1,k) = s*t1 + c*t2
   10         continue
c
c           compute givens components c and s
c
   20       continue
            iq = 2*km1 + 1
            t1 = a(k,k)
            t2 = a(kp1,k)
            if (t2 .ne. 0.0d0) go to 30
              c = 1.0d0
              s = 0.0d0
              go to 50
   30       continue
            if (dabs(t2) .lt. dabs(t1)) go to 40
              t = t1/t2
              s = -1.0d0/dsqrt(1.0d0+t*t)
              c = -s*t
              go to 50
   40       continue
              t = t2/t1
              c = 1.0d0/dsqrt(1.0d0+t*t)
              s = -c*t
   50       continue
            q(iq) = c
            q(iq+1) = s
            a(k,k) = c*t1 - s*t2
            if (a(k,k) .eq. 0.0d0) info = k
   60 continue
      return
c-----------------------------------------------------------------------
c the old factorization of a will be updated.  a row and a column
c has been added to the matrix a.
c n by n-1 is now the old size of the matrix.
c-----------------------------------------------------------------------
  70  continue
      nm1 = n - 1
c-----------------------------------------------------------------------
c multiply the new column by the n previous givens rotations.
c-----------------------------------------------------------------------
      do 100 k = 1,nm1
        i = 2*(k-1) + 1
        t1 = a(k,n)
        t2 = a(k+1,n)
        c = q(i)
        s = q(i+1)
        a(k,n) = c*t1 - s*t2
        a(k+1,n) = s*t1 + c*t2
 100    continue
c-----------------------------------------------------------------------
c complete update of decomposition by forming last givens rotation,
c and multiplying it times the column vector (a(n,n),a(np1,n)).
c-----------------------------------------------------------------------
      info = 0
      t1 = a(n,n)
      t2 = a(n+1,n)
      if (t2 .ne. 0.0d0) go to 110
        c = 1.0d0
        s = 0.0d0
        go to 130
 110  continue
      if (dabs(t2) .lt. dabs(t1)) go to 120
        t = t1/t2
        s = -1.0d0/dsqrt(1.0d0+t*t)
        c = -s*t
        go to 130
 120  continue
        t = t2/t1
        c = 1.0d0/dsqrt(1.0d0+t*t)
        s = -c*t
 130  continue
      iq = 2*n - 1
      q(iq) = c
      q(iq+1) = s
      a(n,n) = c*t1 - s*t2
      if (a(n,n) .eq. 0.0d0) info = n
      return
c----------------------- end of subroutine dheqr -----------------------
      end

      subroutine dhels(a, lda, n, q, b)
      integer lda
      integer n
      double precision a(lda,n)
      double precision b(n)
      double precision q(2*n)
c-----------------------------------------------------------------------
c this is part of the linpack routine sgesl with changes
c due to the fact that a is an upper hessenberg matrix.
c-----------------------------------------------------------------------
c     dhels solves the least squares problem
c
c           min (b-a*x,b-a*x)
c
c     using the factors computed by dheqr.
c
c     on entry
c
c        a       real(lda, n)
c                the output from dheqr which contains the upper
c                triangular factor r in the qr decomposition of a.
c
c        q       real(2*n)
c                the coefficients of the n givens rotations
c                used in the qr factorization of a.
c
c        lda     integer
c                the leading dimension of the array  a .
c
c        n       integer
c                a is originally an (n+1) by n matrix.
c
c        b       real(n)
c                the right hand side vector.
c
c
c     on return
c
c        b       the solution vector  x .
c
c
c     modification of linpack. this version dated 1/13/86.
c     peter brown, university of houston, lawrence livermore  lab.
c
c     subroutines and functions
c
c     blas daxpy
c-----------------------------------------------------------------------
c
c     internal variables
c
      double precision c
      double precision s
      double precision t
      double precision t1
      double precision t2
      integer k
      integer kb
      integer kp1
      integer iq

      save
c
c         minimize (b-a*x,b-a*x)
c        first form q*b.
c
c          do 20 k = 1, n

c modified - jm

         do 20 k = 1, n-1
            kp1 = k + 1
            iq = 2*(k-1) + 1
            c = q(iq)
            s = q(iq+1)
            t1 = b(k)
            t2 = b(kp1)
            b(k) = c*t1 - s*t2
            b(kp1) = s*t1 + c*t2
   20    continue
c
c        now solve  r*x = q*b.
c
         do 40 kb = 1, n
            k = n + 1 - kb
            b(k) = b(k)/a(k,k)
            t = -b(k)
            call daxpy(k-1,t,a(1,k),1,b(1),1)
   40    continue
      return
c----------------------- end of subroutine dhels -----------------------
      end
      subroutine dogdrv (n, nu, wm, lenwm, iwm, leniwm, u, savf, f1nrm,
     *                   x, su, sf, stepmx, stptol, tau, iret, uprev,
     *                   fprev, unew, f1new, mxtkn, f, jac, psol)
c-----------------------------------------------------------------------
c this is the real version of subroutine dogdrv, which is the driver for
c the dogleg step.  its purpose is to find a unew on the dogleg curve
c such that f(unew) .le. f(u) + alpha*gt(unew-u) (alpha=1.d-4 used),
c and scaled steplength = tau, starting with the input tau but
c increasing or decreasing tau if necessary.  also, it produces the
c starting trust region size tau for the next iteration.
c
c on entry
c
c   n      = size of the nonlinear system, and the length of arrays
c            unew, savf, su and sf.
c
c   nu     = length of u array.
c
c   wm     = real array of length lenwm used as work space in the
c            spigmr routine.  it contains the basis vectors v(i)
c            as well as the hessenberg matrix and its qr factors.
c
c   lenwm  = length of wm array.
c
c   iwm    = integer array of length leniwm used as work space.
c            it contains the current size m of the krylov subspace
c            (in iwm(1)), and the integer work space used by the
c            psol routine.
c
c   leniwm = length of iwm array.
c
c   u      = real array containing the previous approximate root.
c
c   f1nrm  = real scalar containing 0.5*norm(sf*f(u))**2, where
c            norm() denotes the euclidean norm.
c
c   su     = real array containing scaling factors for u.
c
c   sf     = real array containing scaling factors for f(u).
c
c   stepmx = real scalar containing the maximum allowable stepsize
c            length.
c
c   stptol = user-supplied tolerance on the minimum step length
c            s = unew-u.
c
c   tau    = the current size of the trust region.  it is the trust
c            region size tried at the beginning of the dogleg step.
c
c   uprev  = real array of length n used to hold a previous value
c            of u in the dogleg step.
c
c   fprev  = real array of length n used to hold f(uprev).
c
c
c on return
c
c   unew   = real array containing the current approximate root.
c
c   savf   = real array containing the n-vector f(unew).
c
c   f1new  = real scalar containing 0.5*norm(sf*f(unew))**2, where
c            norm() denotes the euclidean norm.
c
c   x      = real array of length n containing the step computed
c            by the dogleg strategy.
c
c   mxtkn  = logical flag indicating if the step just taken was of
c            maximum length stepmx.
c
c   iret   = output flag.
c            iret=0 means that a satisfactory unew was found.
c            iret=1 means that the routine failed to find a unew
c                   that was sufficiently distinct from u.  this
c                   failure causes the nonlinear iteration to halt.
c
c-----------------------------------------------------------------------

      external f
      external jac
      external psol

      integer n
      integer nu
      integer lenwm
      integer iwm
      integer leniwm
      integer iret
      integer np1
      integer mmaxp1
      integer m
      integer mp1
      integer iv
      integer iwk
      integer iycp
      integer iynew
      integer iygm
      integer ihsv

      double precision u
      double precision uprev
      double precision fprev
      double precision unew
      double precision f1nrm
      double precision f1prv
      double precision f1new
      double precision x
      double precision su
      double precision sf
      double precision beta
      double precision tau
      double precision stepmx
      double precision stptol
      double precision cpl
      double precision gml
      double precision wm
      double precision xl
      double precision savf

      dimension wm(lenwm)
      dimension iwm(leniwm)
      dimension x(n)
      dimension u(nu)
      dimension uprev(n)
      dimension unew(n)
      dimension savf(n)
      dimension fprev(n)
      dimension su(n)
      dimension sf(n)

      logical mxtkn, dog1, nwttkn

c-----------------------------------------------------------------------
c     nks001 common block.
c-----------------------------------------------------------------------

      integer locwmp
      integer locimp
      integer iersl
      integer kmp
      integer mmax
      integer methn
      integer methk
      integer ipflg
      integer mfdif
      integer nfe
      integer nje
      integer nni
      integer nli
      integer npe
      integer nps
      integer ncfl
      integer nbcf

      double precision delt
      double precision sqteta
      double precision rhom

      common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp,
     *                mmax, methn, methk, ipflg, mfdif, nfe, nje, nni,
     *                nli,  npe, nps, ncfl, nbcf
c-----------------------------------------------------------------------
c     nks002 common block.
c-----------------------------------------------------------------------

      integer iprint
      integer iunit
      integer iermsg

      common /nks002/ iprint, iunit, iermsg
      save
c
      iret = 4
      dog1 = .true.
      if (iprint .gt. 1) write(iunit,5)
 5    format(' ------ in routine dogdrv ------ ')
c-----------------------------------------------------------------------
c initialize pointers into the wm array.
c-----------------------------------------------------------------------
      np1 = n + 1
      mmaxp1 = mmax + 1
      beta = wm(1)
      m = iwm(1)
      mp1 = m + 1
      iv = 3
      iwk = iv + n*mmax
      iygm = iwk + n + 1
      iycp = iygm + mmax
      iynew = iycp + mmax
      ihsv = iwk + n + 1 + mmax*(mmaxp1+1) + 1
      iersl = 0
 10   continue
c-----------------------------------------------------------------------
c find new step by the dogleg algorithm.
c-----------------------------------------------------------------------
      call dogstp (m, mp1, mmaxp1, wm(iygm), wm(iycp), beta, wm(ihsv),
     *             tau, wm(iynew), stepmx, dog1, nwttkn, cpl, gml, n,
     *             nu, wm(iv), x, xl, wm(iwk), wm(locwmp), iwm(locimp),
     *             u, su, savf, f, jac, psol)
      if (iersl .lt. 0) then
c nonrecoverable error from psol.  set iersl and return.
         iersl = 9
         return
         endif
c-----------------------------------------------------------------------
c check new point and update trust radius.
c-----------------------------------------------------------------------
      call trgupd (m, mp1, mmaxp1, n, nu, np1, u, savf, f1nrm, x, xl,
     *             wm(iynew), su, sf, nwttkn, stepmx, beta, wm(ihsv),
     *             stptol,  mxtkn, tau, uprev, fprev, f1prv, unew,
     *             f1new, wm(iwk), iret, f)
c-----------------------------------------------------------------------
c if iret .eq. 2 or 3, go to 10 to repeat the step.
c if iret .eq. 0 or 1, return to calling program.
c-----------------------------------------------------------------------
      if (iprint .gt. 1) write(iunit,9000) tau,cpl,gml,iret
      if (iret .ge. 2) go to 10
      return
 9000 format(' tau= ',g12.4,' cpl= ',g12.4,' gml= ',g12.4,' iret= ',i2)
c----------------------- end of subroutine dogdrv ----------------------
      end

c-----------------------------------------------------------------------
      subroutine dogstp (m, mp1, mmaxp1, ygm, ycp, beta, hes, tau, ynew,
     *                  stepmx, dog1, nwttkn, cpl, gml, n, nu, v, xnew,
     *                  xnewl, wk, wmp, iwmp, u, su, savf, f, jac, psol)
      external f
      external jac
      external psol

      integer m
      integer mp1
      integer mmaxp1
      integer n
      integer nu
      integer iwmp

      double precision ygm
      double precision ycp
      double precision beta
      double precision hes
      double precision tau
      double precision ynew
      double precision xnew
      double precision xnewl
      double precision v
      double precision wk
      double precision wmp
      double precision u
      double precision su
      double precision savf

      dimension ygm(m)
      dimension ycp(m)
      dimension hes(mmaxp1,m)
      dimension ynew(mp1)
      dimension xnew(n)
      dimension v(n,m)
      dimension wk(n)
      dimension wmp(1)
      dimension iwmp(1)
      dimension u(nu)
      dimension su(n)
      dimension savf(n)
      logical dog1
      logical nwttkn
c-----------------------------------------------------------------------
c this is subroutine dogstp, which computes the dogleg step for a
c given trust region size tau.
c
c on entry
c
c   m      = the current dimension of the krylov subspace returned
c            by routine spigmr.
c
c   mp1    = m + 1.
c
c   mmaxp1 = mmax + 1, where mmax is the maximum allowable krylov
c            subspace dimension.
c
c   ygm    = real array of length m containing the solution of the
c            m-dimensional least-squares problem computed in spigmr.
c
c   hes    = real mmaxp1-by-m array containing the hessenberg
c            matrix used in spigmr.
c
c   tau    = real scalar containing the current trust region size.
c
c   stepmx = real scalar containing the maximum allowable stepsize
c            length.
c
c   dog1   = logical variable.
c            dog1=.true. means that this is the first call to
c                        this routine by dogdrv for the current step.
c                        the length of ygm is calculated and ycp
c                        is calculated.
c            dog1=.false. means this is not the first call to this
c                         routine during the current step.
c
c   n      = size of the nonlinear system, and the length of arrays
c            u, xnew, savf, su and sf.
c
c   v      = real n-by-m array containing the basis vectors for
c            the krylov space.
c   wmp    = real work array passed directly to psol routine.
c
c   iwmp   = integer work array passed directly to psol routine.
c
c   wk     = real array of length n used as work space.
c
c   u      = real array containing the previous approximate root.
c
c   beta   = real scalar containing norm(sf*f(u)), where norm() is
c            the euclidean norm.
c
c   su     = real array containing scaling factors for u.
c
c   sf     = real array containing scaling factors for f(u).
c
c on return
c
c   ycp    = real array of length m containing the cauchy point on
c            the dogleg curve. (only calculated when nwttkn=.false.)
c
c   cpl    = real scalar containing the length of the vector ycp.
c            (cpl = 0.d0  when nwttkn=.true.)
c
c   gml    = real scalar containing the length as the vector ygm.
c
c   ynew   = real array of length m+1 used in calculating xnew and ycp.
c
c   xnew   = real array containing the current dogleg step.
c
c   xnewl  = real scalar containing the scaled length of the
c            vector p*xnew, where p is the preconditioner matrix, and
c            is equal to norm(ynew).
c
c   nwttkn = logical variable.
c            nwttkn=.true. means that tau .ge. the length of the
c                          gmres step, and so the full gmres step
c                          is returned in xnew.
c            nwttkn=.false. means that tau .lt. the length of the
c                           gmres step, and so the cauchy point
c                           ycp must be calculated and the point
c                           on the dogleg curve found.
c
c-----------------------------------------------------------------------

      integer ier
      integer i
      
      double precision a
      double precision b
      double precision c
      double precision cpl
      double precision gml
      double precision s
      double precision t
      double precision tem1
      double precision tem2
      double precision stepmx
      double precision ddot
      double precision dnrm2

c-----------------------------------------------------------------------
c     nks001 common block.
c-----------------------------------------------------------------------

      integer locwmp
      integer locimp
      integer iersl
      integer kmp
      integer mmax
      integer methn
      integer methk
      integer ipflg
      integer mfdif
      integer nfe
      integer nje
      integer nni
      integer nli
      integer npe
      integer nps
      integer ncfl
      integer nbcf

      double precision delt
      double precision sqteta
      double precision rhom

      common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp,
     *                mmax, methn, methk, ipflg, mfdif, nfe, nje, nni,
     *                nli,  npe, nps, ncfl, nbcf
c
      save
c
      if (dog1) then
        gml = dnrm2(m, ygm, 1)
        cpl = 0.0d0
        endif
      if (gml .le. tau) then
          nwttkn = .true.
          do 10 i = 1,m
            ynew(i) = ygm(i)
 10         continue
          tau = gml
c         return
        else
          if (dog1) then
c calculate cauchy point.
            dog1 = .false.
            do 20 i = 1,m
              ycp(i) = hes(1,i)
 20           continue
            tem1 = ddot(m, ycp, 1, ycp, 1)
            do 25 i = 1,mp1
              ynew(i) = 0.0d0
 25           continue
            do 30 i = 1,m
              call daxpy(mp1, ycp(i), hes(1,i), 1, ynew, 1)
 30           continue
            tem2 = ddot(mp1, ynew, 1, ynew, 1)
            t = beta*(tem1/tem2)
            call dscal(m, t, ycp, 1)
            cpl = dnrm2(m, ycp, 1)
c if this is the first iteration and no initial trust region was
c provided by the user, then set tau = min(cpl,stepmx).
            if (tau .eq. -1.0d0) tau = dmin1(cpl, stepmx)
            endif
          if (tau .le. cpl) then
              t = tau/cpl
              do 40 i = 1,m
                ynew(i) = t*ycp(i)
 40             continue
c             return
            else
              do 50 i = 1,m
                ynew(i) = ycp(i) - ygm(i)
 50             continue
              a = ddot(m, ynew, 1, ynew, 1)
              b = ddot(m, ygm, 1, ynew, 1)
              c = gml*gml - tau*tau
              s = ( -b - dsqrt(dmax1(b*b-a*c,0.d0)) )/a
              do 60 i = 1,m
                ynew(i) = ygm(i) + s*ynew(i)
 60             continue
c             return
            endif
        endif
c-----------------------------------------------------------------------
c calulate the dogleg step xnew = [v1,v2,...,vm]*ynew.
c-----------------------------------------------------------------------
      xnewl = dnrm2(m, ynew, 1)
      do 90 i = 1,n
        xnew(i) = 0.d0
 90     continue
      do 100 i = 1,m
        call daxpy (n, ynew(i), v(1,i), 1, xnew, 1)
 100    continue
      do 110 i = 1,n
 110    xnew(i) = xnew(i)/su(i)
      if (ipflg .ge. 1) then
        ier = 0
        call psol (n, nu, u, savf, f, jac, wk, wmp, iwmp, xnew, ier)
        nps = nps + 1
        if (ier .ne. 0) iersl = -1
        endif
      return
c----------------------- end of subroutine dogstp ----------------------
      end

c-----------------------------------------------------------------------
      subroutine trgupd (m, mp1, mmaxp1, n, nu, np1, u, savf, f1nrm, x,
     *                   xl, ynew, su, sf, nwttkn, stepmx, beta, hes,
     *                   stptol, mxtkn, tau, uprev, fprev, f1prv, upls,
     *                   f1pls, wk, iret, f)

      external f
      
      integer m
      integer mp1
      integer mmaxp1
      integer n
      integer nu
      integer np1
      integer iret
      integer i

      double precision u
      double precision f1nrm
      double precision x
      double precision xl
      double precision ynew
      double precision su
      double precision sf
      double precision stepmx
      double precision beta
      double precision hes
      double precision stptol
      double precision tau
      double precision uprev
      double precision fprev
      double precision f1prv
      double precision upls
      double precision f1pls
      double precision wk
      double precision savf

      dimension u(nu)
      dimension x(n)
      dimension ynew(m)
      dimension savf(n)
      dimension su(n)
      dimension sf(n)
      dimension hes(mmaxp1,m)
      dimension uprev(n)
      dimension fprev(n)
      dimension upls(n)
      dimension wk(np1)

c-----------------------------------------------------------------------
c this is the real version of subroutine trgupd, which determines
c if the x(tau) returned by dogstp satisfies
c
c   f(unew) .le. f(u) + alpha*gt(unew-u) (alpha=1.d-4 used),
c
c where unew = u + x(tau).  if not, then a new tau is computed which
c hopefully will give a satisfactory x(tau)  (iret=0).  if this cannot
c be accomplished, then iret=1 is returned, and the nonlinear iteration
c is halted.
c
c on entry
c
c   m      = the current dimension of the krylov subspace returned
c            by routine spigmr.
c
c   mp1    = m + 1.
c
c   mmaxp1 = mmax + 1, where mmax is the maximum allowable krylov
c            subspace dimension.
c
c   n      = size of the nonlinear system, and the length of arrays
c            unew, savf, su and sf.
c
c   nu     = length of u array.
c
c   np1    = n + 1.
c
c   u      = real array containing the previous approximate root.
c
c   savf   = real array containing f(u).
c
c   f1nrm  = real scalar containing 0.5*norm(sf*f(u))**2, where
c            norm() denotes the euclidean norm.
c
c   x      = real array of length n containing the current dogleg
c            step x(tau).
c
c   xl     = the scaled length of p*x(tau), where p is the
c            preconditioner matrix.
c
c   ynew   = real array of length m used in calculating x(tau).
c            x(tau) = [v1,...,vm]*ynew, where v1,...,vm are the
c            krylov basis vectors.
c
c   su     = real array containing scaling factors for u.
c
c   sf     = real array containing scaling factors for f(u).
c
c   nwttkn = logical variable.
c            nwttkn=.true. means that tau .ge. the length of the
c                          gmres step, and so the full gmres step
c                          is in x.
c            nwttkn=.false. means that tau .lt. the length of the
c                           gmres step, and so the cauchy point
c                           ycp has been calculated in dogstp.
c
c   stepmx = real scalar containing the maximum allowable stepsize
c            length.
c
c   beta   = real scalar containing norm(sf*f(u)), where norm() is
c            the euclidean norm.
c
c   hes    = real mmaxp1-by-m array containing the hessenberg
c            matrix used in spigmr.
c
c   stptol = user-supplied tolerance on the minimum step length
c            s = unew-u.
c
c   tau    = the current size of the trust region.  it is the trust
c            region size tried at the beginning of the dogleg step.
c
c   uprev  = real array of length n used to hold a previous value
c            of u in the dogleg step.
c
c   fprev  = real array of length n used to hold f(uprev).
c
c   f1prv  = real scalar used to hold 0.5*norm(sf*f(uprev))**2, where
c            norm() is the euclidean norm.
c
c   wk     = real array of length n used as work space.
c
c on return
c
c   upls   = real array containing the current approximate root.
c
c   savf   = real array containing the n-vector f(upls).
c
c   f1pls  = real scalar containing 0.5*norm(sf*f(upls))**2, where
c            norm() denotes the euclidean norm.
c
c   mxtkn  = logical flag indicating if the step just taken was of
c            maximum length stepmx.
c
c   iret   = output flag.
c            iret=0 means that a satisfactory upls was found.
c            iret=1 means that the routine failed to find a upls
c                   that was sufficiently distinct from u.
c            iret=2 means that the current tau was too big.  tau
c                   is reduced and the global step is repeated.
c            iret=3 means that the current tau may be too small.
c                   tau is increased and the global step is retried.
c
c   tau    = the new trust region size to be used at the beginning
c            of the next call to dogdrv, or for repeating the
c            current global step.
c
c-----------------------------------------------------------------------

      double precision alpha
      double precision pt1
      double precision pt5
      double precision pt75
      double precision pt99
      double precision zero
      double precision two
      double precision ddot
      double precision delf
      double precision dfpred
      double precision tautmp
      double precision fnrmp
      double precision rlngth
      double precision slpi
      double precision vnorm

      logical mxtkn
      logical nwttkn

c-----------------------------------------------------------------------
c     nks001 common block.
c-----------------------------------------------------------------------

      integer locwmp
      integer locimp
      integer iersl
      integer kmp
      integer mmax
      integer methn
      integer methk
      integer ipflg
      integer mfdif
      integer nfe
      integer nje
      integer nni
      integer nli
      integer npe
      integer nps
      integer ncfl
      integer nbcf

      double precision delt
      double precision sqteta
      double precision rhom

      common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp,
     *                mmax, methn, methk, ipflg, mfdif, nfe, nje, nni,
     *                nli,  npe, nps, ncfl, nbcf
      save
      data alpha/1.d-4/, pt1/0.1d0/, pt5/0.5d0/, pt75/0.75d0/,
     *     pt99/0.99d0/, zero/0.0d0/, two/2.0d0/
c
      mxtkn = .false.
c-----------------------------------------------------------------------
c calculate upls = u + x.
c-----------------------------------------------------------------------
      call dcopy (n, u, 1, upls, 1)
      do 20 i = 1,n
        u(i) = upls(i) + x(i)
 20     continue
      call f (n, nu, u, savf)
      nfe = nfe + 1
      call dswap(n, u, 1, upls, 1)
      fnrmp = vnorm(n, savf, sf)
      f1pls = pt5*fnrmp*fnrmp
      delf = f1pls - f1nrm
      slpi = -beta*ddot(m, hes, mmaxp1, ynew, 1)
      if (iret .ne. 3) f1prv = zero
      if ( (iret.eq.3) .and.
     *     ((f1pls.ge.f1prv) .or. (delf.gt.alpha*slpi)) ) then
c reset upls to uprev and terminate global step.
           iret = 0
           call dcopy (n, uprev, 1, upls, 1)
           call dcopy (n, fprev, 1, savf, 1)
           f1pls = f1prv
           return
        elseif (delf .ge. alpha*slpi) then
c f1(upls) too large.
           call slngth(n, nu, u, x, su, rlngth)
           if (rlngth .lt. stptol) then
c (upls - u) too small, terminate global step.
               iret = 1
               call dcopy (n, u, 1, upls, 1)
               return
             else
c reduce tau, continue global step.
               iret = 2
               tautmp = -slpi*xl/(two*(delf-slpi))
               if (tautmp .lt. pt1*tau) then
                    tau = pt1*tau
                 elseif (tautmp .gt. pt5*tau) then
                    tau = pt5*tau
                 else
                    tau = tautmp
                 endif
             return
             endif
        else
c f1(upls) sufficiently small.
           do 45 i = 1,mp1
             wk(i) = zero
 45          continue
           do 50 i = 1,m
             call daxpy (mp1, ynew(i), hes(1,i), 1, wk, 1)
 50          continue
           dfpred = slpi + pt5*ddot (mp1, wk, 1, wk, 1)
           if ( (iret.ne.2) .and.
     *          ( (dabs(dfpred-delf).le.pt1*dabs(delf)) .or.
     *                (delf.le.slpi) ) .and. (.not.nwttkn) .and.
     *                (tau.le.pt99*stepmx) ) then
c double tau and continue global step.
               iret = 3
               call dcopy (n, upls, 1, uprev, 1)
               call dcopy (n, savf, 1, fprev, 1)
               f1prv = f1pls
               tau = dmin1( two*tau, stepmx )
               return
             else
c accept upls as new iterate.  choose new tau.
               iret = 0
               if (xl .gt. pt99*stepmx) mxtkn = .true.
               if (delf .ge. pt1*dfpred) then
c decrease tau for next iteration.
                   tau = tau/two
                 elseif (delf .le. pt75*dfpred) then
c increase tau for next iteration.
                   tau = dmin1( two*tau, stepmx )
                 else
c leave tau unchanged.
                 endif
             endif
        endif
c     return
c----------------------- end of subroutine trgupd ----------------------
      end
      subroutine lnsrch(n, nu, u, savf, f1nrm, p, su, sf, stepmx, 
     *                  stptol, iret, unew, f1nrmp, mxtkn, f, jac)
c-----------------------------------------------------------------------
c this routine is the main driver for the linesearch algorithm.
c its purpose is to find a unew = u + rl*p in the direction p from u
c such that
c                               t
c    f(unew) .le. f(u) + alpha*g (unew-u) (alpha=1.d-4 used),
c
c and
c                              t
c    f(unew) .ge. f(u) + beta*g (unew-u)  (beta=0.9d0 used),
c
c where 0 .lt. rl .le. 1.
c-----------------------------------------------------------------------
c
c on entry
c
c   n      = size of the nonlinear system, and the length of arrays
c            unew, savf, su and sf.
c
c   nu     = length of array u.
c
c   u      = real array containing the previous approximate root.
c
c   f1nrm  = real scalar containing 0.5*norm(sf*f(u))**2, where
c            norm() denotes the euclidean norm.
c
c   p      = step direction returned by routine model.
c
c   su     = real array containing scaling factors for u.
c
c   sf     = real array containing scaling factors for f(u).
c
c   stepmx = real scalar containing the maximum allowable stepsize
c            length.
c
c   stptol = user-supplied tolerance on the minimum step length
c            s = unew-u.
c
c on return
c
c   unew   = real array containing the current approximate root.
c
c   savf   = real array containing the n-vector f(unew).
c
c   f1nrmp = real scalar containing 0.5*norm(sf*f(unew))**2, where
c            norm() denotes the euclidean norm.
c
c   iret   = output flag.
c            iret=0 means that a satisfactory unew was found.
c            iret=1 means that the routine failed to find a unew
c                   that was sufficiently distinct from u.
c
c   mxtkn  = logical flag indicating if the step just taken was of
c            maximum length stepmx.
c
c   nbcf   = number of steps for which beta-condition was not met
c            (in common).  this value is incremented by one if
c            the beta-condition could not be met on this call.
c
c-----------------------------------------------------------------------
      external f
      external jac

      integer n
      integer iret
      integer i
      integer nu

      double precision savf
      double precision u
      double precision unew
      double precision f1nrm
      double precision f1nrmp
      double precision p
      double precision su
      double precision sf
      double precision stptol
      double precision stepmx
      double precision pnrm
      double precision ratio
      double precision slpi
      double precision rlngth
      double precision rl
      double precision fnrmp
      double precision rltmp
      double precision vnorm
      double precision beta
      double precision f1lo
      double precision f1nprv
      double precision rldiff
      double precision rlincr
      double precision rlmin
      double precision rllo
      double precision rlmax
      double precision rlprev

      dimension savf(n)
      dimension u(nu)
      dimension unew(n)
      dimension p(n)
      dimension su(n)
      dimension sf(n)

      double precision pt1
      double precision pt1trl
      double precision pt99
      double precision one
      double precision two
      double precision alpha

      logical mxtkn
c-----------------------------------------------------------------------
c     nks001 common block.
c-----------------------------------------------------------------------

      integer locwmp
      integer locimp
      integer iersl
      integer kmp
      integer mmax
      integer methn
      integer methk
      integer ipflg
      integer mfdif
      integer nfe
      integer nje
      integer nni
      integer nli
      integer npe
      integer nps
      integer ncfl
      integer nbcf

      double precision delt
      double precision sqteta
      double precision rhom

      common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp,
     *                mmax, methn, methk, ipflg, mfdif, nfe, nje, nni,
     *                nli,  npe, nps, ncfl, nbcf
c-----------------------------------------------------------------------
c     nks002 common block.
c-----------------------------------------------------------------------

      integer iprint
      integer iunit
      integer iermsg

      common /nks002/ iprint, iunit, iermsg
c
      save
      data pt1/0.1d0/,alpha/1.d-4/,one/1.0d0/,two/2.0d0/
      data pt99/0.99d0/,beta/0.9d0/
c
      mxtkn = .false.
      pnrm = vnorm(n,p,su)
      ratio = one
      if (pnrm .gt. stepmx) then
        ratio = stepmx/pnrm
        do 10 i = 1,n
          p(i) = p(i)*ratio
 10       continue
        pnrm = stepmx
        endif
      slpi = -two*f1nrm*ratio
      if (methk .eq. 2) slpi = slpi + rhom*rhom*ratio
      call slngth(n, nu, u, p, su, rlngth)
      rlmin = stptol/rlngth
      rl = one
      if (iprint .gt. 1) write(iunit,20) rlmin
 20   format(' ------ in routine lnsrch (min lambda=',e12.4,') ------ ')
c-----------------------------------------------------------------------
c begin iteration to find rl value satisfying alpha- and beta-
c conditions.  if rl becomes .lt. rlmin, then terminate with iret=1.
c-----------------------------------------------------------------------
 115  continue
      call dcopy(n, u, 1, unew, 1)
      do 120 i = 1,n
        u(i) = unew(i) + rl*p(i)
 120    continue
      call f(n, nu, u, savf)
      nfe = nfe + 1
      call dswap(n, u, 1, unew, 1)
      fnrmp = vnorm(n,savf,sf)
      f1nrmp = fnrmp*fnrmp/two
      if (iprint .gt. 1) write(iunit,125) rl,f1nrm,f1nrmp
 125  format(' lambda, f1, f1new ',3d20.8)
      if (f1nrmp .gt. f1nrm + alpha*slpi*rl) go to 200
c alpha-condition satisfied.  now check for beta-condition.
        if (f1nrmp .lt. f1nrm + beta*slpi*rl) then
          if ( (rl.eq.one) .and. (pnrm.lt.stepmx) ) then
            rlmax = stepmx/pnrm
 130        continue
            rlprev = rl
            f1nprv = f1nrmp
            rl = dmin1(two*rl,rlmax)
            call dcopy(n, u, 1, unew, 1)
            do 140 i = 1,n
 140          u(i) = unew(i) + rl*p(i)
            call f(n, nu, u, savf)
            nfe = nfe + 1
            call dswap(n, u, 1, unew, 1)
            fnrmp = vnorm(n,savf,sf)
            f1nrmp = fnrmp*fnrmp/two
            if (iprint .gt. 1) write(iunit,125) rl,f1nrm,f1nrmp
            if ( (f1nrmp .le. f1nrm + alpha*slpi*rl) .and.
     *           (f1nrmp .lt. f1nrm + beta*slpi*rl)  .and.
     *           (rl .lt. rlmax) ) go to 130
            endif
          if ( (rl.lt.one) .or.
     *       ((rl.gt.one).and.(f1nrmp.gt.f1nrm+alpha*slpi*rl)) ) then
            rllo = dmin1(rl,rlprev)
            rldiff = dabs(rlprev-rl)
            if (rl .lt. rlprev) then
                f1lo = f1nrmp
              else
                f1lo = f1nprv
              endif
 150        continue
            rlincr = rldiff/two
            rl = rllo + rlincr
            call dcopy(n, u, 1, unew, 1)
            do 160 i = 1,n
 160          u(i) = unew(i) + rl*p(i)
            call f(n, nu, u, savf)
            nfe = nfe + 1
            call dswap(n, u, 1, unew, 1)
            fnrmp = vnorm(n,savf,sf)
            f1nrmp = fnrmp*fnrmp/two
            if (iprint .gt. 1) write(iunit,125) rl,f1nrm,f1nrmp
            if (f1nrmp .gt. f1nrm + alpha*slpi*rl) then
                rldiff = rlincr
              elseif (f1nrmp .lt. f1nrm + beta*slpi*rl) then
                rllo = rl
                rldiff = rldiff - rlincr
                f1lo = f1nrmp
              endif
            if ( ( (f1nrmp .gt. f1nrm + alpha*slpi*rl) .or.
     *             (f1nrmp .lt. f1nrm + beta*slpi*rl) )  .and.
     *           (rldiff .gt. rlmin) ) go to 150
            if (f1nrmp .lt. f1nrm + beta*slpi*rl) then
c beta condition could not be satisfied.  set unew to last
c u value that satisfied alpha-condition and continue.
c increment counter on number of steps beta-condition not satisfied.
              f1nrmp = f1lo
              do 170 i = 1,n
 170            unew(i) = u(i) + rllo*p(i)
              nbcf = nbcf + 1
              endif
            endif
          endif
        iret = 0
        if (rl*pnrm .gt. pt99*stepmx) mxtkn = .true.
        return
c-----------------------------------------------------------------------
c alpha-condition not satisfied.  perform quadratic backtrack to
c compute new rl value.
c-----------------------------------------------------------------------
 200  continue
      if (rl .lt. rlmin) then
c no satisfactory unew can be found sufficiently distinct from u.
c copy u into unew, set return code and return.
        iret = 1
        call dcopy (n, u, 1, unew, 1)
        return
        endif
c compute new rl using a quadratic backtrack.
      rltmp = -slpi/(two*(f1nrmp - f1nrm - slpi))
      if (rltmp .gt. rl/two) rltmp = rl/two
      rlprev = rl
      f1nprv = f1nrmp
      pt1trl = pt1*rl
      if (rltmp .le. pt1trl) then
          rl = pt1trl
        else
          rl = rltmp
        endif
      go to 115
c----------------------- end of subroutine lnsrch ----------------------
      end
      subroutine errgen (ierr, v1, v2, i1, i2)
c-----------------------------------------------------------------------
c this routine prints error messages from the driver nksol.  the output
c from this routine can be turned off by setting a flag in iwork.
c-----------------------------------------------------------------------

      integer ierr
      integer i1
      integer i2

      double precision v1
      double precision v2

c-----------------------------------------------------------------------
c     nks002 common block.
c-----------------------------------------------------------------------

      integer iprint
      integer iunit
      integer iermsg


      common /nks002/ iprint, iunit, iermsg
      save
c
      if (iermsg .gt. 0) return
      if (ierr .eq. 10) then
        write(iunit,9000)
        return
      endif
      if (ierr .eq. 20) then
        write(iunit,9010)
        return
      endif
      if (ierr .eq. 30) then
        write(iunit,9020)
        return
      endif
      if (ierr .eq. 40) then
        write(iunit,9030) i1,i2
        return
      endif
      if (ierr .eq. 50) then
        write(iunit,9040)  i1,v1
        return
      endif
      if (ierr .eq. 100) then
        write(iunit,9100) i1,i2
        return
      endif
      if (ierr .eq. 110) then
        write(iunit,9110) i1,i2
        return
      endif
      if (ierr .eq. 120) then
        write(iunit,9120) i1,i2
        return
      endif
      if (ierr .eq. 130) then
        write(iunit,*)'nu must be at least as large as n'
        return
      endif
c-----------------------------------------------------------------------
c     formats.
c-----------------------------------------------------------------------
 9000   format(//
     * ' nksol ---  illegal value for mf.  mf must be between '
     */'            1 and 3.'
     *)
 9010   format(//
     * ' nksol ---  illegal value for mdif.  mdif must either 0 or 1. '
     *)
 9020   format(//
     * ' nksol ---  illegal value for ipflag.  ipflag must be either '
     */'            0 or 1.'
     *)
 9030   format(//
     * ' nksol ---  illegal value for optional input in iwork. '
     */'            iwork(',i2,') = ',i3,' must be nonnegative.'
     *)
 9040   format(//
     * ' nksol ---  illegal value for optional input in rwork. '
     */'            rwork(',i2,') = ',e12.4,' must be nonnegative.'
     *)
 9100   format(//
     * ' nksol ---  insufficient length for rwork.'
     */'            rwork length given  - ',i8,
     */'            rwork length needed - ',i8
     *)
 9110   format(//
     * ' nksol ---  insufficient length for iwork.'
     */'            iwork length given  - ',i8,
     */'            iwork length needed - ',i8
     *)
 9120   format(//
     * ' nksol ---  maximum number of beta-condition test failures',
     */'            exceeded.'
     */'            number of beta-condition failures  - ',i8,
     */'            maximum number of failures allowed - ',i8
     *)
      return
c-------------  end of subroutine errgen  ------------------------------
      end

      subroutine daxpy(n,da,dx,incx,dy,incy)
c
c     constant times a vector plus a vector.
c     uses unrolled loops for increments equal to one.
c     jack dongarra, linpack, 3/11/78.
c
      integer i
      integer incx
      integer incy
      integer m
      integer mp1
      integer n
      integer ix
      integer iy
      double precision dx(n*incx)
      double precision dy(n*incy)
      double precision da
c
      if(n.le.0)return
      if (da .eq. 0.0d0) return
      if(incx.eq.1.and.incy.eq.1)go to 20
c
c        code for unequal increments or equal increments
c          not equal to 1
c
      ix = 1
      iy = 1
      if(incx.lt.0)ix = (-n+1)*incx + 1
      if(incy.lt.0)iy = (-n+1)*incy + 1
      do 10 i = 1,n
        dy(iy) = dy(iy) + da*dx(ix)
        ix = ix + incx
        iy = iy + incy
   10 continue
      return
c
c        code for both increments equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,4)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        dy(i) = dy(i) + da*dx(i)
   30 continue
      if( n .lt. 4 ) return
   40 mp1 = m + 1
      do 50 i = mp1,n,4
        dy(i) = dy(i) + da*dx(i)
        dy(i + 1) = dy(i + 1) + da*dx(i + 1)
        dy(i + 2) = dy(i + 2) + da*dx(i + 2)
        dy(i + 3) = dy(i + 3) + da*dx(i + 3)
   50 continue
      return
      end
      subroutine  dcopy(n,dx,incx,dy,incy)
c
c     copies a vector, x, to a vector, y.
c     uses unrolled loops for increments equal to one.
c     jack dongarra, linpack, 3/11/78.
c
      integer i
      integer incx
      integer incy
      integer ix
      integer iy
      integer m
      integer mp1
      integer n
      double precision dx(n*incx)
      double precision dy(n*incy)
c
      if(n.le.0)return
      if(incx.eq.1.and.incy.eq.1)go to 20
c
c        code for unequal increments or equal increments
c          not equal to 1
c
      ix = 1
      iy = 1
      if(incx.lt.0)ix = (-n+1)*incx + 1
      if(incy.lt.0)iy = (-n+1)*incy + 1
      do 10 i = 1,n
        dy(iy) = dx(ix)
        ix = ix + incx
        iy = iy + incy
   10 continue
      return
c
c        code for both increments equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,7)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        dy(i) = dx(i)
   30 continue
      if( n .lt. 7 ) return
   40 mp1 = m + 1
      do 50 i = mp1,n,7
        dy(i) = dx(i)
        dy(i + 1) = dx(i + 1)
        dy(i + 2) = dx(i + 2)
        dy(i + 3) = dx(i + 3)
        dy(i + 4) = dx(i + 4)
        dy(i + 5) = dx(i + 5)
        dy(i + 6) = dx(i + 6)
   50 continue
      return
      end
      double precision function ddot(n,dx,incx,dy,incy)
c
c     forms the dot product of two vectors.
c     uses unrolled loops for increments equal to one.
c     jack dongarra, linpack, 3/11/78.
c
      integer i
      integer incx
      integer incy
      integer ix
      integer iy
      integer m
      integer mp1
      integer n
      double precision dx(n*incx)
      double precision dy(n*incy)
      double precision dtemp
c
      ddot = 0.0d0
      dtemp = 0.0d0
      if(n.le.0)return
      if(incx.eq.1.and.incy.eq.1)go to 20
c
c        code for unequal increments or equal increments
c          not equal to 1
c
      ix = 1
      iy = 1
      if(incx.lt.0)ix = (-n+1)*incx + 1
      if(incy.lt.0)iy = (-n+1)*incy + 1
      do 10 i = 1,n
        dtemp = dtemp + dx(ix)*dy(iy)
        ix = ix + incx
        iy = iy + incy
   10 continue
      ddot = dtemp
      return
c
c        code for both increments equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,5)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        dtemp = dtemp + dx(i)*dy(i)
   30 continue
      if( n .lt. 5 ) go to 60
   40 mp1 = m + 1
      do 50 i = mp1,n,5
        dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) +
     *   dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4)
   50 continue
   60 ddot = dtemp
      return
      end

      double precision function dnrm2 ( n, dx, incx)

      integer next
      integer n
      integer nn
      integer incx
      integer i
      integer j

      double precision dx(n*incx)
      double precision cutlo
      double precision cuthi
      double precision hitest
      double precision sum
      double precision xmax
      double precision zero
      double precision one
      data   zero, one /0.0d0, 1.0d0/
c
c     euclidean norm of the n-vector stored in dx() with storage
c     increment incx .
c     if    n .le. 0 return with result = 0.
c     if n .ge. 1 then incx must be .ge. 1
c
c           c.l.lawson, 1978 jan 08
c
c     four phase method     using two built-in constants that are
c     hopefully applicable to all machines.
c         cutlo = maximum of  dsqrt(u/eps)  over all known machines.
c         cuthi = minimum of  dsqrt(v)      over all known machines.
c     where
c         eps = smallest no. such that eps + 1. .gt. 1.
c         u   = smallest positive no.   (underflow limit)
c         v   = largest  no.            (overflow  limit)
c
c     brief outline of algorithm..
c
c     phase 1    scans zero components.
c     move to phase 2 when a component is nonzero and .le. cutlo
c     move to phase 3 when a component is .gt. cutlo
c     move to phase 4 when a component is .ge. cuthi/m
c     where m = n for x() real and m = 2*n for complex.
c
c     values for cutlo and cuthi..
c     from the environmental parameters listed in the imsl converter
c     document the limiting values are as follows..
c     cutlo, s.p.   u/eps = 2**(-102) for  honeywell.  close seconds are
c                   univac and dec at 2**(-103)
c                   thus cutlo = 2**(-51) = 4.44089e-16
c     cuthi, s.p.   v = 2**127 for univac, honeywell, and dec.
c                   thus cuthi = 2**(63.5) = 1.30438e19
c     cutlo, d.p.   u/eps = 2**(-67) for honeywell and dec.
c                   thus cutlo = 2**(-33.5) = 8.23181d-11
c     cuthi, d.p.   same as s.p.  cuthi = 1.30438d19
c     data cutlo, cuthi / 8.232d-11,  1.304d19 /
c     data cutlo, cuthi / 4.441e-16,  1.304e19 /
      data cutlo, cuthi / 8.232d-11,  1.304d19 /
c
      if(n .gt. 0) go to 10
         dnrm2  = zero
         go to 300
c
   10 assign 30 to next
      sum = zero
      nn = n * incx
c                                                 begin main loop
      i = 1
   20    go to next,(30, 50, 70, 110)
   30 if( dabs(dx(i)) .gt. cutlo) go to 85
      assign 50 to next
      xmax = zero
c
c                        phase 1.  sum is zero
c
   50 if( dx(i) .eq. zero) go to 200
      if( dabs(dx(i)) .gt. cutlo) go to 85
c
c                                prepare for phase 2.
      assign 70 to next
      go to 105
c
c                                prepare for phase 4.
c
  100 i = j
      assign 110 to next
      sum = (sum / dx(i)) / dx(i)
  105 xmax = dabs(dx(i))
      go to 115
c
c                   phase 2.  sum is small.
c                             scale to avoid destructive underflow.
c
   70 if( dabs(dx(i)) .gt. cutlo ) go to 75
c
c                     common code for phases 2 and 4.
c                     in phase 4 sum is large.  scale to avoid overflow.
c
  110 if( dabs(dx(i)) .le. xmax ) go to 115
         sum = one + sum * (xmax / dx(i))**2
         xmax = dabs(dx(i))
         go to 200
c
  115 sum = sum + (dx(i)/xmax)**2
      go to 200
c
c
c                  prepare for phase 3.
c
   75 sum = (sum * xmax) * xmax
c
c
c     for real or d.p. set hitest = cuthi/n
c     for complex      set hitest = cuthi/(2*n)
c
   85 hitest = cuthi/float( n )
c
c                   phase 3.  sum is mid-range.  no scaling.
c
      do 95 j =i,nn,incx
      if(dabs(dx(j)) .ge. hitest) go to 100
   95    sum = sum + dx(j)**2
      dnrm2 = dsqrt( sum )
      go to 300
c
  200 continue
      i = i + incx
      if ( i .le. nn ) go to 20
c
c              end of main loop.
c
c              compute square root and adjust for scaling.
c
      dnrm2 = xmax * dsqrt(sum)
  300 continue
      return
      end

      subroutine  dscal(n,da,dx,incx)
c
c     scales a vector by a constant.
c     uses unrolled loops for increment equal to one.
c     jack dongarra, linpack, 3/11/78.
c
      integer i
      integer incx
      integer m
      integer mp1
      integer n
      integer nincx
      double precision da
      double precision dx(n*incx)
c
      if(n.le.0)return
      if(incx.eq.1)go to 20
c
c        code for increment not equal to 1
c
      nincx = n*incx
      do 10 i = 1,nincx,incx
        dx(i) = da*dx(i)
   10 continue
      return
c
c        code for increment equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,5)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        dx(i) = da*dx(i)
   30 continue
      if( n .lt. 5 ) return
   40 mp1 = m + 1
      do 50 i = mp1,n,5
        dx(i) = da*dx(i)
        dx(i + 1) = da*dx(i + 1)
        dx(i + 2) = da*dx(i + 2)
        dx(i + 3) = da*dx(i + 3)
        dx(i + 4) = da*dx(i + 4)
   50 continue
      return
      end
      subroutine  dswap (n,dx,incx,dy,incy)
c
c     interchanges two vectors.
c     uses unrolled loops for increments equal one.
c     jack dongarra, linpack, 3/11/78.
c
      integer i
      integer incx
      integer incy
      integer ix
      integer iy
      integer m
      integer mp1
      integer n
      double precision dx(n*incx)
      double precision dy(n*incy)
      double precision dtemp
c
      if(n.le.0)return
      if(incx.eq.1.and.incy.eq.1)go to 20
c
c       code for unequal increments or equal increments not equal
c         to 1
c
      ix = 1
      iy = 1
      if(incx.lt.0)ix = (-n+1)*incx + 1
      if(incy.lt.0)iy = (-n+1)*incy + 1
      do 10 i = 1,n
        dtemp = dx(ix)
        dx(ix) = dy(iy)
        dy(iy) = dtemp
        ix = ix + incx
        iy = iy + incy
   10 continue
      return
c
c       code for both increments equal to 1
c
c
c       clean-up loop
c
   20 m = mod(n,3)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        dtemp = dx(i)
        dx(i) = dy(i)
        dy(i) = dtemp
   30 continue
      if( n .lt. 3 ) return
   40 mp1 = m + 1
      do 50 i = mp1,n,3
        dtemp = dx(i)
        dx(i) = dy(i)
        dy(i) = dtemp
        dtemp = dx(i + 1)
        dx(i + 1) = dy(i + 1)
        dy(i + 1) = dtemp
        dtemp = dx(i + 2)
        dx(i + 2) = dy(i + 2)
        dy(i + 2) = dtemp
   50 continue
      return
      end
      integer function idamax(n,dx,incx)
c
c     finds the index of element having max. absolute value.
c     jack dongarra, linpack, 3/11/78.
c
      integer i
      integer incx
      integer ix
      integer n
      double precision dx(n*incx)
      double precision dmax
c
      idamax = 0
      if( n .lt. 1 ) return
      idamax = 1
      if(n.eq.1)return
      if(incx.eq.1)go to 20
c
c        code for increment not equal to 1
c
      ix = 1
      dmax = dabs(dx(1))
      ix = ix + incx
      do 10 i = 2,n
         if(dabs(dx(ix)).le.dmax) go to 5
         idamax = i
         dmax = dabs(dx(ix))
    5    ix = ix + incx
   10 continue
      return
c
c        code for increment equal to 1
c
   20 dmax = dabs(dx(1))
      do 30 i = 2,n
         if(dabs(dx(i)).le.dmax) go to 30
         idamax = i
         dmax = dabs(dx(i))
   30 continue
      return
      end
      subroutine jac(n,nu,u,savf,v,z)
      integer n
      integer nu
      double precision u(nu),savf(n),v(n),z(n)

      return
      end

      subroutine pset(n, nu, u, savf, su, sf, wk, f, wmp, iwmp, ier)

      integer n
      integer nu
      integer iwmp
      integer ier

      double precision u
      double precision savf
      double precision su
      double precision sf
      double precision wk
      double precision wmp

      dimension u(nu)
      dimension savf(n)
      dimension su(n)
      dimension sf(n)
      dimension wk(n)
      dimension wmp(1)
      dimension iwmp(1)

      external f

      ier = 0
      
      return
      end

      subroutine psol(n, nu, u, savf, f, jac, wk, wmp, iwmp, x, ier)

      integer n
      integer nu
      integer iwmp
      integer ier

      double precision u
      double precision savf
      double precision wk
      double precision wmp
      double precision x

      dimension u(nu)
      dimension savf(n)
      dimension wk(n)
      dimension wmp(1)
      dimension iwmp(1)
      dimension x(n)


      external f
      external jac

      return
      end


      subroutine boundy(u,nx,ny,type)
      integer i,j,nx,ny,type
      double precision u(0:nx+1,0:ny+1)
      if (type .eq. 1) then
*     Dirichlet:
      do 10 i=1,nx
         u(i,0) = 0.D0
         u(i,ny+1) = 0.0D0
 10   continue
      do 15 j=1,ny
         u(0,j) = 0.D0
         u(nx+1,j) = 0.D0
 15   continue
      endif
      if (type .eq. 2) then
*     mixed:
      do 20 i=1,nx
         u(i,0) = u(i,ny)
         u(i,ny+1) = u(i,1)
 20   continue
      do 25 j=1,ny
         u(0,j) = 1.D0
         u(nx+1,j) = 0.D0
 25   continue
      endif

      end
      subroutine jacobm(n, nu, u, savf, v, z)
*     ***************************************************
*     optional routine to provide the jacobian to the nksol.
*     z(k) = sum_j df_k/du_j (v)
*     ***************************************************
      integer n,nu,i,j,k,nx,ny
      double precision u(nu), savf(n), v(n), z(n),
     +                 xlam,xeta,fretd
      common/epscal/xlam,xeta
      common/ijsize/nx,ny
*     first column
      z(1) = (1.D0 + 4.D0*xlam)*v(1) -
     +          xlam*( v(ny+1) + v(2) + v(ny) ) - 
     +          xeta*fretd(u(1))*v(1)
      do 10 j=2,ny-1
         z(j) = (1.D0 + 4.D0*xlam)*v(j) -
     +          xlam*( v(ny+j) + v(j-1) + v(j+1) ) -
     +          xeta*fretd(u(j))*v(j)
 10   continue
      z(ny) = (1.D0 + 4.D0*xlam)*v(ny) -
     +        xlam*( v(2*ny) + v(ny-1) + v(1) ) -
     +           xeta*fretd(u(ny))*v(ny)
*     2nd through the next to the last columns.
      k=ny
      do 20 i=2,nx-1
         k=k+1
         z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +          xlam*( v(k-ny) + v(k+ny) + 
     +                 v(k+1) + v(k+ny-1) ) -
     +          xeta*fretd(u(k))*v(k)
      do 25 j=2,ny-1
         k=k+1
         z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +          xlam*( v(k-ny) + v(k+ny) +
     +                 v(k-1) + v(k+1) ) -
     +          xeta*fretd(u(k))*v(k)
 25   continue
         k=k+1
         z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +          xlam*( v(k-ny) + v(k+ny) + 
     +                 v(k-1) + v(k-ny+1) ) -
     +          xeta*fretd(u(k))*v(k)
 20   continue
*     last column
      k=k+1
      z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +       xlam*( v(k-ny) + v(k+1) + v(k+ny-1) ) - 
     +       xeta*fretd(u(k))*v(k)
      do 30 j=2,ny-1
         k=k+1
         z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +          xlam*( v(k-ny) + v(k-1) + v(k+1) ) -
     +          xeta*fretd(u(k))*v(k)
 30   continue
      k=k+1
      z(k) = (1.D0 + 4.D0*xlam)*v(k) -
     +       xlam*( v(k-ny) + v(k-1) + v(k-ny+1) ) -
     +       xeta*fretd(u(k))*v(k)
      end