program rdfrnt * ************************************************************* * * A second-order code for the 2-D R-D equation with convection. * * u_t + v dot grad u = eps div ( a grad u ) + 1/eps f(u) * * where f is the kpp reaction * * f(u) = u*(1-u) * * and the velocity field v is given and satisfies * * div v = 0. * * The domain is [0,L]x[-1,1] and we can prescribe both the Dirichlet * and mixed (Dirichlet at x=0,L and periodic in y) by specifying * the integer type to be: * * 1 Dirichlet: channel flow (bubble), u=0 on boundary; * 2 mixed: channel flow (almost planar front), * u=1, at x=0, and u=0 at x=L, periodic in y. * * ************************************************************* character*1 cont integer nu,liw,kp,ku,mmax,mf,mdif,iopt,ipflag, + lenk,iterm,lrw,nk,k,mx,my,mxy,nx,ny,nxy, + np,type parameter( mx=600,my=300, mxy=mx*my ) parameter( nu=2*mxy, liw=30, mmax=10 ) parameter( lenk=236+14*mxy, lrw=4+4*mxy+lenk ) integer iwork(liw) double precision uf(nu),savf(mxy),su(mxy),rwork(lrw), + pf(0:mx+1,0:my+1),pex(mx,my), + ue(0:mx,my),ve(mx,0:my), + uc(mx,my), vc(mx,my), + pv(0:mx,my),ph(mx,0:my), + px(mx,my+1),py(mx,my+1), + a,b,c,xlam,xeta,eps,epsmac,ftol,stptol,h,dt, + atotal,hinv,res,t,rms,dth,uvm,cfl,pi, + tend,rini,thl,cut,frto,frtn,spe,alpha,amp parameter( type=2) parameter( epsmac=0.1D-8, cut=0.5D0 ) common/epscal/xlam,xeta common/ijsize/nx,ny common/posini/rini,thl common/valupi/pi common/meshsz/h,hinv common/meanfd/amp external fnonld,fnonlm,jacobd,jacobm,pset,psol data su/mxy*1.D0/, t,ftol,stptol/3*0.D0/, + mf,iopt,mdif,ipflag/3,0,1,0/, ku/10/ cont = 'n' * * read input data: read *, nx,ny read *, eps,alpha,rini,thl,cfl read *, amp,rms read *, tend,np read(5,'(a)') cont * open(7,file='area_t',status='unknown') pi = 4.D0*datan(1.D0) nxy = nx*ny h = 2.D0/dfloat(ny) hinv = 1.D0/h * * initialize and plot the solution call initia(pf,ue,ve,uc,vc,nx,ny) call posfrt(pf,frto,nx,ny) call output(pf,nx,ny,ku) * * * read the velocity field uvm=2.D0 call readuf(ue,ve,uc,vc,uvm,nx,ny,rms,t,alpha,eps,h) * call readuf(ue,ve,uc,vc,uvm,nx,ny,rms) c call ushift(ue,uc,nx,ny) c call shearf(ue,ve,uc,vc,uvm,nx,ny,rms) * dt = dmin1(0.5D0*h,cfl*h/uvm) nk = idint(tend/dt) kp = nk/np print 105, dt dth = dt/h xlam = 0.5D0*eps*dt/h/h xeta = 0.5D0*dt/eps * * main loop to solve the equation in time: * first, form the rhs of the equations; then, use nksol to solve * the nonlinear system; after this solution is updated by renewp. do 50 k=1,nk * calculate the rhs; first the diffusion and reaction term call knowtm(pf,pex,nx,ny,type) * if( dabs(rms) .le. epsmac ) goto 51 * then the convection, if( type .eq. 1) then call conved(ue,ve,uc,vc,pf,pex,px,py,pv,ph,dth,nx,ny) else call convem(ue,ve,uc,vc,pf,pex,px,py,pv,ph,dth,nx,ny) endif 51 call feqrhs(nxy,nu,uf,pf,pex,nx,ny) * solve the nonlinear system by nksol, if( type .eq. 1) then call nksol(nxy, nu, uf, savf, fnonld, jacobd, su, su, ftol, + stptol, rwork, lrw, iwork, liw, iopt, iterm, + pset, psol, mf, mdif, ipflag) else call nksol(nxy, nu, uf, savf, fnonlm, jacobm, su, su, ftol, + stptol, rwork, lrw, iwork, liw, iopt, iterm, + pset, psol, mf, mdif, ipflag) endif * update the solution and enforce the boundary condition. call renewp(uf,pf,nu,nx,ny,type) call posfrt(pf,frtn,nx,ny) spe = (frtn - frto)/dt frto = frtn write(21,101) t,frtn write(22,101) t,spe * compute the area bounded by the level set u=cut (0 < cut < 1) * call pharea(pf,cut,nx,ny,atotal) * write (7,101) t,atotal t = t + dt call readuf(ue,ve,uc,vc,uvm,nx,ny,rms,t,alpha,eps,h) if(mod(k,kp) .eq. 0) then call residu(savf,nxy,h,res) ku = ku+1 print 102,t,ku,res call output(pf,nx,ny,ku) endif 50 continue 101 format(f12.8,2x,f12.8) 102 format(' t=',f8.4,' write to unit ',i3,' res= ',e12.6) 105 format(' dt = ',f10.8) end subroutine fnonld(n, nu, u, savf) * *************************************************** * establish the nonlinear system of equations for nksol * to solve. * *************************************************** integer n,nu,nx,ny,i,j,k double precision u(nu),savf(n) double precision xlam,xeta,fret common/epscal/xlam,xeta common/ijsize/nx,ny savf(1) = (1.D0 + 4.D0*xlam)*u(1) - + xlam*( u(ny+1) + u(2) ) - + xeta*fret(u(1)) - + u(n+1) do 10 j=2,ny-1 savf(j) = (1.D0 + 4.D0*xlam)*u(j) - + xlam*( u(ny+j) + u(j-1) + u(j+1) ) - + xeta*fret(u(j)) - + u(n+j) 10 continue savf(ny) = (1.D0 + 4.D0*xlam)*u(ny) - + xlam*( u(2*ny) + u(ny-1) ) - + xeta*fret(u(ny)) - + u(n+ny) k=ny do 20 i=2,nx-1 k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k+ny) + u(k+1) ) - + xeta*fret(u(k)) - + u(n+k) do 25 j=2,ny-1 k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + + u(k+ny) + + u(k-1) + + u(k+1) ) - + xeta*fret(u(k)) - + u(n+k) 25 continue k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k+ny) + u(k-1) ) - + xeta*fret(u(k)) - + u(n+k) 20 continue k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k+1) ) - + xeta*fret(u(k)) - + u(n+k) do 30 j=2,ny-1 k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k-1) + u(k+1) ) - + xeta*fret(u(k)) - + u(n+k) 30 continue k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k-1) ) - + xeta*fret(u(k)) - + u(n+k) end subroutine jacobd(n, nu, u, savf, v, z) * *************************************************** * optional routine to provide the jacobian to the nksol. * z(k) = sum_j df_k/du_j (v) * *************************************************** integer n,nu,i,j,k,nx,ny double precision u(nu), savf(n), v(n), z(n), + xlam,xeta,fretd common/epscal/xlam,xeta common/ijsize/nx,ny z(1) = (1.D0 + 4.D0*xlam)*v(1) - + xlam*( v(ny+1) + v(2) ) - + xeta*fretd(u(1))*v(1) do 10 j=2,ny-1 z(j) = (1.D0 + 4.D0*xlam)*v(j) - + xlam*( v(ny+j) + v(j-1) + v(j+1) ) - + xeta*fretd(u(j))*v(j) 10 continue z(ny) = (1.D0 + 4.D0*xlam)*v(ny) - + xlam*( v(2*ny) + v(ny-1) ) - + xeta*fretd(u(ny))*v(ny) k=ny do 20 i=2,nx-1 k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k+ny) + v(k+1) ) - + xeta*fretd(u(k))*v(k) do 25 j=2,ny-1 k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k+ny) + + v(k-1) + v(k+1) ) - + xeta*fretd(u(k))*v(k) 25 continue k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k+ny) + v(k-1) ) - + xeta*fretd(u(k))*v(k) 20 continue k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k+1) ) - + xeta*fretd(u(k))*v(k) do 30 j=2,ny-1 k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k-1) + v(k+1) ) - + xeta*fretd(u(k))*v(k) 30 continue k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k-1) ) - + xeta*fretd(u(k))*v(k) end subroutine fnonlm(n, nu, u, savf) * *************************************************** * establish the nonlinear system of equations for nksol * to solve. * *************************************************** integer n,nu,nx,ny,i,j,k double precision u(nu),savf(n) double precision xlam,xeta,fret common/epscal/xlam,xeta common/ijsize/nx,ny * first column savf(1) = (1.D0 + 4.D0*xlam)*u(1) - + xlam*( u(ny+1) + u(2) + u(ny) + 1.D0 ) - + xeta*fret(u(1)) - + u(n+1) do 10 j=2,ny-1 savf(j) = (1.D0 + 4.D0*xlam)*u(j) - + xlam*( u(ny+j) + u(j-1) + + u(j+1) +1.D0 ) - + xeta*fret(u(j)) - + u(n+j) 10 continue savf(ny) = (1.D0 + 4.D0*xlam)*u(ny) - + xlam*( u(2*ny) + u(ny-1) + + u(1) + 1.D0 ) - + xeta*fret(u(ny)) - + u(n+ny) * 2nd through the next to the last columns. k=ny do 20 i=2,nx-1 k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k+ny) + + u(k+1) + u(k+ny-1) ) - + xeta*fret(u(k)) - + u(n+k) do 25 j=2,ny-1 k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + + u(k+ny) + + u(k-1) + + u(k+1) ) - + xeta*fret(u(k)) - + u(n+k) 25 continue k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k+ny) + + u(k-1) + u(k-ny+1) ) - + xeta*fret(u(k)) - + u(n+k) 20 continue * last column k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k+1) + u(k+ny-1) ) - + xeta*fret(u(k)) - + u(n+k) do 30 j=2,ny-1 k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k-1) + u(k+1) ) - + xeta*fret(u(k)) - + u(n+k) 30 continue k=k+1 savf(k) = (1.D0 + 4.D0*xlam)*u(k) - + xlam*( u(k-ny) + u(k-1) + u(k-ny+1) ) - + xeta*fret(u(k)) - + u(n+k) end subroutine knowtm(uold,ukw,nx,ny,type) * ********************************************************* * calculate the diffusion and reaction terms at the previous * time step and add them up, multiply by dt/2. * This term contributes to the rhs of the Crank-Nicholson * and also to the Taylor expansion in the flux calcuation. * ********************************************************* integer i,j,nx,ny,type double precision uold(0:nx+1,0:ny+1),ukw(nx,ny), + xlam,xeta,fret common/epscal/xlam,xeta call boundy(uold,nx,ny,type) do 20 j=1,ny do 20 i=1,nx ukw(i,j) = xlam*( uold(i-1,j) + + uold(i+1,j) + + uold(i,j-1) + + uold(i,j+1) - + 4.D0*uold(i,j) ) + + xeta*fret(uold(i,j)) 20 continue end subroutine feqrhs(n,nu,u,uold,pex,nx,ny) * **** **** **** * give an initial guess and form the forcing term in the equation. * **** **** **** integer n,nu,i,j,k,nx,ny double precision u(nu),uold(0:nx+1,0:ny+1),pex(nx,ny) k = 0 do 5 i=1,nx do 5 j=1,ny k = k+1 u(k) = uold(i,j) u(n+k) = uold(i,j) + pex(i,j) 5 continue end subroutine renewp(uf,pf,nu,nx,ny,type) * **** **** **** * update the solution and enforce the periodic condition. * **** **** **** integer nu,nx,ny,i,j,k,type double precision uf(nu),pf(0:nx+1,0:ny+1) k = 0 do 10 i=1,nx do 10 j=1,ny k = k+1 pf(i,j) = uf(k) 10 continue call boundy(pf,nx,ny,type) end subroutine residu(savf,n,h,res) * **** **** **** * calculate the residual of the nonlinear equations. * **** **** **** integer n,j double precision savf(n),h,res res = 0.D0 do 10 j=1,n res = res + savf(j)*savf(j) 10 continue res = h*dsqrt(res) end subroutine output(pf,nx,ny,ku) integer ku,nx,ny,i,j double precision pf(0:nx+1,0:ny+1) write(ku,103) ((pf(i,j),i=1,nx),j=1,ny) 103 format(e14.8) end subroutine contin(pf,nx,ny) integer nx,ny,i,j double precision pf(0:nx+1,0:ny+1) read(7,103) ((pf(i,j),i=1,nx),j=1,ny) do 10 j=0,ny+1 pf(0,j) = 0.D0 pf(nx+1,j) = 1.D0 10 continue do 20 i=1,nx pf(i,0) = pf(i,ny) pf(i,ny+1) = pf(i,1) 20 continue 103 format(e14.8) end function fret(u) double precision fret,u fret = u*(1.D0-u) end function fretd(u) double precision fretd,u fretd = 1.D0 - 2.D0*u end subroutine conved(ue,ve,uc,vc,pf,pex,px,py,pv,ph,dth,nx,ny) * ********************************************************** * * nonsplit second-order upwind scheme to calculate the flux term * * on input, pex is 1/2( grad^2 u + 1/f(u)), and on output * pex = pex - (u cdot pf)^{n+1/2} * * velocity field given on a staggered grid: * * --- ve(i,j) -- --- ph(i,j) --- * | | | | * | | | | * ue u(v)c(i,j) ue(i,j) pv pf(i,j) pv(i,j) * | | | | * | | | | * ----- ve ----- ----- ph ------ * * ********************************************************** integer i,j,nx,ny double precision dth,slimit double precision ue(0:nx,ny),ve(nx,0:ny), + uc(nx,ny), vc(nx,ny), + pv(0:nx,ny),ph(nx,0:ny), + px(nx,ny),py(nx,ny), + pf(0:nx+1,0:ny+1),pex(nx,ny) * construct 1st derivatives with limiter in the x (normal) direction * and upwind in the y (transverse) direction do 5 j=1,ny do 5 i=1,nx px(i,j) = slimit(pf(i,j)-pf(i-1,j),pf(i+1,j)-pf(i,j)) if ( vc(i,j) .gt. 0.D0) then py(i,j) = pf(i,j) - pf(i,j-1) else py(i,j) = pf(i,j+1) - pf(i,j) end if 5 continue * approximate vertical edge values. do 10 j=1,ny do 10 i=1,nx-1 if (ue(i,j) .gt. 0.D0) then pv(i,j) = pf(i,j) + 0.5D0*( (1.D0 - + dth*uc(i,j) )*px(i,j) - + dth*vc(i,j) *py(i,j) ) + + pex(i,j) else pv(i,j) = pf(i+1,j) - 0.5D0*( (1.D0 + + dth*uc(i+1,j) )*px(i+1,j) + + dth*vc(i+1,j) *py(i+1,j) ) + + pex(i+1,j) end if 10 continue do 12 j=1,ny pv(0,j) = 0.D0 pv(nx,j) = 0.D0 12 continue * construct 1st derivatives with limiter in the y (normal) direction * and upwind in the x (transverse) direction do 15 j=1,ny do 15 i=1,nx * approximate horizontal edge values. py(i,j) = slimit(pf(i,j)-pf(i,j-1),pf(i,j+1)-pf(i,j)) if (uc(i,j) .gt. 0.D0) then px(i,j) = pf(i,j) - pf(i-1,j) else px(i,j) = pf(i+1,j) - pf(i,j) end if 15 continue do 20 j=1,ny-1 do 20 i=1,nx if (ve(i,j) .gt. 0.D0) then ph(i,j) = pf(i,j) + 0.5D0*( (1.D0 - + dth*vc(i,j) )*py(i,j) - + dth*uc(i,j) *px(i,j) ) + + pex(i,j) else ph(i,j) = pf(i,j+1) - 0.5D0*( (1.D0 + + dth*vc(i,j+1) )*py(i,j+1) + + dth*uc(i,j+1) *px(i,j+1) ) + + pex(i,j+1) end if 20 continue do 25 i=1,nx ph(i,0) = 0.D0 ph(i,ny) = 0.D0 25 continue * calculate fluxes and subtract it from the forcing term. do 30 j=1,ny do 30 i=1,nx pex(i,j) = pex(i,j) - dth*( + ue(i,j)*pv(i,j) - ue(i-1,j)*pv(i-1,j) + + ve(i,j)*ph(i,j) - ve(i,j-1)*ph(i,j-1) ) 30 continue end subroutine convem(ue,ve,uc,vc,pf,pex,px,py,pv,ph,dth,nx,ny) * ********************************************************** * * nonsplit second-order upwind scheme to calculate the flux term * * on input, pex is dt/2( eps grad^2 u + 1/eps f(u)), and on output * pex = pex - (u cdot pf)^{n+1/2} * * velocity field given on a staggered grid: * * --- ve(i,j) -- --- ph(i,j) --- * | | | | * | | | | * ue u(v)c(i,j) ue(i,j) pv pf(i,j) pv(i,j) * | | | | * | | | | * ----- ve ----- ----- ph ------ * * Dirichlet in x, periodic in y * ********************************************************** integer i,j,nx,ny double precision dth,slimit double precision ue(0:nx,ny),ve(nx,0:ny), + uc(nx,ny), vc(nx,ny), + pv(0:nx,ny),ph(nx,0:ny), + px(nx,ny),py(nx,ny), + pf(0:nx+1,0:ny+1),pex(nx,ny) * construct 1st derivatives with limiter in the x (normal) direction * and upwind in the y (transverse) direction do 5 j=1,ny do 5 i=1,nx px(i,j) = slimit(pf(i,j)-pf(i-1,j),pf(i+1,j)-pf(i,j)) if ( vc(i,j) .gt. 0.D0) then py(i,j) = pf(i,j) - pf(i,j-1) else py(i,j) = pf(i,j+1) - pf(i,j) end if 5 continue * approximate vertical edge values. do 10 j=1,ny do 10 i=1,nx-1 if (ue(i,j) .gt. 0.D0) then pv(i,j) = pf(i,j) + 0.5D0*( (1.D0 - + dth*uc(i,j) )*px(i,j) - + dth*vc(i,j) *py(i,j) ) + + pex(i,j) else pv(i,j) = pf(i+1,j) - 0.5D0*( (1.D0 + + dth*uc(i+1,j) )*px(i+1,j) + + dth*vc(i+1,j) *py(i+1,j) ) + + pex(i+1,j) end if 10 continue do 12 j=1,ny pv(0,j) = 1.D0 pv(nx,j) = 0.D0 12 continue * construct 1st derivatives with limiter in the y (normal) direction * and upwind in the x (transverse) direction do 15 j=1,ny do 15 i=1,nx * approximate horizontal edge values. py(i,j) = slimit(pf(i,j)-pf(i,j-1),pf(i,j+1)-pf(i,j)) if (uc(i,j) .gt. 0.D0) then px(i,j) = pf(i,j) - pf(i-1,j) else px(i,j) = pf(i+1,j) - pf(i,j) end if 15 continue do 20 j=1,ny-1 do 20 i=1,nx if (ve(i,j) .gt. 0.D0) then ph(i,j) = pf(i,j) + 0.5D0*( (1.D0 - + dth*vc(i,j) )*py(i,j) - + dth*uc(i,j) *px(i,j) ) + + pex(i,j) else ph(i,j) = pf(i,j+1) - 0.5D0*( (1.D0 + + dth*vc(i,j+1) )*py(i,j+1) + + dth*uc(i,j+1) *px(i,j+1) ) + + pex(i,j+1) end if 20 continue j=ny do 21 i=1,nx if (ve(i,j) .gt. 0.D0) then ph(i,j) = pf(i,j) + 0.5D0*( (1.D0 - + dth*vc(i,j) )*py(i,j) - + dth*uc(i,j) *px(i,j) ) + + pex(i,j) else ph(i,j) = pf(i,1) - 0.5D0*( (1.D0 + + dth*vc(i,1) )*py(i,1) + + dth*uc(i,1) *px(i,1) ) + + pex(i,1) end if 21 continue do 25 i=1,nx ph(i,0) = ph(i,ny) 25 continue * calculate fluxes and subtract it from the forcing term. do 30 j=1,ny do 30 i=1,nx pex(i,j) = pex(i,j) - dth*( + ue(i,j)*pv(i,j) - ue(i-1,j)*pv(i-1,j) + + ve(i,j)*ph(i,j) - ve(i,j-1)*ph(i,j-1) ) 30 continue end function slimit(ul,ur) * *********************************************************** * * limiter definition in the interior * * ul,ur -> values on the left and right sides * slimit <- resolved value * * *********************************************************** double precision ul,ur,dut,slimit if ( ul*ur .le. 0.D0 ) then slimit = 0.D0 else dut = 0.5D0*(ul+ur) slimit = dsign( dmin1( + dabs(dut), + 2.D0*dabs(ul), + 2.D0*dabs(ur) ), + dut ) end if end subroutine initia(pf,ue,ve,uc,vc,nx,ny) integer i,j,nx,ny double precision pf(0:nx+1,0:ny+1),ue(0:nx,ny),ve(nx,0:ny), + uc(nx,ny),vc(nx,ny) double precision rini,xl,yl,x,y,r,frtini,pi,h,hinv common/meshsz/h,hinv common/valupi/pi do 30 i=0,nx+1 x = (dble(i)-0.5D0)*h do 30 j=0,ny+1 y = -1.D0 + (dble(j)-0.5D0)*h c pf(i,j) = frtini(x-0.5*dsin(pi*y)) pf(i,j) = frtini(x) 30 continue do 40 j=1,ny do 40 i=1,nx uc(i,j) = 0.D0 vc(i,j) = 0.D0 40 continue do 50 j=1,ny do 50 i=0,nx ue(i,j) = 0.D0 50 continue do 60 j=0,ny do 60 i=1,nx ve(i,j) = 0.D0 60 continue end function frtini(r) * ************************************************************ * This function gives the initial front profile by connecting * 0 to 1 by half period of sin(x). * rini: initial front position, * th: initial front thickness. * ************************************************************ double precision r,rini,thl,frtini,hdel,pi common/posini/rini,thl common/valupi/pi hdel = 0.5D0*thl if ( r-rini .ge. hdel) then frtini = 0.D0 elseif ( r-rini .le. -hdel ) then frtini = 1.D0 else frtini = 0.5D0*(1.D0 - dsin((r-rini)*pi/thl)) endif end subroutine shearf(ue,ve,uc,vc,uvm,nx,ny,rms) * *************************************************** * read the velocity field data * *************************************************** integer nx,ny,i,j,n_peri,k double precision ue(0:nx,ny),ve(nx,0:ny), + uc(nx,ny),vc(nx,ny) double precision uvm,rms,uy,pih,piy,ucoef uvm = 0.D0 pih = 8.D0*datan(1.D0)/dfloat(ny) ucoef = dsqrt(2.D0)*rms do 10 j=1,ny piy = (dfloat(j)-0.5D0)*pih uy = ucoef*dsin(piy) uvm = dmax1(dabs(uy),uvm) do 10 i=0,2*ny ue(i,j) = uy 10 continue do 20 j=0,ny do 20 i=1,2*ny ve(i,j) = 0.D0 20 continue do 5 k=1,n_peri-1 do 30 j=1,ny do 30 i=1,2*ny ue(2*k*ny+i,j) = ue(i,j) 30 continue do 40 j=0,ny do 40 i=1,2*ny ve(2*k*ny+i,j) = 0.D0 40 continue 5 continue do 60 j=1,ny do 60 i=1,nx uc(i,j) = ue(i,j) vc(i,j) = 0.D0 60 continue return end subroutine readuf(ue,ve,uc,vc,uvm,nx,ny,rms,t,alpha,eps,h) * *************************************************** * read the velocity field data * *************************************************** integer nx,ny,i,j,n_peri,k,kv_cell double precision ue(0:nx,ny),ve(nx,0:ny), + uc(nx,ny),vc(nx,ny) double precision uvm,rms,u,v,t,eps,alpha,h,fr,yy,ym, + amp common/meanfd/amp uvm = 0.D0 fr= 16.D0*datan(1.D0) yy= -1.D0 + 0.5D0*h do 15 j=1,ny ym = yy + amp*t do 10 i=0,nx c ue(i,j) = ue(i,j) + dsin(eps**(-alpha) * t) ue(i,j) = -2.D0 + rms*dsin(fr*ym) 10 continue yy=yy+h 15 continue uvm = 2.D0 + rms do 25 j=0,ny do 20 i=1,nx ve(i,j) = 0. 20 continue 25 continue do 65 j=1,ny do 60 i=1,nx uc(i,j) = 0.5D0*(ue(i-1,j)+ue(i,j)) vc(i,j) = 0.5D0*(ve(i,j-1)+ve(i,j)) 60 continue 65 continue end subroutine posfrt(pf,frt,nx,ny) * use the moment to calculate the front position integer nx,ny,i,jj,nms double precision pf(0:nx+1,0:ny+1) double precision frt,ax,a,h,hinv,x,du common/meshsz/h,hinv frt = 0.D0 nms = 0 do 5 jj=1,ny if(mod(jj,5) .eq. 0) then nms = nms + 1 a = pf(1,jj)-1.D0 ax = 0.D0 do 10 i=1,nx-1 du = pf(i+1,jj) - pf(i,jj) x = dfloat(i)*h a = a + du ax = ax + du*x 10 continue du = - pf(nx,jj) a = a + du ax = ax + du*dfloat(nx)*h frt = frt + ax/a endif 5 continue frt = frt/dfloat(nms) end subroutine ushift(ue,uc,nx,ny) * add the constant velocity (2,0) to work in the moving coordinates. integer nx,ny,i,j double precision ue(0:nx,ny),uc(nx,ny) do 10 j=1,ny do 12 i=1,nx uc(i,j) = uc(i,j) - 2.D0 12 continue do 14 i=0,nx ue(i,j) = ue(i,j) - 2.D0 14 continue 10 continue end subroutine nksol (n, nu, u, savf, f, jac, su, sf, ftol, stptol, * rwork, lrw, iwork, liw, iopt, iterm, pset, * psol, mf, mdif, ipflag) c----------------------------------------------------------------------- c this is the february 26, 1988 version of c nksol.. a nonlinear krylov solver for nonlinear systems of c equations of the form f(u) = 0. c c this version is in double precision. c c authors.. c c peter n. brown c computing and mathematics research division c lawrence livermore national laboratory c livermore, ca 94550 c and c youcef saad c center for supercomputer research and development c university of illinois c urbana, il 61801 c c references.. c 1. peter n. brown and youcef saad c hybrid krylov methods for nonlinear systems of equations c llnl report ucrl-97645, november 1987. c c 2. john e. dennis and robert b. schnabel c numerical methods for unconstrained optimization and nonlinear c equations, prentice-hall, englewood cliffs, nj, 1983. c (this reference contains the basic algorithms for the c stopping tests, dogleg and linesearch strategies used c in nksol.) c c----------------------------------------------------------------------- c Modifications: c c [29-Mar-1993] Removal of implicit data typing. c This version should now compile with the fortran c options -u -C. The call lists now have one additional c integer, nu following n. c c nu - length of u array c -u is the explicit data typing option c -C is the array bound checking c c [12-May-1993] Addition of two stopping criterion for the linear c krylov iteration. The previously unused input variable c iwork(9) now holds the option (described below). c c Author - Jeff McGough c c This code compiles on: DEC station 5000 c SPARC station 1 c SGI Indigo c Author - Jeff McGough c c----------------------------------------------------------------------- c introduction. c c nksol solves nonlinear systems f(u)=0 rewritten in the form c c sf*f((su-inverse)*ubar) = 0, ubar = su*u, c c where f and u are n-vectors, and sf and su are diagonal scaling c matrices with positive diagonal entries. nksol uses an inexact c newton method as the basic nonlinear iteration, where the newton c equations are solved only approximately by a linear krylov iteration, c coupled with either a linesearch or dogleg global strategy. the c user may optionally choose either arnoldi-s method (with linesearch c backtracking) or the generalized minimum residual method (gmres) c (with either the linesearch or dogleg strategy) as the krylov c iteration technique, with or without preconditioning. c c the scaling matrices su and sf should be chosen so that the vector c su*u has all its components with roughly the same magnitude when u c is close to a root of f, and similarly for sf*f(u) when u is not too c near a root of f. c c c nksol generates a sequence of approximations u(k) to a root of f. c the stopping criteria for the nonlinear iteration are the c following.. c c 1. maxnorm( sf*f(u(k)) ) .le. ftol, c c where maxnorm() is the maximum norm function and ftol is a c user-supplied stopping tolerance. c c 2. max(abs(u(k,i)-u(k-1,i))/max(abs(u(k,i)),1/su(i))) .le. stptol, c i=1,...,n c c where u(j,i) is the i-th component of u(j) (j=k,k-1) , su(i) is c the i-th diagonal element of the matrix su, and stptol is a user- c supplied stopping tolerance. the above test measures the length c of the scaled distance between the last two iterates. c c c the scaled/preconditioned newton equations are of the form c c (sf*j*(p-inverse)*(su-inverse))*(su*p*x) = sf*b, c c where j = j(u) = df/du is the system jacobian matrix evaluated at the c current iterate u, b = -f(u), and p is a preconditioner matrix c (applied on the right). the solution x is the approximate newton c step. c c the linear krylov iteration generates a sequence x(m) of c approximate solutions to the newton equations within an m-dimensional c krylov subspace (m = 1,2,...). the stopping test for the linear c krylov iteration is c c norm(sf*(b - j*x(m))) .le. etak*norm(sf*b), c c where norm() is the euclidean norm, b = -f(u(k)), j = j(u(k)), c and by default etak = 0.5**k (or user choosen through iwork(9)). c the counter ncfl counts the number of times this condition c is not met within mmax linear iterations. (see the optional inputs c and outputs sections below for the definitions of mmax and ncfl.) c c----------------------------------------------------------------------- c full description of user interface to nksol. c c the user interface to nksol consists of the following parts. c c i. the call sequence to subroutine nksol, which is a driver c routine for the solver. this includes descriptions of both c the call sequence arguments and of user-supplied routines. c following these descriptions is a description of c optional inputs available through the call sequence, and then c a description of optional outputs (in the work arrays). c c ii. description of two routines in the nksol package which c the user may replace with his own versions, if desired. c these relate to the above stopping criteria. c c----------------------------------------------------------------------- c part i. call sequence. c c the call sequence parameters used for input only are c n, nu, f, jac, su, sf, ftol, stptol, lrw, liw, iopt, pset, psol, mf, c mdif, ipflag, c those used for both input and output are c u, c and those used only for output are c savf, iterm. c the work arrays rwork and iwork are also used for conditional and c optional inputs and optional outputs. (the term output here refers c to the return from subroutine nksol to the user-s calling program.) c c the legality of input parameters will be thoroughly checked on each c call to nksol. c c the descriptions of the call arguments are as follows. c c n = integer scalar containing the size of the nonlinear system, c and the length of arrays savf, su and sf. c c nu = integer scalar containing the length of array u. c c u = real array of length nu containing the initial c guess to a root of f(u) = 0 on input, and the approximate c solution on return. c c this array is passed as the u argument in all calls to c f, jac, pset and psol. hence its length may exceed n, and c locations u(n+1),... may be used to store other real data c and pass it to f (or jac), pset, and/or psol. c c savf = real array of length n containing f(u) on return from c nksol, and undefined on input. c c f = the name of the user-supplied subroutine for defining the c nonlinear system f(u) = 0. f is a vector-valued function c of the vector u. subroutine f is to compute the function c f. it is to have the form c c subroutine f(n, nu, u, savf) c dimension u(nu), savf(n) c c where n, nu, u are input and the array savf = f(u) is output. c subroutine f should not alter u. f must be declared c external in the calling program. c c subroutine f may access user-defined quantities in c u(n+1),... if u has length exceeding n (nu) in the calling c program. see description of u above. c c jac = the name of the optional user-supplied subroutine for c calculating j(u)*v, where j(u) is the jacobian matrix of c f evaluated at u. subroutine jac returns j(u)*v for a c given vector v. it is to have the form c c subroutine jac (n, nu, u, savf, v, z) c dimension u(nu), savf(n), v(n), z(n) c c where n, nu, u, savf and v are input, and z = j(u)*v as c output. the array u contains the last newton iterate and c savf = f(u). subroutine jac should not alter u or savf. c jac must be declared external in the calling program, and c is only called when mdif=1. when mdif=0, a dummy routine c may be supplied for jac. c c subroutine jac may access user-defined quantities in c u(n+1),... if u has length exceeding n in the calling c program. see description of u above. c c su = real array of length n containing scale factors for the c solution vector u. c c sf = real array of length n containing scale factors for the c function value f(u). c c arrays su and sf can be the same array in the user-s c calling program. if the user does not want to use scaling, c then su(i) and sf(i) should be loaded with 1 for all i. c c ftol = real scalar containing the stopping tolerance on c maxnorm(sf*f(u)). if ftol=zero on input, then c a default value of epsmch**(1/3) will be used, where c epsmch is the machine epsilon (or unit roundoff). c c stptol = real scalar containing the stopping tolerance on the c minimum scaled step u(k) - u(k-1). if stptol=zero on c input, then a default value of epsmch**(2/3) will be c used, where epsmch is the machine epsilon (or unit c roundoff). c c mf = integer scalar containing the nonlinear iteration c method flag. c mf = 1 means the dogleg global strategy with gmres is used. c mf = 2 means the linesearch backtracking with descent c direction chosen by arnoldi is used. c mf = 3 means the linesearch backtracking with descent c direction chosen by gmres is used. c c rwork = a real working array (double precision). c the length of rwork must be at least c 4 + 4*n + lenk + lenwmp, c where c lenk = 4 + (4+mmax)*n + mmax**2, if mf=2, c lenk = 6 + 4*n + (n+1)*mmax + 2*mmax*(mmax+1), if mf=1 or 3. c lenwmp is the length of the user-defined workspace for the c preconditioner routines pset and psol. for the default c value of mmax(=10), c lenk = 104 + 14*n, if mf = 2, and c lenk = 236 + 14*n, if mf = 1 or 3. c c lrw = integer scalar containing the length of the array rwork, c as declared by the user. (this will be checked by the c solver). c c iwork = an integer work array. the length of iwork must be at least c 20 + mmax + lenimp, c where lenimp is the length of the user-defined integer work c space for the preconditioner routines pset and psol. c for the default value of mmax(=10), the length of iwork c must be at least 30 + lenimp. c c liw = integer scalar containing the length of the array iwork, c as declared by the user. (this will be checked by the c solver.) c c iopt = an integer flag to specify whether or not any optional c inputs are being used. c iopt=0 means there are no optional inputs. c default values will be used in all cases. c iopt .ne. 0 means one or more optional inputs are being c used. see the optional inputs section below c for more information. c c iterm = output flag. c iterm= 1 means maxnorm(sf*f(u)) .le. ftol, where c maxnorm() is the maximum norm function. u c is probably an approximate root of f. c iterm= 2 means the scaled distance between the last two c steps is less than stptol. u may be an c approximate root of f, but it is also possible c that the algorithm is making very slow progress c and is not near a root, or that stptol is too c large. c iterm= 3 means iret=1 was returned from either the dogdrv c or lnsrch module, and that the last global step c failed to reduce norm(f) sufficiently. either u c is close to a root of f and no more accuracy is c possible, or the finite-difference approximation c to j*v is inaccurate, or stptol is too large. c if the ncfl optional output value (see below) is c close to the nni value, it may be the case that c the krylov iteration is converging very slowly. c in this case, the user may want to use c preconditioning and/or increase the mmax c value (i.e., increase the maximum dimension of the c krylov subspace.) c iterm= 4 means that the maximum allowable number of c nonlinear iterations has been reached. this c is currently set at 200, but may be changed as c an optional input. see below. c iterm= 5 means 5 consecutive steps of length stepmx (the c maximum stepsize limit) have been taken. either c norm(f) asymptotes from above to a finite value c in some direction, or stepmx is too small. stepmx c is computed internally as c stepmx = 1000*max(norm(su*u0),norm(su)), c where u0 is the initial guess, and norm() is the c euclidean norm. norm(su) here means the euclidean c norm of the n-dimensional array su. stepmx may c also be set as an optional input. see below. c iterm= 6 means that more than 10 failures occurred when c trying to satisfy the beta-condition in the c linesearch algorithm. it is likely that the c iteration is making poor progress. c iterm= 7 means that there was a breakdown in the krylov c iteration. this will likely only occur when c the jacobian matrix j or j*(p-inverse) is ill- c conditioned. if this error return occurs with c mf=2, try either mf=1 or mf=3 instead. c iterm= 8 means there was a nonrecoverable error in pset c causing the iteration to halt. c iterm= 9 means there was a nonrecoverable error in psol c causing the iteration to halt. c iterm=-1 means the mf value was illegal. the allowable c mf values are 1, 2 or 3. c iterm=-2 means the mdif value was illegal. the allowable c mdif values are 0 and 1. c iterm=-3 means the ipflag value was illegal. the allowable c ipflag values are 0 and 1. c iterm=-4 means that an optional input value in iwork was c negative. only nonnegative values are allowed. c iterm=-5 means that the stepmx optional input value was c negative. stepmx must be .ge. 0. c iterm=-6 means that the eta optional input value was c negative. eta must be .ge. 0. c iterm=-7 means that the tau optional input value was c negative. tau must be .ge. 0. c iterm=-8 means that there was insufficient length declared c for the rwork array. see the lenrw optional output c value for the minimum needed length of rwork. c iterm=-9 means that there was insufficient length declared c for the iwork array. see the leniw optional output c value for the minimum needed length of iwork. c c pset = the name of the optional user-supplied subroutine for c calculating any matrix data associated with the c preconditioner p. it is to have the form c c subroutine pset (n, nu, u, savf, su, sf, wk, f, wmp, iwmp, ier) c dimension u(nu), savf(n), su(n), sf(n), wmp(1), iwmp(1) c c where n, nu, u, and savf are input. f is the name of the c user-supplied subroutine defining f(u). any matrix data c loaded by this routine must be stored in the real and integer c arrays wmp and iwmp. these arrays are passed unaltered c to subroutine psol. the array u contains the last newton c iterate and savf = f(u). wk is a work array of length n c for use in this routine. subroutine pset should not alter c u or savf, and is called infrequently in an effort to use c preconditioner data evaluated at an earlier newton iterate. c pset must be declared external in the calling program, c and is only called when ipflag=1. when ipflag=0, a c dummy routine may be supplied for pset. on return pset c should set the error flag as follows.. c ier = 0 if pset was successful. c ier.ne.0 if an unrecoverable error occurred. for c example, the preconditioner may be singular. c in this case the nonlinear iteration is halted. c note: the lengths of arrays wmp and iwmp must be supplied c as optional inputs in the iwork array. see the c optional inputs section below. c c subroutine pset may access user-defined quantities in c u(n+1),... if u has length exceeding n in the calling c program. see description of u above. c c psol = the name of the optional user-supplied subroutine for solving c the linear system p*x = c. it uses matrix data loaded c in subroutine pset and passed via the arrays wmp and iwmp. c it is to have the form c c subroutine psol (n, nu, u, savf, f, jac, wk, wmp, iwmp, x, ier) c dimension u(nu), savf(n), wk(n), wmp(1), iwmp(1) c c where n, nu, u, savf, wmp and iwmp are input. x contains the c right hand side on input and the solution on exit. f is the c name of the user-supplied routine defining f(u), and jac is c the name of the user-supplied routine for calculating j(u)*v. c wk is a real work array of length n available for use in c psol. psol should not alter u or savf, and is only called c when ipflag=1. when ipflag=0, a dummy routine may be c supplied for psol. on return, psol should set the error flag c ier as follows.. c ier = 0 if psol was successful. c ier.lt.0 if an unrecoverable error occurred. in this c case the nonlinear iteration is halted. c ier.gt.0 if a recoverable error occurred, possibly caused c by the preconditioner being out of date. a call c to pset is done, and the solution of the current c linear system is retried. c note: the lengths of arrays wmp and iwmp must be supplied c as optional inputs in the iwork array. see the c optional inputs section below. c c subroutine psol may access user-defined quantities in c u(n+1),... if u has length exceeding n in the calling c program. see description of u above. c c mdif = integer scalar containing the method flag for the c j*v calculation. c mdif=1 means a user-supplied routine jac is used for c calculating the product j*v in the krylov iteration. c mdif=0 means an internally generated j*v is used in the c krylov iteration. this is done by using an f c evaluation and a difference-quotient. c c ipflag = integer scalar indicating if preconditioning is being used. c ipflag=0 means no preconditioning is used. c ipflag=1 means preconditioning is done (on the right only). c c----------------------------------------------------------------------- c optional inputs.. c c the following is a list of the optional inputs provided for in the c call sequence. for each such input variable, this table lists its c name as used in this documentation, its location in the call sequence, c its meaning, and the default value. the use of any of these inputs c requires iopt .ne. 0, and in that case all of these inputs are c examined. a value of zero for any of these optional inputs will c cause the default value to be used. thus to use a subset of the c optional inputs, simply preload locations 1 to 8 in iwork to 0, and c locations 1 to 3 in rwork to 0.0. then set those of interest c to nonzero values. c c name location meaning and default value c c mmax iwork(1) maximum dimension of krylov subspace in the linear c iteration. the default value of mmax is 10. c c iwork(2) not used c c lenwmp iwork(3) length of the real work array wmp for preconditioner c matrix data storage used in subroutines pset and c psol. the default value of lenwmp is 0. c c lenimp iwork(4) length of the integer work array iwmp for c preconditioner matrix data storage used in c subroutines pset and psol. the default value c of lenimp is 0. c c iprint iwork(5) flag indicating whether optional statistics are c desired. c iprint=0 means no statistics are printed. c this is the default. c iprint=1 means the nonlinear iteration count, c norm(sf*f(u)) and number of calls to f are c printed for each nonlinear iterate. c iprint=2 includes the statistics for iprint=1, plus c additional statistics from the krylov c iteration and dogleg/lnsrch strategies. c for the dogleg statistics, tau is the c trust region size, cpl is the length of c of the step to the cauchy point, and gml c is the length of the gmres step. c for the lnsrch statistics, f1 refers to c 0.5*norm(sf*f(u))**2 and f1new refers to c 0.5*norm(sf*f(unew))**2. c c iunit iwork(6) i/o unit number for optional statistics and error c messages. the default value of iunit is 6. c c iermsg iwork(7) flag indicating whether or not error messages are c desired. iermsg .ne. 0 turns off the printing c of error messages. the default value of iermsg c is 0. c c itermx iwork(8) maximum allowable number of nonlinear iterations. c the default value of itermx is 200. c c linerr iwork(9) linear solver error tolerance scheme. the default c value for linerr is 1. c linerr=1 etak = 0.5**k (the default). c linerr=2 etak = min{||F(x_k)||,1/(k+1)}. c linerr=3 etak = min{etak~,.99} where c etak~ = |(||F(x_k)|| - res)|/||F(x_k-1)|| c res = residual for previous linear solve c c stepmx rwork(1) maximum allowable length of a newton step. the c default value of stepmx is c stepmx = 1000*max(norm(su*u0),norm(su)), c where u0 is the initial guess, and norm() is the c euclidean norm. norm(su) here means the euclidean c norm of the n-dimensional array su. c c eta rwork(2) the relative error in computing f(u). this c value is used in the difference-quotient option c for calculating j*v. if the user-s f routine has c large errors associated with the calculation of c f(u), then supplying an accurate eta value will c enhance the accuracy of the difference-quotient. c the default value of eta is the machine epsilon. c c tau rwork(3) real scalar containing the initial trust region c size for the dogleg strategy. tau is the radius c of the m-dimensional ball (m = dimension of the c krylov subspace) in which the nonlinear function c f1(x) = 0.5*norm( sf*f(u+x) )**2 c is well approximated by the local quadratic model c g(x) = 0.5*norm( sf*(f(u) + j(u)*x) )**2 c for x in the krylov subspace, norm() the c euclidean norm, and u the current newton iterate. c if tau=zero on input, then an initial value for c it will be chosen internally. c (tau is not used with the linesearch strategy). c c----------------------------------------------------------------------- c optional outputs. c c as optional additional output from nksol, the variables listed c below are quantities related to the performance of nksol c which are available to the user. these are communicated by way of c the work arrays, but also have internal mnemonic names as shown. c except where stated otherwise, all of these outputs are defined c on return from nksol. on an illegal input return (iterm .lt. 0) c they will be unchanged from their existing values (if any), c except possibly for lenrw and leniw, and the integer counter values c will have been reset to 0. c c name location meaning c c nni iwork(10) the total number of nonlinear iterations. c c nli iwork(11) the total number of linear krylov iterations. c c nfe iwork(12) the total number of calls to the f routine. c c nje iwork(13) the total number of calls to the jac routine. c c npe iwork(14) the total number of calls to pset. c c nps iwork(15) the total number of calls to psol. c c ncfl iwork(16) the total number of times the error tolerance c for the linear krylov iteration was not met c in mmax iterations. c c nbcf iwork(17) the total number of times the beta condition c could not be met in the linesearch algorithm. c the nonlinear iteration is halted if this value c ever exceeds 10. c c nb iwork(18) the total number of backtracks in the linesearch c algorithm, and the number of extra f evaluations c used by the dogleg strategy. nb is calculated as c nb = nfe - nni - (nli - nje) - 1. c c lenrw iwork(19) required minimum length of the rwork array. c c leniw iwork(20) required minimum length of the iwork array. c c stepmx rwork(1) maximum allowable length of a newton step. the c default value of stepmx is c stepmx = 1000*max(norm(su*u0),norm(su)), c where u0 is the initial guess, and norm() is the c euclidean norm. if a value of stepmx .gt. 0 was c input, it is not changed on output. c c fnrm rwork(2) the scaled norm norm(sf*f(u(k))) for the final c computed iterate u(k). c c tau rwork(3) real scalar containing the last used trust region c size for the dogleg strategy. c c----------------------------------------------------------------------- c part ii. optionally replaceable solver routines. c c below are descriptions of two routines in the nksol package which c relate to the stopping criteria for the nonlinear iteration. either c routine may be replaced by a user-supplied version. however, since c such a replacement may have a major impact on performance, it should c be done only when absolutely necessary, and only with great caution. c (note.. the means by which the package version of a routine is c superseded by the user-s version may be system-dependent.) c c (a) snrmf. c the following subroutine is called to compute the scaled norm of c f(u) at the current iterate u.. c subroutine snrmf(n, savf, sf, fnorm) c dimension savf(n), sf(n) c where n and sf are as in the nksol call sequence, savf contains c f(u) with u the current iterate, and fnorm = norm(sf*f(u)) where c norm() is the euclidean norm. the package version of this routines c uses the scaled norm of sf*f(u) defined in stopping criterion 1 above. c c if the user supplies this subroutine, it must return in fnorm c the user-defined scaled norm of f(u). if the user-s scaled norm c of sf*f(u) is simply norm(sf*f(u)), then there is nothing for the c new routine to compute since fnorm already has the desired value c on input. c c (b) slngth. c the following subroutine is called to compute the relative scaled c length of the current step from u to unew = u + s.. c subroutine slngth(n, nu, u, s, su, rlngth) c dimension u(nu), s(n), su(n) c where n, nu, u and su are as in the nksol call sequence, u is the c current newton iterate, and s contains the current step from u c to unew. rlngth is undefined on input. the package version of c this routine computes the relative scaled length defined in stopping c criterion 2 above. c c if the user supplies this subroutine, it must return in rlngth c the user-defined relative scaled length of the step s. c c----------------------------------------------------------------------- c other routines in the package.. c c nkstop stopping routine for the nonlinear iteration. c nkstp0 stopping routine for the initial guess. c slngth computes the relative scaled length of a step in maxnorm. c snrmf computes the maxnorm of sf*f(u). c model driver for approximate solution of newton equations. c solpk interfaces to either spiom or spigmr. c spiom contains the main iteration loop for arnoldi. c spigmr contains the main iteration loop for gmres. c atv calculates j*v for a given vector v. c orthog orthogonalizes a new vector against older basis vectors. c dhefa computes an lu decomposition of a hessenberg matrix. c dhesl solves a hessenberg linear system, using lu factors. c dheqr computes a qr decomposition of a hessenberg matrix. c dhels solves a hessenberg least-squares system, using qr factors. c d1mach computes machine epsilon in double precision. c vnorm computes a scaled euclidean norm of a vector. c dogdrv driver for the dogleg strategy. c dogstp computes the dogleg step for the current trust region. c trgupd decides if step is successfull, and updates trust region size. c lnsrch driver for linesearch backtracking strategy. c errgen outputs error messages. c dnrm2 computes the l2 norm of a vector. c dcopy copies a vector to another vector. c dswap swaps two vectors. c ddot computes the dot product of two vectors. c daxpy adds a scalar times a vector to a vector. c dscal scales a vector by a scalar. c idamax finds the element of largest magnitude in a vector. c note.. dnrm2, dcopy, dswap, ddot, daxpy, dscal, and idamax are from c the blas collection (basic linear algebra modules). c intrinsic fortran routines.. dabs, dsqrt, dmin1, dmax1, dfloat, dsign. c----------------------------------------------------------------------- external f external jac external pset external psol c passed variables integer iprint integer iunit integer iermsg integer n integer nu integer lrw integer iwork integer liw integer iopt integer iterm integer mf integer mdif integer ipflag integer ierr integer lenwmp integer lenimp integer itermx integer i double precision savf double precision u double precision rwork double precision su double precision sf double precision stptol double precision ftol c local variables integer locwmp integer locimp integer iersl integer kmp integer mmax integer methn integer methk integer ipflg integer iret integer lup integer lx integer luprv integer lfprv integer lwm integer lenk integer lenwm integer lenrw integer leniw integer liwm integer leniwm integer nbcfmx integer iter integer ncscmx integer mfdif integer nfe integer nje integer nni integer nli integer npe integer nps integer ncfl integer nbcf integer linerr double precision epsmch double precision fnrm double precision fnrmold double precision f1nrm double precision f1nrmp double precision unrm double precision stepmx double precision d1mach double precision vnorm double precision epsfac double precision epsfac1 double precision sunrm double precision dnrm2 double precision tau dimension savf(n) dimension u(nu) dimension rwork(lrw) dimension su(n) dimension sf(n) dimension iwork(liw) double precision zero double precision one double precision two double precision three logical mxtkn c----------------------------------------------------------------------- c nks001 common block. c----------------------------------------------------------------------- double precision eps, sqteta, rhom common /nks001/ eps, rhom, sqteta, locwmp, locimp, iersl, kmp, * mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, * nli, npe, nps, ncfl, nbcf c----------------------------------------------------------------------- c nks002 common block. c----------------------------------------------------------------------- common /nks002/ iprint, iunit, iermsg c----------------------------------------------------------------------- c nks003 common block. c----------------------------------------------------------------------- double precision pthrsh common /nks003/ pthrsh c save data zero/0.0d0/,one/1.0d0/,two/2.0d0/,three/3.0d0/ c----------------------------------------------------------------------- c zero counters in common block nks001. c----------------------------------------------------------------------- nni = 0 nli = 0 npe = 0 nps = 0 ncfl = 0 nbcf = 0 nfe = 0 nje = 0 c----------------------------------------------------------------------- c load values in nks002 common block. c----------------------------------------------------------------------- iprint = 0 iunit = 6 iermsg = 0 c----------------------------------------------------------------------- c load pthrsh value in nks003 common block. c----------------------------------------------------------------------- pthrsh = two c======================================================================= c block a: initialization section. c======================================================================= c----------------------------------------------------------------------- c compute machine epsilon. c----------------------------------------------------------------------- epsmch = d1mach(4) c----------------------------------------------------------------------- c initialize parameters. check for illegal input. c----------------------------------------------------------------------- if (nu.lt.n) then iterm = -1 ierr = 130 call errgen(ierr,zero,zero,0,0) go to 500 endif if ( (mf .lt. 1) .or. (mf .gt. 3) ) then c illegal value of method flag mf. iterm = -1 ierr = 10 call errgen(ierr,zero,zero,0,0) go to 500 endif c----------------------------------------------------------------------- c load common variables methn and methk. c methn = 1, dogleg global strategy used. c 2, linesearch backtracking global strategy used. c methk = 1, arnoldi algorithm used. c 2, gmres algorithm used. c----------------------------------------------------------------------- if (mf .eq. 1) then methn = 1 methk = 2 else methn = 2 methk = mf - 1 endif if ( (mdif .lt. 0) .or. (mdif .gt. 1) ) then c illegal value of method flag mdif. iterm = -2 ierr = 20 call errgen(ierr,zero,zero,0,0) go to 500 endif c load common variable mfdif. mfdif = 2 - mdif if ( (ipflag .lt. 0) .or. (ipflag .gt. 1) ) then c illegal value of ipflag. iterm = -3 ierr = 30 call errgen(ierr,zero,zero,0,0) go to 500 endif c load common variable ipflg. ipflg = ipflag c linerr = 1 lenwmp = 0 lenimp = 0 mmax = 10 kmp = mmax stepmx = zero itermx = 0 sqteta = zero tau = zero c check for optional inputs. if (iopt .ne. 0) then c first check for illegal values. do 10 i = 1,9 if (iwork(i) .lt. 0) then iterm = -4 ierr = 40 call errgen(ierr,zero,zero,i,iwork(i)) go to 500 endif 10 continue do 20 i = 1,3 if (rwork(i) .lt. zero) then iterm = -(i+4) ierr = 50 call errgen(ierr,rwork(i),zero,i,0) go to 500 endif 20 continue if (iwork(1) .gt. 0) mmax = iwork(1) if (iwork(2) .gt. 0) kmp = iwork(2) if (iwork(3) .gt. 0) lenwmp = iwork(3) if (iwork(4) .gt. 0) lenimp = iwork(4) if (iwork(5) .gt. 0) iprint = iwork(5) if (iwork(6) .gt. 0) iunit = iwork(6) if (iwork(7) .gt. 0) iermsg = iwork(7) if (iwork(8) .gt. 0) itermx = iwork(8) if (iwork(9) .gt. 0) linerr = iwork(9) if (rwork(1) .gt. zero) stepmx = rwork(1) if (rwork(2) .gt. zero) sqteta = dsqrt(rwork(2)) if (rwork(3) .gt. zero) tau = rwork(3) endif mmax = min0(mmax,n) kmp = mmax lup = 4 lx = lup + n luprv = lx + n lfprv = luprv + n lwm = lfprv + n if (methk .eq. 1) then lenk = 1 + 3*n + 3 + n*mmax + n + mmax**2 else if (methk .eq. 2) then lenk = 1 + 3*n + 3 + n*mmax + n + 2 + 2*mmax*(mmax+1) + mmax endif lenwm = lenk + lenwmp if (lenwmp .ne. 0) then locwmp = lenk + 1 else locwmp = lenk endif lenrw = lwm + lenwm if (lrw .lt. lenrw) then c insufficient storage in rwork. iterm = -8 ierr = 100 call errgen(ierr,zero,zero,lrw,lenrw) go to 500 endif c leniw = 20 + mmax + lenimp if (liw .lt. leniw) then c insufficient storage in iwork iterm = -9 ierr = 110 call errgen(ierr,zero,zero,liw,leniw) go to 500 endif liwm = 21 leniwm = mmax + lenimp if (lenimp .ne. 0) then locimp = mmax + 1 else locimp = mmax endif if (itermx .eq. 0) itermx = 200 nbcfmx = 10 if (stptol .eq. zero) stptol = epsmch**(two/three) if (stepmx .eq. zero) then unrm = vnorm(n,u,su) sunrm = dnrm2(n,su,1) stepmx = 1.d3*dmax1(unrm,sunrm) endif if (ftol .eq. zero) ftol = epsmch**(one/three) if (sqteta .eq. zero) sqteta = dsqrt(epsmch) c load initial trust region size tau. if (tau .le. zero) then tau = -one endif iter = 0 c----------------------------------------------------------------------- c call subroutine f to evaluate f at the initial guess. c----------------------------------------------------------------------- call f(n, nu, u, savf) nfe = nfe + 1 fnrm = vnorm(n,savf,sf) f1nrm = fnrm*fnrm/two fnrmold = fnrm c----------------------------------------------------------------------- c test to see if initial conditions satisfy stopping criterion. c----------------------------------------------------------------------- call nkstp0(n, savf, sf, ftol, fnrm, iterm) if (iterm .ne. 0) go to 500 ncscmx = 0 c======================================================================= c block b: iteration section. c======================================================================= 100 continue iter = iter + 1 c----------------------------------------------------------------------- c call model to calculate the approximate newton step using the c appropriate krylov algorithm. c----------------------------------------------------------------------- c modification [12-May-1993] error tol set to h. walker criterion c----------------------------------------------------------------------- c c original criterion - superlinear convergence c if(linerr.eq.1) then epsfac = 0.5d0**(iter) endif c c ds83 criterion - quadratic convergence c if(linerr.eq.2) then epsfac1 = 1.0d0/dble(iter+1) epsfac = dmin1(epsfac1,fnrm) endif c c walker criterion - ?? convergence c if(linerr.eq.3) then if(iter.eq.1) then epsfac = 0.5d0 else epsfac1 = (fnrm - dabs(rhom))/fnrmold epsfac = dmin1(epsfac1,0.99d0) endif endif eps = (epsmch + epsfac)*fnrm call model(n,nu,rwork(lwm),lenwm,iwork(liwm),leniwm,u,savf, * rwork(lx),f,jac,su,sf,pset,psol) if (iersl .ne. 0) then iterm = iersl go to 500 endif c----------------------------------------------------------------------- c call dogdrv or lnsrch to get an acceptable step x. c----------------------------------------------------------------------- if (methn .eq. 1) then call dogdrv (n,nu,rwork(lwm),lenwm,iwork(liwm),leniwm,u, * savf,f1nrm,rwork(lx),su,sf,stepmx,stptol,tau,iret, * rwork(luprv),rwork(lfprv),rwork(lup),f1nrmp, * mxtkn,f,jac,psol) if (iersl .ne. 0) then iterm = iersl go to 500 endif else call lnsrch(n,nu,u,savf,f1nrm,rwork(lx),su,sf,stepmx, * stptol,iret,rwork(lup),f1nrmp,mxtkn,f,jac) if (nbcf .gt. nbcfmx) then iterm = 6 ierr = 120 call errgen(ierr,zero,zero,nbcf,nbcfmx) go to 500 endif endif c----------------------------------------------------------------------- c call nkstop to check if tolerances are met. c----------------------------------------------------------------------- fnrmold = fnrm fnrm = dsqrt(two*f1nrmp) call nkstop(n,nu,u,rwork(lup),savf,fnrm,su,sf,stptol, * rwork(lx),ftol,iret,iter,itermx,mxtkn,ncscmx,iterm) c do 300 i = 1,n u(i) = rwork(i+lup-1) 300 continue f1nrm = f1nrmp if (iprint .ge. 1) write(iunit,400) iter,fnrm,nfe 400 format(' iter= ',i4,' fnrm= ',g26.16,' nfe= ',i6) if (iterm .eq. 0) go to 100 c----------------------------------------------------------------------- c load optional outputs into iwork array and return. c----------------------------------------------------------------------- 500 continue iwork(10) = nni iwork(11) = nli iwork(12) = nfe iwork(13) = nje iwork(14) = npe iwork(15) = nps iwork(16) = ncfl iwork(17) = nbcf iwork(18) = nfe - nni - (nli - nje) - 1 iwork(19) = lenrw iwork(20) = leniw rwork(1) = stepmx rwork(2) = fnrm rwork(3) = tau return c----------------------- end of subroutine nksol ----------------------- end subroutine nkstop(n, nu, u, unew, savf, fnorm, su, sf, stptol, * wk, ftol, iret, iter, itermx, mxtkn, ncscmx, iterm) c----------------------------------------------------------------------- c this routine decides whether to terminate nksol based on one of c several stopping criteria described below. (see description of c iterm.) c c on entry c c n = size of the nonlinear system the length of arrays c unew, savf, su and sf. c c nu = length of array u. c c u = real array containing the previous approximate root. c c unew = real array containing the current approximate root. c c savf = real array containing the n-vector f(unew). c c fnorm = real scalar containing norm(sf*f(unew)), where norm() c denotes the euclidean norm. (currently, not used, but c available if the user supplies his own stopping routine.) c c su = real array containing scaling factors for u. c c sf = real array containing scaling factors for f(u). c c stptol = user-supplied tolerance on the minimum step length c s = unew-u. c c wk = real array of length n used for work space by nkstop. c c ftol = user-supplied tolerance on norm(sf*f(unew)). c c iret = output flag from either the dogdrv or lnsrch module. c c iter = current nonlinear iteration count. c c itermx = maximum allowable number of nonlinear iterations. c c mxtkn = logical flag returned from either lnsrch or dogdrv c module indicating if the last step taken was of maximum c length stepmx. c c ncscmx = number of consecutive steps on which the stepsize c was of maximum length stepmx. c c on return c c ncscmx = number of consecutive steps on which the maximum c allowable stepsize length stepmx was taken. if c mxtkn=.true., then ncscmx has been incremented by c one, and ncscmx is reset to zero if mxtkn=.false. c c iterm = output flag. c iterm=0 means no termination criteria satisfied, c and the iteration continues. c iterm=1 means maxnorm(sf*f(unew)) .le. ftol, where c maxnorm() is the maximum norm function. unew c is probably an approximate root of f. c iterm=2 means the scaled distance between the last two c steps is less than stptol. unew may be an c approximate root of f, but it is also possible c that the algorithm is making very slow progress c and is not near a root, or that stptol is too c large. c iterm=3 means iret=1 was returned from either the dogdrv c or lnsrch module, and that the last global step c failed to reduce norm(f) sufficiently. either u c is close to a root of f and no more accuracy is c possible, or the finite-difference approximation c to j*v is inaccurate, or stptol is too large. c iterm=4 means that the maximum allowable number of c nonlinear iterations has been exceeded. c iterm=5 means 5 consecutive steps of length stepmx (the c maximum stepsize limit) have been taken. either c norm(f) asymptotes from above to a finite value c in some direction, or stepmx is too small. c c----------------------------------------------------------------------- integer n integer nu integer iret integer iter integer itermx integer ncscmx integer iterm integer i double precision u double precision unew double precision savf double precision fnorm double precision su double precision sf double precision stptol double precision ftol double precision fmax double precision rlngth double precision wk double precision thsnd dimension u(nu) dimension unew(n) dimension savf(n) dimension su(n) dimension sf(n) dimension wk(n) logical mxtkn c----------------------------------------------------------------------- c nks003 common block. c----------------------------------------------------------------------- double precision pthrsh common /nks003/ pthrsh save data thsnd/1000.0d0/ c iterm = 0 c check for error return from dogdrv or lnsrch. if (iret .eq. 1) then iterm = 3 go to 999 endif c check tolerance on norm(sf*f(unew)). fmax = fnorm call snrmf(n, savf, sf, fmax) if (fmax .le. ftol) then iterm = 1 go to 999 endif c check for scaled distance between last two steps too small. do 200 i = 1,n wk(i) = unew(i) - u(i) 200 continue call slngth(n, nu, u, wk, su, rlngth) if (rlngth .le. stptol) then iterm = 2 go to 999 endif c check for maximum number of iterates exceeded. if (iter .ge. itermx) then iterm = 4 go to 999 endif c check for consecutive number of steps taken of size stepmx. c if mstkn=.false., set ncscmx = 0. if (mxtkn) then ncscmx = ncscmx + 1 else ncscmx = 0 endif if (ncscmx .eq. 5) iterm = 5 c load threshold for re-evaluating preconditioner. pthrsh = fmax/(thsnd*ftol) + rlngth c 999 continue return c----------------------- end of subroutine nkstop ---------------------- end c----------------------------------------------------------------------- subroutine slngth(n, nu, u, s, su, rlngth) c----------------------------------------------------------------------- c this routine calculates the scaled steplength of the current step s. c c on entry c c n = size of the nonlinear system, and the length of arrays c u, unew, savf, su and sf. c c nu = length of array u. c c u = real array containing the previous approximate root. c c s = real array containing the current step from u to unew. c c su = real array containing scaling factors for u. c c on return c c rlngth = real scalar containing the scaled steplength. c----------------------------------------------------------------------- integer i integer n integer nu double precision u double precision s double precision su double precision one double precision rlngth double precision temp double precision zero dimension u(nu) dimension s(n) dimension su(n) data one/1.0d0/, zero/0.0d0/ c rlngth = zero do 10 i = 1,n temp = one/su(i) temp = dmax1(dabs(u(i)),temp) rlngth = dmax1(dabs(s(i))/temp,rlngth) 10 continue return c----------------------- end of subroutine slngth ---------------------- end c----------------------------------------------------------------------- subroutine snrmf(n, savf, sf, fnorm) c----------------------------------------------------------------------- c this routine calculates the scaled norm of f(u) for the current c iterate u. c c on entry c c n = size of the nonlinear system, and the length of arrays c savf and sf. c c savf = real array containing the n-vector f(u). c c sf = real array containing scaling factors for f(u). c c fnorm = real scalar containing norm(sf*f(unew)), where norm() c denotes the euclidean norm. (currently, not used, but c available if the user supplies his own version of this c routine.) c c on return c c fnorm = real scalar containing the scaled norm of f(u). c----------------------------------------------------------------------- integer n integer i double precision savf double precision sf double precision zero double precision temp double precision fnorm double precision fmax dimension savf(n) dimension sf(n) save data zero/0.0d0/ c fmax = zero do 10 i = 1,n temp = sf(i)*dabs(savf(i)) fmax = dmax1(temp,fmax) 10 continue fnorm = fmax return c----------------------- end of subroutine snrmf ----------------------- end c----------------------------------------------------------------------- subroutine nkstp0(n, savf, sf, ftol, fnorm, iterm) c----------------------------------------------------------------------- c this routine decides whether to terminate nksol at iteration zero c because u0 is an approximate root of f(u). c c on entry c c n = size of the nonlinear system, and the length of arrays c savf and sf. c c savf = real array containing the n-vector f(u0). c c ftol = real scalar containing the user-s stopping tolerance. c c fnorm = real scalar containing norm(sf*f(u0)), where norm() c denotes the euclidean norm. (currently, not used, but c available if the user supplies his own stopping routine.) c c on return c c iterm = output flag. c iterm=0 means u0 is not an approximate root of f. c iterm=1 means u0 is an approximate root of f. c c----------------------------------------------------------------------- integer n integer iterm double precision savf double precision sf double precision ftol double precision fmax double precision p01 double precision fnorm dimension savf(n) dimension sf(n) save data p01/1.d-2/ c fmax = fnorm call snrmf(n, savf, sf, fmax) iterm = 0 if (fmax .le. p01*ftol) iterm = 1 c return c----------------------- end of subroutine nkstp0 ---------------------- end c----------------------------------------------------------------------- double precision function d1mach (idum) integer idum c----------------------------------------------------------------------- c this routine computes the unit roundoff of the machine in double c precision. this is defined as the smallest positive machine number c u such that 1.0d0 + u .ne. 1.0d0 (in double precision). c----------------------------------------------------------------------- double precision u double precision comp u = 1.0d0 10 u = u*0.5d0 comp = 1.0d0 + u if (comp .ne. 1.0d0) go to 10 d1mach = u*2.0d0 return c----------------------- end of function d1mach ------------------------ end double precision function vnorm (n, v, s) c----------------------------------------------------------------------- c this function routine computes the scaled euclidean norm c of the vector of length n contained in the array v, with scale factors c contained in the array s of length n.. c vnorm = dsqrt( sum( v(i)*s(i) )**2 ) c----------------------------------------------------------------------- integer n integer i double precision v double precision s double precision sum dimension v(n) dimension s(n) sum = 0.0d0 do 10 i = 1,n 10 sum = sum + (v(i)*s(i))**2 vnorm = dsqrt(sum) return c----------------------- end of function vnorm ------------------------- end c----------------------------------------------------------------------- subroutine model(n, nu, wm, lenwm, iwm, leniwm, u, savf, x, f, * jac, su, sf, pset, psol) c----------------------------------------------------------------------- c this routine interfaces to subroutine solpk for the approximate c solution of the newton equations in the newton iteration. c c on entry c c n = size of the nonlinear system, and the length of arrays c unew, savf, su and sf. c c nu = length of array u. c c u = real array containing the previous approximate root. c c savf = real array containing the n-vector f(u). c c su = real array containing scaling factors for u. c c sf = real array containing scaling factors for f(u). c c wm = real work space containing data for the krylov algorithm c (krylov basis vectors, hessenberg matrix, user-supplied c work space for preconditioning, etc.). c c lenwm = length of work array wm. c c iwm = integer work space for the krylov algorithm (pivot c information for the lu-factorization of the hessenberg c matrix, user-supplied work space for preconditioning, c etc.). c c leniwm = length of work array iwm. c c on return c c x = the approximate solution vector of the newton equations. c c iersl = output flag (in common).. c iersl = 0 means no trouble occured. c iersl = 7 means the krylov solver suffered a breakdown, and c so the solution x is undefined. c iersl = 8 means there was a nonrecoverable error in pset c and the nonlinear iteration is halted. c iersl = 9 means there was a nonrecoverable error in psol c and the nonlinear iteration is halted. c c----------------------------------------------------------------------- external f external jac external pset external psol integer nu integer i integer n integer lenwm integer iwm integer leniwm integer locwmp integer locimp integer iersl integer kmp integer mmax integer methn integer methk integer ipflg integer mfdif integer nfe integer nje integer nni integer nli integer npe integer nps integer ncfl integer nbcf integer ipcur integer ier double precision wm double precision x double precision su double precision sf double precision savf double precision u double precision onept5 dimension wm(lenwm) dimension iwm(leniwm) dimension x(n) dimension u(nu) dimension savf(n) dimension su(n) dimension sf(n) c----------------------------------------------------------------------- c nks001 common block. c----------------------------------------------------------------------- double precision eps, sqteta, rhom common /nks001/ eps, rhom, sqteta, locwmp, locimp, iersl, kmp, * mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, * nli, npe, nps, ncfl, nbcf c----------------------------------------------------------------------- c nks003 common block. c----------------------------------------------------------------------- double precision pthrsh common /nks003/ pthrsh c save data onept5/1.5d0/ c----------------------------------------------------------------------- c call user-supplied routine pset to load preconditioner matrix c data if threshold achieved. set ipcur=1 if p is re-evaluated. c----------------------------------------------------------------------- 10 continue ipcur = 0 if ( (pthrsh .gt. onept5) .and. (ipflg .ne. 0) ) then ier = 0 call pset (n, nu, u, savf, su, sf, x, f, wm(locwmp), * iwm(locimp), ier) npe = npe + 1 ipcur = 1 if (ier .ne. 0) then iersl = 8 return endif endif c----------------------------------------------------------------------- c load x with -f(u). c----------------------------------------------------------------------- do 100 i = 1,n 100 x(i) = -savf(i) c----------------------------------------------------------------------- c call solpk to solve j*x = -f using the appropriate krylov c algorithm. c----------------------------------------------------------------------- call solpk (n,nu,wm,lenwm,iwm,leniwm,u,savf,x,su,sf,f,jac,psol) if (iersl .lt. 0) then c nonrecoverable error from psol. set iersl and return. iersl = 9 return endif if ( (iersl .gt. 0) .and. (ipflg .ne. 0) ) then if (ipcur .eq. 0) go to 10 endif c iersl=1 on return from solpk means there was a breakdown in the c krylov iteration. set iersl=7 to halt iteration and return. if (iersl .eq. 1) iersl = 7 c return c----------------------- end of subroutine model ----------------------- end c----------------------------------------------------------------------- subroutine solpk (n, nu, wm, lenwm, iwm, leniwm, u, savf, x, * su, sf, f, jac, psol) external f external jac external psol integer n integer nu integer lenwm integer iwm integer leniwm double precision wm double precision x double precision su double precision sf double precision savf double precision u dimension u(nu) dimension savf(n) dimension su(n) dimension sf(n) dimension wm(lenwm) dimension iwm(leniwm) dimension x(n) c----------------------------------------------------------------------- c this routine interfaces to either subroutine spiom or spigmr for c the solution of the linear system j*x = -f arising from a newton c iteration. c c on entry c c n = size of the nonlinear system, and the length of arrays c u, unew, savf, su and sf. c c nu = length of array u past n. c c u = real array containing the previous approximate root. c c savf = real array containing the n-vector f(u). c c su = real array containing scaling factors for u. c c sf = real array containing scaling factors for f(u). c c wm = real work space containing data for the krylov algorithm c (krylov basis vectors, hessenberg matrix, user-supplied c work space for preconditioning, etc.). c c lenwm = length of work array wm. c c iwm = integer work space for the krylov algorithm (pivot c information for the lu-factorization of the hessenberg c matrix, user-supplied work space for preconditioning, c etc.). c c leniwm = length of work array iwm. c c on return c c x = the approximate solution vector of the newton equations. c c iersl = output flag (in common).. c iersl = 0 means no trouble occured. c iersl = 1 means the krylov solver suffered a breakdown, and c so the solution x is undefined. c iersl =-1 means there was a nonrecoverable error in psol. c this forces the iteration to halt. c c this routine also uses the common variables c eps, locwmp, locimp, kmp, mmax, methk, nni, nli, npe, nps, ncfn, ncfl c----------------------------------------------------------------------- c methods used so far: c 1. methk = 1 ------> spiom c 2. methk = 2 ------> gmres c----------------------------------------------------------------------- integer i integer ib integer iflag integer ihes integer iwk integer npsl integer iv integer mmaxp1 integer miom integer ihsv integer iq integer mgmr double precision bnrm c----------------------------------------------------------------------- c nks001 common block. c----------------------------------------------------------------------- integer locwmp integer locimp integer iersl integer kmp integer mmax integer methn integer methk integer ipflg integer mfdif integer nfe integer nje integer nni integer nli integer npe integer nps integer ncfl integer nbcf double precision eps, sqteta, rhom common /nks001/ eps, rhom, sqteta, locwmp, locimp, iersl, kmp, * mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, * nli, npe, nps, ncfl, nbcf save c iersl = 0 go to (100,200), methk c----------------------------------------------------------------------- c use the spiom algorithm to solve the linear system j*x = -f. c----------------------------------------------------------------------- 100 continue iv = 3 ib = iv + n*mmax ihes = ib + n iwk = ihes + mmax*mmax do 110 i = 1,n 110 wm(i+ib-1) = x(i) call spiom (n, nu, u, savf, wm(ib), su, sf, mmax, kmp, eps, * f, jac, psol, npsl, x, wm(iv), wm(ihes), iwm, miom, * wm(locwmp), iwm(locimp), wm(iwk), ipflg, iflag, rhom) nni = nni + 1 nli = nli + miom nps = nps + npsl if (iflag .ne. 0) ncfl = ncfl + 1 if (iflag .ge. 2) iersl = 1 if (iflag .lt. 0) iersl = -1 return c----------------------------------------------------------------------- c use the spigmr algorithm to solve the linear system j*x = -f. c----------------------------------------------------------------------- 200 continue mmaxp1 = mmax + 1 iv = 3 ib = iv + n*mmax ihes = ib + n + 1 ihsv = ihes + mmax*(mmaxp1+1) + 1 iwk = ihsv + mmax*mmaxp1 iq = iwk + n do 210 i = 1,n 210 wm(i+ib-1) = x(i) call spigmr (n, nu, u, savf, wm(ib), su, sf, mmax, mmaxp1, kmp, * eps, f, jac, psol, npsl, x, wm(iv), wm(ihes), wm(iq), * wm(ihsv), mgmr, wm(locwmp), iwm(locimp), wm(iwk), methn, * bnrm, ipflg, iflag, rhom) nni = nni + 1 nli = nli + mgmr nps = nps + npsl if (iflag .ne. 0) ncfl = ncfl + 1 if (iflag .ge. 2) iersl = 1 if (iflag .lt. 0) iersl = -1 if (iersl .eq. 0) then call dcopy (mmax, wm(ib), 1, wm(ihes), 1) wm(1) = bnrm iwm(1) = mgmr endif return c----------------------- end of subroutine solpk ----------------------- end c----------------------------------------------------------------------- subroutine spiom (n, nu, u, savf, b, su, sf, mmax, iomp, eps, * f, jac, psol, npsl, x, v, hes, ipvt, miom, wmp, * iwmp, wk, ipflg, iflag, rho) external f external jac external psol integer n integer nu integer mmax integer iomp integer ipvt integer miom integer iflag integer iprint integer iunit integer iermsg integer npsl integer iwmp integer ipflg double precision u double precision savf double precision b double precision su double precision sf double precision x double precision eps double precision v double precision hes double precision wmp double precision wk dimension u(nu) dimension savf(n) dimension b(n) dimension su(n) dimension sf(n) dimension x(n) dimension v(n,mmax+1) dimension hes(mmax,mmax) dimension wmp(1) dimension wk(n) dimension iwmp(1) dimension ipvt(mmax) c----------------------------------------------------------------------- c this routine solves the linear system a * x = b using a scaled c preconditioned version of the incomplete orthogonalization method. c an initial guess of x = 0 is assumed. c----------------------------------------------------------------------- c c on entry c c n = problem size, passed to f, jac and psol. also, the order c of the matrix a, and the lengths of the vectors u, savf, c b, su, sf, and x. c c nu = length of array u. c c u = array containing current approximate solution to f(u) = 0. c c savf = array containing current value of f(u). c c b = the right hand side of the system a*x = b. c b is also used as work space when computing the c final approximation. c c su = array of length n containing scale factors for u. c c sf = array of length n containing scale factors for f. c c mmax = the maximum allowable order of the matrix hes. c c iomp = the number of previous vectors the new vector vnew c must be made orthogonal to. iomp .le. mmax. c c eps = tolerance on residuals b-a*x in scaled euclidean norm. c c wmp = real work array used by preconditioner psol. c c iwmp = integer work array used by preconditioner psol. c c ipflg = integer flag c ipflg=0 means no preconditioning is done. c ipflg.ne.0 means preconditioning is being performed. c c on return c c x = the final computed approximation to the solution c of the system a*x = b. c c rho = the scaled euclidean norm of the residual associated c with x. c c v = the n x (miom+1) array containing the miom c orthogonal vectors v(*,1) to v(*,miom). c c hes = the lu factorization of the miom x miom upper c hessenberg matrix whose entries are the c scaled inner products of a*v(*,k) and v(*,i). c c ipvt = an integer array containg pivoting information. it c is loaded in dhefa and used in dhesl. c c miom = the number of iterations performed, and current c order of the upper hessenberg matrix hes. c c npsl = the number of calls to psol. c c iflag = integer output flag.. c iflag=0 means convergence in miom iterations, miom.le.mmax. c iflag=1 means the convergence test did not pass in mmax c iterations. c iflag=2 means there was a recoverable error in psol c caused by the preconditioner being out of date. c iflag=-1 means there was a nonrecoverable error in psol. c c note: the x array is used as work space in routines psol and atv. c----------------------------------------------------------------------- integer i integer info integer j integer k integer m integer mm1 integer ier double precision bnrm double precision prod double precision rho double precision snormw double precision dnrm2 double precision tem c----------------------------------------------------------------------- c nks002 common block. c----------------------------------------------------------------------- common /nks002/ iprint, iunit, iermsg save c iflag = 0 miom = 0 npsl = 0 c zero out the hes array. ---------------------------------------------- do 15 j = 1,mmax do 10 i = 1,mmax 10 hes(i,j) = 0.0d0 15 continue c----------------------------------------------------------------------- c the initial residual is the vector b. apply scaling to b, and c then normalize as v(*,1). c----------------------------------------------------------------------- do 20 i = 1,n 20 v(i,1) = b(i)*sf(i) bnrm = dnrm2(n, v, 1) tem = 1.0d0/bnrm call dscal (n, tem, v(1,1), 1) c----------------------------------------------------------------------- c main loop to compute the vectors v(*,2) to v(*,mmax). c the running product prod is needed for the convergence test. c----------------------------------------------------------------------- prod = 1.0d0 if (iprint .gt. 1) write(iunit,30) 30 format(' ------ in routine spiom ------ ') do 90 m = 1,mmax miom = m c----------------------------------------------------------------------- c call routine atv to compute vnew = a*v(m). c call routine orthog to orthogonalize the new vector vnew = v(*,m+1). c call routine dhefa to update the factors of hes. c----------------------------------------------------------------------- call atv (n, nu, u, savf, v(1,m), su, sf, x, f, jac, psol, * v(1,m+1), wk, wmp, iwmp, ier, npsl) if (ier .ne. 0) go to 300 call orthog (v(1,m+1),v,hes,n,m,mmax,mmax,iomp,snormw) call dhefa (hes, mmax, m, ipvt, info, m) mm1 = m - 1 if (m .gt. 1 .and. ipvt(mm1) .eq. mm1) prod = prod*hes(m,mm1) if (info .ne. m) go to 50 c----------------------------------------------------------------------- c the last pivot in hes was found to be zero. c if vnew = 0 or l = mmax, take an error return with iflag = 2. c otherwise, continue the iteration without a convergence test. c----------------------------------------------------------------------- if (snormw .eq. 0.0d0) go to 120 if (m .eq. mmax) go to 120 go to 60 c----------------------------------------------------------------------- c update rho, the estimate of the norm of the residual b - a*x(m). c test for convergence. if passed, compute approximation x(m). c if failed and m .lt. mmax, then continue iterating. c----------------------------------------------------------------------- 50 continue rho = bnrm*snormw*dabs(prod/hes(m,m)) if (iprint .gt. 1) write(iunit,55) m, rho, eps 55 format(' m , res, eps ',i4,e25.16,1x,e25.16) if (rho .le. eps) go to 200 if (m .eq. mmax) go to 150 c if l .lt. mmax, store hes(m+1,m) and normalize the vector v(*,m+1). 60 continue hes(m+1,m) = snormw tem = 1.0d0/snormw call dscal (n, tem, v(1,m+1), 1) 90 continue c----------------------------------------------------------------------- c l has reached mmax without passing the convergence test.. c compute a solution anyway and return with iflag = 1. c otherwise, return with iflag = 2. c----------------------------------------------------------------------- 120 continue iflag = 2 return 150 iflag = 1 c----------------------------------------------------------------------- c compute the approximation x(m) to the solution. c since the vector x was used as work space in routine orthog, c and since initial guess of the newton correction is zero, x must be c set back to zero. c----------------------------------------------------------------------- 200 continue m = miom do 210 k = 1,m 210 b(k) = 0.0d0 b(1) = bnrm call dhesl(hes,mmax,m,ipvt,b) do 220 k = 1,n 220 x(k) = 0.0d0 do 230 i = 1,m call daxpy (n, b(i), v(1,i), 1, x, 1) 230 continue do 240 i = 1,n 240 x(i) = x(i)/su(i) if (ipflg .eq. 1) then ier = 0 call psol (n, nu, u, savf, f, jac, wk, wmp, iwmp, x, ier) npsl = npsl + 1 if (ier .ne. 0) go to 300 endif return c----------------------------------------------------------------------- c this block handles error returns forced by routine psol. c----------------------------------------------------------------------- 300 continue if (ier .lt. 0) iflag = -1 if (ier .gt. 0) iflag = 3 return c----------------------- end of subroutine spiom ----------------------- end c----------------------- end of subroutine spiom ----------------------- subroutine atv (n, nu, u, savf, v, su, sf, ftem, f, jac, psol, * z, vtem, wmp, iwmp, ier, npsl) external f external jac external psol integer n integer nu integer iwmp integer ier integer npsl integer i double precision u double precision savf double precision v double precision su double precision sf double precision ftem double precision vtem double precision z double precision wmp dimension u(nu) dimension savf(n) dimension v(n) dimension su(n) dimension sf(n) dimension ftem(n) dimension vtem(n) dimension z(n) dimension iwmp(1) dimension wmp(1) c----------------------------------------------------------------------- c this routine computes the product c c sf*j*(p-inverse)*(su-inverse*v), c c where su and sf are diagonal scaling matrices, j is the jacobian c matrix, and p is a preconditioning matrix. the product is stored c in z. this is computed either by a call to jac and psol, or by a c difference quotient, which involves calls to f and psol. c----------------------------------------------------------------------- c c on entry c c n = problem size, passed to f, jac and psol. c c nu = length of array u. c c u = array containing current approximate to a solution of c f(u) = 0. c c savf = array containing current value of f(u). c c v = real array of length n. c c ftem = work array of length n. c c vtem = work array of length n used to store the unscaled c version of v. c c su = array of length n containing scale factors for u. c c sf = array of length n containing scale factors for f. c c wmp = real work array used by preconditioner psol. c c iwmp = integer work array used by preconditioner psol. c c on return c c z = array of length n containing desired scaled c matrix-vector product. c c in addition, this routine uses the common variables c mfdif, nfe, and nje. c----------------------------------------------------------------------- c----------------------------------------------------------------------- c nks001 common block. c----------------------------------------------------------------------- double precision delt double precision sqteta double precision rhom integer locwmp integer locimp integer iersl integer kmp integer mmax integer methn integer methk integer ipflg integer mfdif integer nfe integer nje integer nni integer nli integer npe integer nps integer ncfl integer nbcf common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp, * mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, * nli, npe, nps, ncfl, nbcf c double precision sigma,tmp,utv,vtv,suitv,ddot,zero save data zero/0.0d0/ c if (mfdif .eq. 2) go to 200 c----------------------------------------------------------------------- c mfdif = 1. call user-supplied routine for computing j*v. c----------------------------------------------------------------------- do 110 i = 1,n 110 vtem(i) = v(i)/su(i) if (ipflg .eq. 1) then ier = 0 call psol (n, nu, u, savf, f, jac, z, wmp, iwmp, vtem, ier) npsl = npsl + 1 if (ier .ne. 0) return endif call jac (n, nu, u, savf, vtem, z) nje = nje + 1 do 120 i = 1,n 120 z(i) = z(i)*sf(i) return c----------------------------------------------------------------------- c mfdif = 2. internally generated vector j*v. c----------------------------------------------------------------------- 200 continue c set vtem = (su-inverse) * v. do 210 i = 1,n vtem(i) = v(i)/su(i) 210 continue if (ipflg .eq. 0) then c ipflg = 0. save u in z and increment u by sigma*vtem. do 215 i = 1,n 215 z(i) = u(i)*su(i) utv = ddot(n, z, 1, v, 1) suitv = zero do 220 i = 1,n 220 suitv = suitv + dabs(v(i)) c vtv = 1 in this case. sigma = sqteta*dmax1(dabs(utv),suitv) sigma = dsign(sigma,utv) do 230 i = 1,n 230 z(i) = u(i) do 240 i = 1,n 240 u(i) = z(i) + sigma*vtem(i) else c ipflg = 1. apply inverse of right preconditioner ier = 0 call psol (n, nu, u, savf, f, jac, ftem, wmp, iwmp, vtem, ier) npsl = npsl + 1 if (ier .ne. 0) return vtv = zero suitv = zero do 260 i = 1,n tmp = vtem(i)*su(i) z(i) = tmp*su(i) vtv = vtv + tmp*tmp 260 suitv = suitv + dabs(tmp) utv = ddot(n, u, 1, z, 1) sigma = sqteta*dmax1(dabs(utv),suitv)/vtv sigma = dsign(sigma,utv) do 270 i = 1,n 270 z(i) = u(i) do 280 i = 1,n 280 u(i) = z(i) + sigma*vtem(i) endif call f(n, nu, u, ftem) nfe = nfe + 1 do 290 i = 1,n 290 u(i) = z(i) do 300 i = 1,n 300 z(i) = (ftem(i) - savf(i))/sigma do 310 i = 1,n 310 z(i) = z(i)*sf(i) return c----------------------- end of subroutine atv ------------------------- end c----------------------------------------------------------------------- subroutine orthog (vnew, v, hes, n, m, mmax, ldhes, iomp, snormw) integer n integer m integer mmax integer iomp integer ldhes double precision vnew double precision v double precision hes double precision snormw dimension vnew(n) dimension v(n,mmax) dimension hes(ldhes,mmax) c----------------------------------------------------------------------- c this routine orthogonalizes the vector vnew against the previous c iomp vectors in the v array. it uses a modified gram-schmidt c orthogonalization procedure with conditional reorthogonalization. c this is the version of 25 september 1985. c----------------------------------------------------------------------- c c npsl = the number of calls to psol. c on entry c c vnew = the vector of length n containing a scaled product c of the jacobian and the vector v(*,m). c c v = the n x m array containing the previous m c orthogonal vectors v(*,1) to v(*,m). c c hes = an m x m upper hessenberg matrix containing, c in hes(i,k), k.lt.m, scaled inner products of c a*v(*,k) and v(*,i). c c ldhes = the leading dimension of the hes array. c c n = the order of the matrix a, and the length of vnew. c c m = the current order of the matrix hes. c c mmax = the maximum allowable order of the matrix hes. c c iomp = the number of previous vectors the new vector vnew c must be made orthogonal to (iomp .le. mmax). c c on return c c vnew = the new vector orthogonal to v(*,1) to v(*,m). c c hes = upper hessenberg matrix with column m filled in with c scaled inner products of a*v(*,m) and v(*,i). c c snormw = l-2 norm of vnew. c c----------------------------------------------------------------------- integer i, i0 double precision arg, ddot, dnrm2, sumdsq, tem, vnrm save c get norm of unaltered vnew for later use. ---------------------------- vnrm = dnrm2 (n, vnew, 1) c----------------------------------------------------------------------- c do modified gram-schmidt on vnew = a*v(m). c scaled inner products give new column of hes. c projections of earlier vectors are subtracted from vnew. c----------------------------------------------------------------------- i0 = max0(1,m-iomp+1) do 10 i = i0,m hes(i,m) = ddot(n, v(1,i), 1, vnew, 1) tem = -hes(i,m) call daxpy (n, tem, v(1,i), 1, vnew, 1) 10 continue c----------------------------------------------------------------------- c compute snormw = norm of vnew. c if vnew is small compared to its input value (in norm), then c reorthogonalize vnew to v(*,1) through v(*,m). c correct if relative correction exceeds 1000*(unit roundoff). c finally, correct snormw using the dot products involved. c----------------------------------------------------------------------- snormw = dnrm2 (n, vnew, 1) if (vnrm + 0.001d0*snormw .ne. vnrm) return sumdsq = 0.0d0 do 30 i = i0,m tem = -ddot(n, v(1,i), 1, vnew, 1) if (hes(i,m) + 0.001d0*tem .eq. hes(i,m)) go to 30 hes(i,m) = hes(i,m) - tem call daxpy (n, tem, v(1,i), 1, vnew, 1) sumdsq = sumdsq + tem**2 30 continue if (sumdsq .eq. 0.0d0) return arg = dmax1(0.0d0,snormw**2 - sumdsq) snormw = dsqrt(arg) c return c----------------------- end of subroutine orthog ---------------------- end subroutine dhefa (a, lda, n, ipvt, info, job) integer lda, n, ipvt(n), info, job double precision a(lda,n) c----------------------------------------------------------------------- c this routine is a modification of the linpack routine sgefa and c performs an lu decomposition of an upper hessenberg matrix a. c there are two options available.. c c (1) performing a fresh factorization c (2) updating the lu factors by adding a row and a c column to the matrix a. c----------------------------------------------------------------------- c dhefa factors an upper hessenberg matrix by elimination. c c on entry c c a real(lda, n) c the matrix to be factored. c c lda integer c the leading dimension of the array a . c c n integer c the order of the matrix a . c c job integer c job = 1 means that a fresh factorization of the c matrix a is desired. c job .ge. 2 means that the current factorization of a c will be updated by the addition of a row c and a column. c on return c c a an upper triangular matrix and the multipliers c which were used to obtain it. c the factorization can be written a = l*u where c l is a product of permutation and unit lower c triangular matrices and u is upper triangular. c c ipvt integer(n) c an integer vector of pivot indices. c c info integer c = 0 normal value. c = k if u(k,k) .eq. 0.0 . this is not an error c condition for this subroutine, but it does c indicate that dhesl will divide by zero if called. c c modification of linpack. this version dated 07/20/83 . c peter brown, university of houston, lawrence livermore lab. c c subroutines and functions c c blas daxpy,idamax c c----------------------------------------------------------------------- c internal variables c double precision t integer idamax, j, k, km1, kp1, l, nm1 save c if (job .gt. 1) go to 80 c----------------------------------------------------------------------- c a new facorization is desired. this is essentially the linpack c code with the exception that we know there is only one nonzero c element below the main diagonal. c----------------------------------------------------------------------- c c gaussian elimination with partial pivoting c info = 0 nm1 = n - 1 if (nm1 .lt. 1) go to 70 do 60 k = 1, nm1 kp1 = k + 1 c c find l = pivot index c l = idamax(2,a(k,k),1) + k - 1 ipvt(k) = l c c zero pivot implies this column already triangularized c if (a(l,k) .eq. 0.0d0) go to 40 c c interchange if necessary c if (l .eq. k) go to 10 t = a(l,k) a(l,k) = a(k,k) a(k,k) = t 10 continue c c compute multipliers c t = -1.0d0/a(k,k) a(k+1,k) = a(k+1,k)*t c c row elimination with column indexing c do 30 j = kp1, n t = a(l,j) if (l .eq. k) go to 20 a(l,j) = a(k,j) a(k,j) = t 20 continue call daxpy(n-k,t,a(k+1,k),1,a(k+1,j),1) 30 continue go to 50 40 continue info = k 50 continue 60 continue 70 continue ipvt(n) = n if (a(n,n) .eq. 0.0d0) info = n return c----------------------------------------------------------------------- c the old factorization of a will be updated. a row and a column c has been added to the matrix a. c n-1 is now the old order of the matrix. c----------------------------------------------------------------------- 80 continue nm1 = n - 1 c----------------------------------------------------------------------- c perform row interchanges on the elements of the new column, and c perform elimination operations on the elements using the multipliers. c----------------------------------------------------------------------- if (nm1 .le. 1) go to 105 do 100 k = 2,nm1 km1 = k - 1 l = ipvt(km1) t = a(l,n) if (l .eq. km1) go to 90 a(l,n) = a(km1,n) a(km1,n) = t 90 continue a(k,n) = a(k,n) + a(k,km1)*t 100 continue 105 continue c----------------------------------------------------------------------- c complete update of factorization by decomposing last 2x2 block. c----------------------------------------------------------------------- info = 0 c c find l = pivot index c l = idamax(2,a(nm1,nm1),1) + nm1 - 1 ipvt(nm1) = l c c zero pivot implies this column already triangularized c if (a(l,nm1) .eq. 0.0d0) go to 140 c c interchange if necessary c if (l .eq. nm1) go to 110 t = a(l,nm1) a(l,nm1) = a(nm1,nm1) a(nm1,nm1) = t 110 continue c c compute multipliers c t = -1.0d0/a(nm1,nm1) a(n,nm1) = a(n,nm1)*t c c row elimination with column indexing c t = a(l,n) if (l .eq. nm1) go to 120 a(l,n) = a(nm1,n) a(nm1,n) = t 120 continue a(n,n) = a(n,n) + t*a(n,nm1) go to 150 140 continue info = nm1 150 continue ipvt(n) = n if (a(n,n) .eq. 0.0d0) info = n return c----------------------- end of subroutine dhefa ----------------------- end subroutine dhesl(a,lda,n,ipvt,b) integer lda, n, ipvt(n) double precision a(lda,n), b(n) c----------------------------------------------------------------------- c this is essentially the linpack routine sgesl except for changes c due to the fact that a is an upper hessenberg matrix. c----------------------------------------------------------------------- c dhesl solves the real system a * x = b c using the factors computed by dhefa. c c on entry c c a real(lda, n) c the output from dhefa. c c lda integer c the leading dimension of the array a . c c n integer c the order of the matrix a . c c ipvt integer(n) c the pivot vector from dhefa. c c b real(n) c the right hand side vector. c c c on return c c b the solution vector x . c c c modification of linpack. this version dated 07/20/83 . c peter brown, university of houston, lawrence livermore lab. c c subroutines and functions c c blas daxpy c----------------------------------------------------------------------- c internal variables c double precision t integer k,kb,l,nm1 save c nm1 = n - 1 c c solve a * x = b c first solve l*y = b c if (nm1 .lt. 1) go to 30 do 20 k = 1, nm1 l = ipvt(k) t = b(l) if (l .eq. k) go to 10 b(l) = b(k) b(k) = t 10 continue b(k+1) = b(k+1) + t*a(k+1,k) 20 continue 30 continue c c now solve u*x = y c do 40 kb = 1, n k = n + 1 - kb b(k) = b(k)/a(k,k) t = -b(k) call daxpy(k-1,t,a(1,k),1,b(1),1) 40 continue return c----------------------- end of subroutine dhesl ----------------------- end subroutine spigmr (n, nu, u, savf, b, su, sf, mmax, mmaxp1, igmp, * eps, f, jac, psol, npsl, x, v, hes, q, hessv, * mgmr, wmp, iwmp, wk, methn, bnrm, ipflg, iflag, * rho) external f external jac external psol c----------------------------------------------------------------------- c this routine solves the linear system a * x = b using a scaled c preconditioned version of the generalized minimum residual method. c----------------------------------------------------------------------- c c on entry c c n = problem size, passed to f, jac and psol. also, the order c of the matrix a, and the lengths of the vectors savf, c b, su, sf, and x. c c nu = length of array u. c c u = array containing current approximate solution to c f(u) = 0. c c savf = array containing current value of f(u). c c b = the right hand side of the system a*x = b. c b is also used as work space when computing the c final approximation. c c su = array of length n containing scale factors for u. c c sf = array of length n containing scale factors for f. c c x = the initial guess of the solution of the system c a*x = b. c c eps = tolerance on residuals b-a*x in scaled euclidean norm. c c mmax = the maximum allowable order of the matrix h. c c igmp = the number of previous vectors the new vector vnew c must be made orthogonal to. igmp .le. mmax. c c wmp = real work array used by preconditioner psol. c c iwmp = integer work array used by preconditioner psol. c c methn = integer flag indicating which global strategy is c being used (=1 dogleg, =2 linesearch backtrack). c when methn=1, the x array is computed in dogstp, and c only the ygm value is returned in b. otherwise, x c contains the gmres solution. c c ipflg = integer flag c ipflg=0 means no preconditioning is done. c ipflg.ne.0 means preconditioning is being performed. c c on return c c x = the final computed approximation to the solution c of the system a*x = b (when methn = 2). c c rho = the scaled euclidean norm of the residual associated c with x. c c b = the ygm value (when methn = 1). c c bnrm = the scaled euclidean norm of the right hand c side in a*x = b. c c mgmr = the number of iterations performed and c the current order of the upper hessenberg c matrix hes. c c npsl = the number of calls to psol. c c v = the n x (mgmr+1) array containing the mgmr c orthogonal vectors v(*,1) to v(*,mgmr). c c hes = the upper triangular factor of the qr decomposition c of the (mgmr+1) x mgmr upper hessenberg matrix whose c entries are the scaled inner-products of a*v(*,i) c and v(*,k). c c hessv = the (mgmr+1) x mgmr upper hessenberg matrix whose c entries are the scaled inner-products of a*v(*,i) c and v(*,k). c c q = a real array containing the components of the givens c rotations used in the qr decomposition of hes. it c is loaded in dheqr and used in dhels. c c iflag = integer output flag.. c iflag=0 means convergence in mgmr iterations, mgmr.le.mmax. c iflag=1 means the convergence test did not pass in mmax c iterations. c iflag=2 means there was a recoverable error in psol c caused by the preconditioner being out of date. c iflag=-1 means there was a nonrecoverable error in psol. c c note: the x array is used as work space in routines psol and atv. c----------------------------------------------------------------------- double precision v double precision hes double precision hessv double precision su double precision sf double precision q double precision b double precision savf double precision x double precision u double precision wk double precision wmp double precision dnrm2 integer n integer nu integer iwmp integer mmax integer mmaxp1 integer iprint integer iunit integer iermsg integer ier integer npsl integer methn integer ipflg integer j integer i integer iflag integer info integer igmp integer k integer mgmr integer m integer mp1 dimension v(n,mmaxp1) dimension hes(mmaxp1,mmax) dimension hessv(mmaxp1,mmax) dimension su(n) dimension sf(n) dimension q(2*mmax) dimension b(n) dimension savf(n) dimension x(n) dimension u(nu) dimension wmp(1) dimension wk(n) dimension iwmp(1) double precision bnrm double precision eps double precision prod double precision rho double precision snormw double precision tem c----------------------------------------------------------------------- c nks002 common block. c----------------------------------------------------------------------- common /nks002/ iprint, iunit, iermsg save c iflag = 0 mgmr = 0 npsl = 0 c zero out the hes and hessv arrays ------------------------------------ do 15 j = 1,mmax do 10 i = 1,mmaxp1 hes(i,j) = 0.0d0 hessv(i,j) = 0.0d0 10 continue 15 continue c----------------------------------------------------------------------- c the initial residual is the vector b. apply scaling to b, and c then normalize as v(*,1). c----------------------------------------------------------------------- do 20 i = 1,n 20 v(i,1) = b(i)*sf(i) bnrm = dnrm2(n, v, 1) tem = 1.0d0/bnrm call dscal (n, tem, v(1,1), 1) c----------------------------------------------------------------------- c main loop to compute the vectors v(*,2) to v(*,mmax). c the running product prod is needed for the convergence test. c----------------------------------------------------------------------- prod = 1.0d0 if (iprint .gt. 1) write(iunit,30) 30 format(' ------ in routine spigmr ------') do 100 m = 1,mmax mgmr = m c----------------------------------------------------------------------- c call routine atv to compute vnew = a*v(m). c call routine orthog to orthogonalize the new vector vnew = v(*,m+1). c call routine dheqr to update the factors of hes. c----------------------------------------------------------------------- call atv (n, nu, u, savf, v(1,m), su, sf, x, f, jac, psol, * v(1,m+1), wk, wmp, iwmp, ier, npsl) if (ier .ne. 0) go to 300 call orthog (v(1,m+1),v,hes,n,m,mmax,mmaxp1,igmp,snormw) hes(m+1,m) = snormw hessv(m+1,m) = snormw do 60 i = 1,mgmr hessv(i,m) = hes(i,m) 60 continue call dheqr(hes,mmaxp1,m,q,info,m) if (info .eq. m) go to 105 c----------------------------------------------------------------------- c update rho, the estimate of the norm of the residual b - a*xl. c----------------------------------------------------------------------- prod = prod*q(2*m) rho = dabs(prod*bnrm) c----------------------------------------------------------------------- c test for convergence. if passed, compute approximation xl. c if failed and m .lt. mmax continue iterating. c----------------------------------------------------------------------- if (iprint .gt. 1) write(iunit,65) m, rho, eps 65 format(' m , res, eps ',i4,e25.16,1x,e25.16) if (rho .le. eps) go to 200 if (m .eq. mmax) go to 110 c----------------------------------------------------------------------- c rescale so that the norm of v(1,m+1) is one. c----------------------------------------------------------------------- tem = 1.0d0/snormw call dscal (n, tem, v(1,m+1), 1) 100 continue 105 continue iflag = 2 return 110 iflag = 1 c----------------------------------------------------------------------- c compute the approximation xl to the solution. c since the vector x was used as work space in routine orthog and since c the initial guess of the newton correction is zero, x must be set c back to zero. c----------------------------------------------------------------------- 200 continue m = mgmr mp1 = m + 1 do 210 k = 1,mp1 210 b(k) = 0.0d0 b(1) = bnrm call dhels(hes,mmaxp1,m,q,b) if (methn .eq. 2) then do 220 k = 1,n 220 x(k) = 0.0d0 do 230 i = 1,m call daxpy(n,b(i),v(1,i),1,x,1) 230 continue do 240 i = 1,n 240 x(i) = x(i)/su(i) if (ipflg .eq. 1) then ier = 0 call psol (n, nu, u, savf, f, jac, wk, wmp, iwmp, x, ier) npsl = npsl + 1 if (ier .ne. 0) go to 300 endif endif return c----------------------------------------------------------------------- c this block handles error returns forced by routine psol. c----------------------------------------------------------------------- 300 continue if (ier .lt. 0) iflag = -1 if (ier .gt. 0) iflag = 3 c return c----------------------- end of subroutine spigmr ---------------------- end c----------------------------------------------------------------------- subroutine dheqr(a, lda, n, q, info, ijob) c----------------------------------------------------------------------- c this routine performs a qr decomposition of an upper c hessenberg matrix a. there are two options available: c c (1) performing a fresh decomposition c (2) updating the qr factors by adding a row and a c column to the matrix a. c----------------------------------------------------------------------- c dheqr decomposes an upper hessenberg matrix by using givens c rotations. c c on entry c c a real(lda, n) c the matrix to be decomposed. c c lda integer c the leading dimension of the array a . c c n integer c a is an (n+1) by n hessenberg matrix. c c ijob integer c = 1 means that a fresh decomposition of the c matrix a is desired. c .ge. 2 means that the current decomposition of a c will be updated by the addition of a row c and a column. c on return c c a the upper triangular matrix r. c the factorization can be written q*a = r, where c q is a product of givens rotations and r is upper c triangular. c c q real(2*n) c the factors c and s of each givens rotation used c in decomposing a. c c info integer c = 0 normal value. c = k if a(k,k) .eq. 0.0 . this is not an error c condition for this subroutine, but it does c indicate that dhels will divide by zero c if called. c c this version dated 1/13/86. c peter brown, university of houston, lawrence livermore lab. c c subroutines and functions c c dabs, dsqrt c c----------------------------------------------------------------------- integer lda integer n integer info integer ijob double precision a(lda,n) double precision q(2*n) double precision c double precision s double precision t double precision t1 double precision t2 integer i integer j integer k integer kp1 integer iq integer nm1 integer km1 save c if (ijob .gt. 1) go to 70 c----------------------------------------------------------------------- c a new facorization is desired. c----------------------------------------------------------------------- c c qr decomposition without pivoting c info = 0 do 60 k = 1, n km1 = k - 1 kp1 = k + 1 c c compute kth column of r. c first, multiply the kth column of a by the previous c k-1 givens rotations. c if (km1 .lt. 1) go to 20 do 10 j = 1, km1 i = 2*(j-1) + 1 t1 = a(j,k) t2 = a(j+1,k) c = q(i) s = q(i+1) a(j,k) = c*t1 - s*t2 a(j+1,k) = s*t1 + c*t2 10 continue c c compute givens components c and s c 20 continue iq = 2*km1 + 1 t1 = a(k,k) t2 = a(kp1,k) if (t2 .ne. 0.0d0) go to 30 c = 1.0d0 s = 0.0d0 go to 50 30 continue if (dabs(t2) .lt. dabs(t1)) go to 40 t = t1/t2 s = -1.0d0/dsqrt(1.0d0+t*t) c = -s*t go to 50 40 continue t = t2/t1 c = 1.0d0/dsqrt(1.0d0+t*t) s = -c*t 50 continue q(iq) = c q(iq+1) = s a(k,k) = c*t1 - s*t2 if (a(k,k) .eq. 0.0d0) info = k 60 continue return c----------------------------------------------------------------------- c the old factorization of a will be updated. a row and a column c has been added to the matrix a. c n by n-1 is now the old size of the matrix. c----------------------------------------------------------------------- 70 continue nm1 = n - 1 c----------------------------------------------------------------------- c multiply the new column by the n previous givens rotations. c----------------------------------------------------------------------- do 100 k = 1,nm1 i = 2*(k-1) + 1 t1 = a(k,n) t2 = a(k+1,n) c = q(i) s = q(i+1) a(k,n) = c*t1 - s*t2 a(k+1,n) = s*t1 + c*t2 100 continue c----------------------------------------------------------------------- c complete update of decomposition by forming last givens rotation, c and multiplying it times the column vector (a(n,n),a(np1,n)). c----------------------------------------------------------------------- info = 0 t1 = a(n,n) t2 = a(n+1,n) if (t2 .ne. 0.0d0) go to 110 c = 1.0d0 s = 0.0d0 go to 130 110 continue if (dabs(t2) .lt. dabs(t1)) go to 120 t = t1/t2 s = -1.0d0/dsqrt(1.0d0+t*t) c = -s*t go to 130 120 continue t = t2/t1 c = 1.0d0/dsqrt(1.0d0+t*t) s = -c*t 130 continue iq = 2*n - 1 q(iq) = c q(iq+1) = s a(n,n) = c*t1 - s*t2 if (a(n,n) .eq. 0.0d0) info = n return c----------------------- end of subroutine dheqr ----------------------- end subroutine dhels(a, lda, n, q, b) integer lda integer n double precision a(lda,n) double precision b(n) double precision q(2*n) c----------------------------------------------------------------------- c this is part of the linpack routine sgesl with changes c due to the fact that a is an upper hessenberg matrix. c----------------------------------------------------------------------- c dhels solves the least squares problem c c min (b-a*x,b-a*x) c c using the factors computed by dheqr. c c on entry c c a real(lda, n) c the output from dheqr which contains the upper c triangular factor r in the qr decomposition of a. c c q real(2*n) c the coefficients of the n givens rotations c used in the qr factorization of a. c c lda integer c the leading dimension of the array a . c c n integer c a is originally an (n+1) by n matrix. c c b real(n) c the right hand side vector. c c c on return c c b the solution vector x . c c c modification of linpack. this version dated 1/13/86. c peter brown, university of houston, lawrence livermore lab. c c subroutines and functions c c blas daxpy c----------------------------------------------------------------------- c c internal variables c double precision c double precision s double precision t double precision t1 double precision t2 integer k integer kb integer kp1 integer iq save c c minimize (b-a*x,b-a*x) c first form q*b. c c do 20 k = 1, n c modified - jm do 20 k = 1, n-1 kp1 = k + 1 iq = 2*(k-1) + 1 c = q(iq) s = q(iq+1) t1 = b(k) t2 = b(kp1) b(k) = c*t1 - s*t2 b(kp1) = s*t1 + c*t2 20 continue c c now solve r*x = q*b. c do 40 kb = 1, n k = n + 1 - kb b(k) = b(k)/a(k,k) t = -b(k) call daxpy(k-1,t,a(1,k),1,b(1),1) 40 continue return c----------------------- end of subroutine dhels ----------------------- end subroutine dogdrv (n, nu, wm, lenwm, iwm, leniwm, u, savf, f1nrm, * x, su, sf, stepmx, stptol, tau, iret, uprev, * fprev, unew, f1new, mxtkn, f, jac, psol) c----------------------------------------------------------------------- c this is the real version of subroutine dogdrv, which is the driver for c the dogleg step. its purpose is to find a unew on the dogleg curve c such that f(unew) .le. f(u) + alpha*gt(unew-u) (alpha=1.d-4 used), c and scaled steplength = tau, starting with the input tau but c increasing or decreasing tau if necessary. also, it produces the c starting trust region size tau for the next iteration. c c on entry c c n = size of the nonlinear system, and the length of arrays c unew, savf, su and sf. c c nu = length of u array. c c wm = real array of length lenwm used as work space in the c spigmr routine. it contains the basis vectors v(i) c as well as the hessenberg matrix and its qr factors. c c lenwm = length of wm array. c c iwm = integer array of length leniwm used as work space. c it contains the current size m of the krylov subspace c (in iwm(1)), and the integer work space used by the c psol routine. c c leniwm = length of iwm array. c c u = real array containing the previous approximate root. c c f1nrm = real scalar containing 0.5*norm(sf*f(u))**2, where c norm() denotes the euclidean norm. c c su = real array containing scaling factors for u. c c sf = real array containing scaling factors for f(u). c c stepmx = real scalar containing the maximum allowable stepsize c length. c c stptol = user-supplied tolerance on the minimum step length c s = unew-u. c c tau = the current size of the trust region. it is the trust c region size tried at the beginning of the dogleg step. c c uprev = real array of length n used to hold a previous value c of u in the dogleg step. c c fprev = real array of length n used to hold f(uprev). c c c on return c c unew = real array containing the current approximate root. c c savf = real array containing the n-vector f(unew). c c f1new = real scalar containing 0.5*norm(sf*f(unew))**2, where c norm() denotes the euclidean norm. c c x = real array of length n containing the step computed c by the dogleg strategy. c c mxtkn = logical flag indicating if the step just taken was of c maximum length stepmx. c c iret = output flag. c iret=0 means that a satisfactory unew was found. c iret=1 means that the routine failed to find a unew c that was sufficiently distinct from u. this c failure causes the nonlinear iteration to halt. c c----------------------------------------------------------------------- external f external jac external psol integer n integer nu integer lenwm integer iwm integer leniwm integer iret integer np1 integer mmaxp1 integer m integer mp1 integer iv integer iwk integer iycp integer iynew integer iygm integer ihsv double precision u double precision uprev double precision fprev double precision unew double precision f1nrm double precision f1prv double precision f1new double precision x double precision su double precision sf double precision beta double precision tau double precision stepmx double precision stptol double precision cpl double precision gml double precision wm double precision xl double precision savf dimension wm(lenwm) dimension iwm(leniwm) dimension x(n) dimension u(nu) dimension uprev(n) dimension unew(n) dimension savf(n) dimension fprev(n) dimension su(n) dimension sf(n) logical mxtkn, dog1, nwttkn c----------------------------------------------------------------------- c nks001 common block. c----------------------------------------------------------------------- integer locwmp integer locimp integer iersl integer kmp integer mmax integer methn integer methk integer ipflg integer mfdif integer nfe integer nje integer nni integer nli integer npe integer nps integer ncfl integer nbcf double precision delt double precision sqteta double precision rhom common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp, * mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, * nli, npe, nps, ncfl, nbcf c----------------------------------------------------------------------- c nks002 common block. c----------------------------------------------------------------------- integer iprint integer iunit integer iermsg common /nks002/ iprint, iunit, iermsg save c iret = 4 dog1 = .true. if (iprint .gt. 1) write(iunit,5) 5 format(' ------ in routine dogdrv ------ ') c----------------------------------------------------------------------- c initialize pointers into the wm array. c----------------------------------------------------------------------- np1 = n + 1 mmaxp1 = mmax + 1 beta = wm(1) m = iwm(1) mp1 = m + 1 iv = 3 iwk = iv + n*mmax iygm = iwk + n + 1 iycp = iygm + mmax iynew = iycp + mmax ihsv = iwk + n + 1 + mmax*(mmaxp1+1) + 1 iersl = 0 10 continue c----------------------------------------------------------------------- c find new step by the dogleg algorithm. c----------------------------------------------------------------------- call dogstp (m, mp1, mmaxp1, wm(iygm), wm(iycp), beta, wm(ihsv), * tau, wm(iynew), stepmx, dog1, nwttkn, cpl, gml, n, * nu, wm(iv), x, xl, wm(iwk), wm(locwmp), iwm(locimp), * u, su, savf, f, jac, psol) if (iersl .lt. 0) then c nonrecoverable error from psol. set iersl and return. iersl = 9 return endif c----------------------------------------------------------------------- c check new point and update trust radius. c----------------------------------------------------------------------- call trgupd (m, mp1, mmaxp1, n, nu, np1, u, savf, f1nrm, x, xl, * wm(iynew), su, sf, nwttkn, stepmx, beta, wm(ihsv), * stptol, mxtkn, tau, uprev, fprev, f1prv, unew, * f1new, wm(iwk), iret, f) c----------------------------------------------------------------------- c if iret .eq. 2 or 3, go to 10 to repeat the step. c if iret .eq. 0 or 1, return to calling program. c----------------------------------------------------------------------- if (iprint .gt. 1) write(iunit,9000) tau,cpl,gml,iret if (iret .ge. 2) go to 10 return 9000 format(' tau= ',g12.4,' cpl= ',g12.4,' gml= ',g12.4,' iret= ',i2) c----------------------- end of subroutine dogdrv ---------------------- end c----------------------------------------------------------------------- subroutine dogstp (m, mp1, mmaxp1, ygm, ycp, beta, hes, tau, ynew, * stepmx, dog1, nwttkn, cpl, gml, n, nu, v, xnew, * xnewl, wk, wmp, iwmp, u, su, savf, f, jac, psol) external f external jac external psol integer m integer mp1 integer mmaxp1 integer n integer nu integer iwmp double precision ygm double precision ycp double precision beta double precision hes double precision tau double precision ynew double precision xnew double precision xnewl double precision v double precision wk double precision wmp double precision u double precision su double precision savf dimension ygm(m) dimension ycp(m) dimension hes(mmaxp1,m) dimension ynew(mp1) dimension xnew(n) dimension v(n,m) dimension wk(n) dimension wmp(1) dimension iwmp(1) dimension u(nu) dimension su(n) dimension savf(n) logical dog1 logical nwttkn c----------------------------------------------------------------------- c this is subroutine dogstp, which computes the dogleg step for a c given trust region size tau. c c on entry c c m = the current dimension of the krylov subspace returned c by routine spigmr. c c mp1 = m + 1. c c mmaxp1 = mmax + 1, where mmax is the maximum allowable krylov c subspace dimension. c c ygm = real array of length m containing the solution of the c m-dimensional least-squares problem computed in spigmr. c c hes = real mmaxp1-by-m array containing the hessenberg c matrix used in spigmr. c c tau = real scalar containing the current trust region size. c c stepmx = real scalar containing the maximum allowable stepsize c length. c c dog1 = logical variable. c dog1=.true. means that this is the first call to c this routine by dogdrv for the current step. c the length of ygm is calculated and ycp c is calculated. c dog1=.false. means this is not the first call to this c routine during the current step. c c n = size of the nonlinear system, and the length of arrays c u, xnew, savf, su and sf. c c v = real n-by-m array containing the basis vectors for c the krylov space. c wmp = real work array passed directly to psol routine. c c iwmp = integer work array passed directly to psol routine. c c wk = real array of length n used as work space. c c u = real array containing the previous approximate root. c c beta = real scalar containing norm(sf*f(u)), where norm() is c the euclidean norm. c c su = real array containing scaling factors for u. c c sf = real array containing scaling factors for f(u). c c on return c c ycp = real array of length m containing the cauchy point on c the dogleg curve. (only calculated when nwttkn=.false.) c c cpl = real scalar containing the length of the vector ycp. c (cpl = 0.d0 when nwttkn=.true.) c c gml = real scalar containing the length as the vector ygm. c c ynew = real array of length m+1 used in calculating xnew and ycp. c c xnew = real array containing the current dogleg step. c c xnewl = real scalar containing the scaled length of the c vector p*xnew, where p is the preconditioner matrix, and c is equal to norm(ynew). c c nwttkn = logical variable. c nwttkn=.true. means that tau .ge. the length of the c gmres step, and so the full gmres step c is returned in xnew. c nwttkn=.false. means that tau .lt. the length of the c gmres step, and so the cauchy point c ycp must be calculated and the point c on the dogleg curve found. c c----------------------------------------------------------------------- integer ier integer i double precision a double precision b double precision c double precision cpl double precision gml double precision s double precision t double precision tem1 double precision tem2 double precision stepmx double precision ddot double precision dnrm2 c----------------------------------------------------------------------- c nks001 common block. c----------------------------------------------------------------------- integer locwmp integer locimp integer iersl integer kmp integer mmax integer methn integer methk integer ipflg integer mfdif integer nfe integer nje integer nni integer nli integer npe integer nps integer ncfl integer nbcf double precision delt double precision sqteta double precision rhom common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp, * mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, * nli, npe, nps, ncfl, nbcf c save c if (dog1) then gml = dnrm2(m, ygm, 1) cpl = 0.0d0 endif if (gml .le. tau) then nwttkn = .true. do 10 i = 1,m ynew(i) = ygm(i) 10 continue tau = gml c return else if (dog1) then c calculate cauchy point. dog1 = .false. do 20 i = 1,m ycp(i) = hes(1,i) 20 continue tem1 = ddot(m, ycp, 1, ycp, 1) do 25 i = 1,mp1 ynew(i) = 0.0d0 25 continue do 30 i = 1,m call daxpy(mp1, ycp(i), hes(1,i), 1, ynew, 1) 30 continue tem2 = ddot(mp1, ynew, 1, ynew, 1) t = beta*(tem1/tem2) call dscal(m, t, ycp, 1) cpl = dnrm2(m, ycp, 1) c if this is the first iteration and no initial trust region was c provided by the user, then set tau = min(cpl,stepmx). if (tau .eq. -1.0d0) tau = dmin1(cpl, stepmx) endif if (tau .le. cpl) then t = tau/cpl do 40 i = 1,m ynew(i) = t*ycp(i) 40 continue c return else do 50 i = 1,m ynew(i) = ycp(i) - ygm(i) 50 continue a = ddot(m, ynew, 1, ynew, 1) b = ddot(m, ygm, 1, ynew, 1) c = gml*gml - tau*tau s = ( -b - dsqrt(dmax1(b*b-a*c,0.d0)) )/a do 60 i = 1,m ynew(i) = ygm(i) + s*ynew(i) 60 continue c return endif endif c----------------------------------------------------------------------- c calulate the dogleg step xnew = [v1,v2,...,vm]*ynew. c----------------------------------------------------------------------- xnewl = dnrm2(m, ynew, 1) do 90 i = 1,n xnew(i) = 0.d0 90 continue do 100 i = 1,m call daxpy (n, ynew(i), v(1,i), 1, xnew, 1) 100 continue do 110 i = 1,n 110 xnew(i) = xnew(i)/su(i) if (ipflg .ge. 1) then ier = 0 call psol (n, nu, u, savf, f, jac, wk, wmp, iwmp, xnew, ier) nps = nps + 1 if (ier .ne. 0) iersl = -1 endif return c----------------------- end of subroutine dogstp ---------------------- end c----------------------------------------------------------------------- subroutine trgupd (m, mp1, mmaxp1, n, nu, np1, u, savf, f1nrm, x, * xl, ynew, su, sf, nwttkn, stepmx, beta, hes, * stptol, mxtkn, tau, uprev, fprev, f1prv, upls, * f1pls, wk, iret, f) external f integer m integer mp1 integer mmaxp1 integer n integer nu integer np1 integer iret integer i double precision u double precision f1nrm double precision x double precision xl double precision ynew double precision su double precision sf double precision stepmx double precision beta double precision hes double precision stptol double precision tau double precision uprev double precision fprev double precision f1prv double precision upls double precision f1pls double precision wk double precision savf dimension u(nu) dimension x(n) dimension ynew(m) dimension savf(n) dimension su(n) dimension sf(n) dimension hes(mmaxp1,m) dimension uprev(n) dimension fprev(n) dimension upls(n) dimension wk(np1) c----------------------------------------------------------------------- c this is the real version of subroutine trgupd, which determines c if the x(tau) returned by dogstp satisfies c c f(unew) .le. f(u) + alpha*gt(unew-u) (alpha=1.d-4 used), c c where unew = u + x(tau). if not, then a new tau is computed which c hopefully will give a satisfactory x(tau) (iret=0). if this cannot c be accomplished, then iret=1 is returned, and the nonlinear iteration c is halted. c c on entry c c m = the current dimension of the krylov subspace returned c by routine spigmr. c c mp1 = m + 1. c c mmaxp1 = mmax + 1, where mmax is the maximum allowable krylov c subspace dimension. c c n = size of the nonlinear system, and the length of arrays c unew, savf, su and sf. c c nu = length of u array. c c np1 = n + 1. c c u = real array containing the previous approximate root. c c savf = real array containing f(u). c c f1nrm = real scalar containing 0.5*norm(sf*f(u))**2, where c norm() denotes the euclidean norm. c c x = real array of length n containing the current dogleg c step x(tau). c c xl = the scaled length of p*x(tau), where p is the c preconditioner matrix. c c ynew = real array of length m used in calculating x(tau). c x(tau) = [v1,...,vm]*ynew, where v1,...,vm are the c krylov basis vectors. c c su = real array containing scaling factors for u. c c sf = real array containing scaling factors for f(u). c c nwttkn = logical variable. c nwttkn=.true. means that tau .ge. the length of the c gmres step, and so the full gmres step c is in x. c nwttkn=.false. means that tau .lt. the length of the c gmres step, and so the cauchy point c ycp has been calculated in dogstp. c c stepmx = real scalar containing the maximum allowable stepsize c length. c c beta = real scalar containing norm(sf*f(u)), where norm() is c the euclidean norm. c c hes = real mmaxp1-by-m array containing the hessenberg c matrix used in spigmr. c c stptol = user-supplied tolerance on the minimum step length c s = unew-u. c c tau = the current size of the trust region. it is the trust c region size tried at the beginning of the dogleg step. c c uprev = real array of length n used to hold a previous value c of u in the dogleg step. c c fprev = real array of length n used to hold f(uprev). c c f1prv = real scalar used to hold 0.5*norm(sf*f(uprev))**2, where c norm() is the euclidean norm. c c wk = real array of length n used as work space. c c on return c c upls = real array containing the current approximate root. c c savf = real array containing the n-vector f(upls). c c f1pls = real scalar containing 0.5*norm(sf*f(upls))**2, where c norm() denotes the euclidean norm. c c mxtkn = logical flag indicating if the step just taken was of c maximum length stepmx. c c iret = output flag. c iret=0 means that a satisfactory upls was found. c iret=1 means that the routine failed to find a upls c that was sufficiently distinct from u. c iret=2 means that the current tau was too big. tau c is reduced and the global step is repeated. c iret=3 means that the current tau may be too small. c tau is increased and the global step is retried. c c tau = the new trust region size to be used at the beginning c of the next call to dogdrv, or for repeating the c current global step. c c----------------------------------------------------------------------- double precision alpha double precision pt1 double precision pt5 double precision pt75 double precision pt99 double precision zero double precision two double precision ddot double precision delf double precision dfpred double precision tautmp double precision fnrmp double precision rlngth double precision slpi double precision vnorm logical mxtkn logical nwttkn c----------------------------------------------------------------------- c nks001 common block. c----------------------------------------------------------------------- integer locwmp integer locimp integer iersl integer kmp integer mmax integer methn integer methk integer ipflg integer mfdif integer nfe integer nje integer nni integer nli integer npe integer nps integer ncfl integer nbcf double precision delt double precision sqteta double precision rhom common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp, * mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, * nli, npe, nps, ncfl, nbcf save data alpha/1.d-4/, pt1/0.1d0/, pt5/0.5d0/, pt75/0.75d0/, * pt99/0.99d0/, zero/0.0d0/, two/2.0d0/ c mxtkn = .false. c----------------------------------------------------------------------- c calculate upls = u + x. c----------------------------------------------------------------------- call dcopy (n, u, 1, upls, 1) do 20 i = 1,n u(i) = upls(i) + x(i) 20 continue call f (n, nu, u, savf) nfe = nfe + 1 call dswap(n, u, 1, upls, 1) fnrmp = vnorm(n, savf, sf) f1pls = pt5*fnrmp*fnrmp delf = f1pls - f1nrm slpi = -beta*ddot(m, hes, mmaxp1, ynew, 1) if (iret .ne. 3) f1prv = zero if ( (iret.eq.3) .and. * ((f1pls.ge.f1prv) .or. (delf.gt.alpha*slpi)) ) then c reset upls to uprev and terminate global step. iret = 0 call dcopy (n, uprev, 1, upls, 1) call dcopy (n, fprev, 1, savf, 1) f1pls = f1prv return elseif (delf .ge. alpha*slpi) then c f1(upls) too large. call slngth(n, nu, u, x, su, rlngth) if (rlngth .lt. stptol) then c (upls - u) too small, terminate global step. iret = 1 call dcopy (n, u, 1, upls, 1) return else c reduce tau, continue global step. iret = 2 tautmp = -slpi*xl/(two*(delf-slpi)) if (tautmp .lt. pt1*tau) then tau = pt1*tau elseif (tautmp .gt. pt5*tau) then tau = pt5*tau else tau = tautmp endif return endif else c f1(upls) sufficiently small. do 45 i = 1,mp1 wk(i) = zero 45 continue do 50 i = 1,m call daxpy (mp1, ynew(i), hes(1,i), 1, wk, 1) 50 continue dfpred = slpi + pt5*ddot (mp1, wk, 1, wk, 1) if ( (iret.ne.2) .and. * ( (dabs(dfpred-delf).le.pt1*dabs(delf)) .or. * (delf.le.slpi) ) .and. (.not.nwttkn) .and. * (tau.le.pt99*stepmx) ) then c double tau and continue global step. iret = 3 call dcopy (n, upls, 1, uprev, 1) call dcopy (n, savf, 1, fprev, 1) f1prv = f1pls tau = dmin1( two*tau, stepmx ) return else c accept upls as new iterate. choose new tau. iret = 0 if (xl .gt. pt99*stepmx) mxtkn = .true. if (delf .ge. pt1*dfpred) then c decrease tau for next iteration. tau = tau/two elseif (delf .le. pt75*dfpred) then c increase tau for next iteration. tau = dmin1( two*tau, stepmx ) else c leave tau unchanged. endif endif endif c return c----------------------- end of subroutine trgupd ---------------------- end subroutine lnsrch(n, nu, u, savf, f1nrm, p, su, sf, stepmx, * stptol, iret, unew, f1nrmp, mxtkn, f, jac) c----------------------------------------------------------------------- c this routine is the main driver for the linesearch algorithm. c its purpose is to find a unew = u + rl*p in the direction p from u c such that c t c f(unew) .le. f(u) + alpha*g (unew-u) (alpha=1.d-4 used), c c and c t c f(unew) .ge. f(u) + beta*g (unew-u) (beta=0.9d0 used), c c where 0 .lt. rl .le. 1. c----------------------------------------------------------------------- c c on entry c c n = size of the nonlinear system, and the length of arrays c unew, savf, su and sf. c c nu = length of array u. c c u = real array containing the previous approximate root. c c f1nrm = real scalar containing 0.5*norm(sf*f(u))**2, where c norm() denotes the euclidean norm. c c p = step direction returned by routine model. c c su = real array containing scaling factors for u. c c sf = real array containing scaling factors for f(u). c c stepmx = real scalar containing the maximum allowable stepsize c length. c c stptol = user-supplied tolerance on the minimum step length c s = unew-u. c c on return c c unew = real array containing the current approximate root. c c savf = real array containing the n-vector f(unew). c c f1nrmp = real scalar containing 0.5*norm(sf*f(unew))**2, where c norm() denotes the euclidean norm. c c iret = output flag. c iret=0 means that a satisfactory unew was found. c iret=1 means that the routine failed to find a unew c that was sufficiently distinct from u. c c mxtkn = logical flag indicating if the step just taken was of c maximum length stepmx. c c nbcf = number of steps for which beta-condition was not met c (in common). this value is incremented by one if c the beta-condition could not be met on this call. c c----------------------------------------------------------------------- external f external jac integer n integer iret integer i integer nu double precision savf double precision u double precision unew double precision f1nrm double precision f1nrmp double precision p double precision su double precision sf double precision stptol double precision stepmx double precision pnrm double precision ratio double precision slpi double precision rlngth double precision rl double precision fnrmp double precision rltmp double precision vnorm double precision beta double precision f1lo double precision f1nprv double precision rldiff double precision rlincr double precision rlmin double precision rllo double precision rlmax double precision rlprev dimension savf(n) dimension u(nu) dimension unew(n) dimension p(n) dimension su(n) dimension sf(n) double precision pt1 double precision pt1trl double precision pt99 double precision one double precision two double precision alpha logical mxtkn c----------------------------------------------------------------------- c nks001 common block. c----------------------------------------------------------------------- integer locwmp integer locimp integer iersl integer kmp integer mmax integer methn integer methk integer ipflg integer mfdif integer nfe integer nje integer nni integer nli integer npe integer nps integer ncfl integer nbcf double precision delt double precision sqteta double precision rhom common /nks001/ delt, rhom, sqteta, locwmp, locimp, iersl, kmp, * mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, * nli, npe, nps, ncfl, nbcf c----------------------------------------------------------------------- c nks002 common block. c----------------------------------------------------------------------- integer iprint integer iunit integer iermsg common /nks002/ iprint, iunit, iermsg c save data pt1/0.1d0/,alpha/1.d-4/,one/1.0d0/,two/2.0d0/ data pt99/0.99d0/,beta/0.9d0/ c mxtkn = .false. pnrm = vnorm(n,p,su) ratio = one if (pnrm .gt. stepmx) then ratio = stepmx/pnrm do 10 i = 1,n p(i) = p(i)*ratio 10 continue pnrm = stepmx endif slpi = -two*f1nrm*ratio if (methk .eq. 2) slpi = slpi + rhom*rhom*ratio call slngth(n, nu, u, p, su, rlngth) rlmin = stptol/rlngth rl = one if (iprint .gt. 1) write(iunit,20) rlmin 20 format(' ------ in routine lnsrch (min lambda=',e12.4,') ------ ') c----------------------------------------------------------------------- c begin iteration to find rl value satisfying alpha- and beta- c conditions. if rl becomes .lt. rlmin, then terminate with iret=1. c----------------------------------------------------------------------- 115 continue call dcopy(n, u, 1, unew, 1) do 120 i = 1,n u(i) = unew(i) + rl*p(i) 120 continue call f(n, nu, u, savf) nfe = nfe + 1 call dswap(n, u, 1, unew, 1) fnrmp = vnorm(n,savf,sf) f1nrmp = fnrmp*fnrmp/two if (iprint .gt. 1) write(iunit,125) rl,f1nrm,f1nrmp 125 format(' lambda, f1, f1new ',3d20.8) if (f1nrmp .gt. f1nrm + alpha*slpi*rl) go to 200 c alpha-condition satisfied. now check for beta-condition. if (f1nrmp .lt. f1nrm + beta*slpi*rl) then if ( (rl.eq.one) .and. (pnrm.lt.stepmx) ) then rlmax = stepmx/pnrm 130 continue rlprev = rl f1nprv = f1nrmp rl = dmin1(two*rl,rlmax) call dcopy(n, u, 1, unew, 1) do 140 i = 1,n 140 u(i) = unew(i) + rl*p(i) call f(n, nu, u, savf) nfe = nfe + 1 call dswap(n, u, 1, unew, 1) fnrmp = vnorm(n,savf,sf) f1nrmp = fnrmp*fnrmp/two if (iprint .gt. 1) write(iunit,125) rl,f1nrm,f1nrmp if ( (f1nrmp .le. f1nrm + alpha*slpi*rl) .and. * (f1nrmp .lt. f1nrm + beta*slpi*rl) .and. * (rl .lt. rlmax) ) go to 130 endif if ( (rl.lt.one) .or. * ((rl.gt.one).and.(f1nrmp.gt.f1nrm+alpha*slpi*rl)) ) then rllo = dmin1(rl,rlprev) rldiff = dabs(rlprev-rl) if (rl .lt. rlprev) then f1lo = f1nrmp else f1lo = f1nprv endif 150 continue rlincr = rldiff/two rl = rllo + rlincr call dcopy(n, u, 1, unew, 1) do 160 i = 1,n 160 u(i) = unew(i) + rl*p(i) call f(n, nu, u, savf) nfe = nfe + 1 call dswap(n, u, 1, unew, 1) fnrmp = vnorm(n,savf,sf) f1nrmp = fnrmp*fnrmp/two if (iprint .gt. 1) write(iunit,125) rl,f1nrm,f1nrmp if (f1nrmp .gt. f1nrm + alpha*slpi*rl) then rldiff = rlincr elseif (f1nrmp .lt. f1nrm + beta*slpi*rl) then rllo = rl rldiff = rldiff - rlincr f1lo = f1nrmp endif if ( ( (f1nrmp .gt. f1nrm + alpha*slpi*rl) .or. * (f1nrmp .lt. f1nrm + beta*slpi*rl) ) .and. * (rldiff .gt. rlmin) ) go to 150 if (f1nrmp .lt. f1nrm + beta*slpi*rl) then c beta condition could not be satisfied. set unew to last c u value that satisfied alpha-condition and continue. c increment counter on number of steps beta-condition not satisfied. f1nrmp = f1lo do 170 i = 1,n 170 unew(i) = u(i) + rllo*p(i) nbcf = nbcf + 1 endif endif endif iret = 0 if (rl*pnrm .gt. pt99*stepmx) mxtkn = .true. return c----------------------------------------------------------------------- c alpha-condition not satisfied. perform quadratic backtrack to c compute new rl value. c----------------------------------------------------------------------- 200 continue if (rl .lt. rlmin) then c no satisfactory unew can be found sufficiently distinct from u. c copy u into unew, set return code and return. iret = 1 call dcopy (n, u, 1, unew, 1) return endif c compute new rl using a quadratic backtrack. rltmp = -slpi/(two*(f1nrmp - f1nrm - slpi)) if (rltmp .gt. rl/two) rltmp = rl/two rlprev = rl f1nprv = f1nrmp pt1trl = pt1*rl if (rltmp .le. pt1trl) then rl = pt1trl else rl = rltmp endif go to 115 c----------------------- end of subroutine lnsrch ---------------------- end subroutine errgen (ierr, v1, v2, i1, i2) c----------------------------------------------------------------------- c this routine prints error messages from the driver nksol. the output c from this routine can be turned off by setting a flag in iwork. c----------------------------------------------------------------------- integer ierr integer i1 integer i2 double precision v1 double precision v2 c----------------------------------------------------------------------- c nks002 common block. c----------------------------------------------------------------------- integer iprint integer iunit integer iermsg common /nks002/ iprint, iunit, iermsg save c if (iermsg .gt. 0) return if (ierr .eq. 10) then write(iunit,9000) return endif if (ierr .eq. 20) then write(iunit,9010) return endif if (ierr .eq. 30) then write(iunit,9020) return endif if (ierr .eq. 40) then write(iunit,9030) i1,i2 return endif if (ierr .eq. 50) then write(iunit,9040) i1,v1 return endif if (ierr .eq. 100) then write(iunit,9100) i1,i2 return endif if (ierr .eq. 110) then write(iunit,9110) i1,i2 return endif if (ierr .eq. 120) then write(iunit,9120) i1,i2 return endif if (ierr .eq. 130) then write(iunit,*)'nu must be at least as large as n' return endif c----------------------------------------------------------------------- c formats. c----------------------------------------------------------------------- 9000 format(// * ' nksol --- illegal value for mf. mf must be between ' */' 1 and 3.' *) 9010 format(// * ' nksol --- illegal value for mdif. mdif must either 0 or 1. ' *) 9020 format(// * ' nksol --- illegal value for ipflag. ipflag must be either ' */' 0 or 1.' *) 9030 format(// * ' nksol --- illegal value for optional input in iwork. ' */' iwork(',i2,') = ',i3,' must be nonnegative.' *) 9040 format(// * ' nksol --- illegal value for optional input in rwork. ' */' rwork(',i2,') = ',e12.4,' must be nonnegative.' *) 9100 format(// * ' nksol --- insufficient length for rwork.' */' rwork length given - ',i8, */' rwork length needed - ',i8 *) 9110 format(// * ' nksol --- insufficient length for iwork.' */' iwork length given - ',i8, */' iwork length needed - ',i8 *) 9120 format(// * ' nksol --- maximum number of beta-condition test failures', */' exceeded.' */' number of beta-condition failures - ',i8, */' maximum number of failures allowed - ',i8 *) return c------------- end of subroutine errgen ------------------------------ end subroutine daxpy(n,da,dx,incx,dy,incy) c c constant times a vector plus a vector. c uses unrolled loops for increments equal to one. c jack dongarra, linpack, 3/11/78. c integer i integer incx integer incy integer m integer mp1 integer n integer ix integer iy double precision dx(n*incx) double precision dy(n*incy) double precision da c if(n.le.0)return if (da .eq. 0.0d0) return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments c not equal to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n dy(iy) = dy(iy) + da*dx(ix) ix = ix + incx iy = iy + incy 10 continue return c c code for both increments equal to 1 c c c clean-up loop c 20 m = mod(n,4) if( m .eq. 0 ) go to 40 do 30 i = 1,m dy(i) = dy(i) + da*dx(i) 30 continue if( n .lt. 4 ) return 40 mp1 = m + 1 do 50 i = mp1,n,4 dy(i) = dy(i) + da*dx(i) dy(i + 1) = dy(i + 1) + da*dx(i + 1) dy(i + 2) = dy(i + 2) + da*dx(i + 2) dy(i + 3) = dy(i + 3) + da*dx(i + 3) 50 continue return end subroutine dcopy(n,dx,incx,dy,incy) c c copies a vector, x, to a vector, y. c uses unrolled loops for increments equal to one. c jack dongarra, linpack, 3/11/78. c integer i integer incx integer incy integer ix integer iy integer m integer mp1 integer n double precision dx(n*incx) double precision dy(n*incy) c if(n.le.0)return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments c not equal to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n dy(iy) = dx(ix) ix = ix + incx iy = iy + incy 10 continue return c c code for both increments equal to 1 c c c clean-up loop c 20 m = mod(n,7) if( m .eq. 0 ) go to 40 do 30 i = 1,m dy(i) = dx(i) 30 continue if( n .lt. 7 ) return 40 mp1 = m + 1 do 50 i = mp1,n,7 dy(i) = dx(i) dy(i + 1) = dx(i + 1) dy(i + 2) = dx(i + 2) dy(i + 3) = dx(i + 3) dy(i + 4) = dx(i + 4) dy(i + 5) = dx(i + 5) dy(i + 6) = dx(i + 6) 50 continue return end double precision function ddot(n,dx,incx,dy,incy) c c forms the dot product of two vectors. c uses unrolled loops for increments equal to one. c jack dongarra, linpack, 3/11/78. c integer i integer incx integer incy integer ix integer iy integer m integer mp1 integer n double precision dx(n*incx) double precision dy(n*incy) double precision dtemp c ddot = 0.0d0 dtemp = 0.0d0 if(n.le.0)return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments c not equal to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n dtemp = dtemp + dx(ix)*dy(iy) ix = ix + incx iy = iy + incy 10 continue ddot = dtemp return c c code for both increments equal to 1 c c c clean-up loop c 20 m = mod(n,5) if( m .eq. 0 ) go to 40 do 30 i = 1,m dtemp = dtemp + dx(i)*dy(i) 30 continue if( n .lt. 5 ) go to 60 40 mp1 = m + 1 do 50 i = mp1,n,5 dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + * dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4) 50 continue 60 ddot = dtemp return end double precision function dnrm2 ( n, dx, incx) integer next integer n integer nn integer incx integer i integer j double precision dx(n*incx) double precision cutlo double precision cuthi double precision hitest double precision sum double precision xmax double precision zero double precision one data zero, one /0.0d0, 1.0d0/ c c euclidean norm of the n-vector stored in dx() with storage c increment incx . c if n .le. 0 return with result = 0. c if n .ge. 1 then incx must be .ge. 1 c c c.l.lawson, 1978 jan 08 c c four phase method using two built-in constants that are c hopefully applicable to all machines. c cutlo = maximum of dsqrt(u/eps) over all known machines. c cuthi = minimum of dsqrt(v) over all known machines. c where c eps = smallest no. such that eps + 1. .gt. 1. c u = smallest positive no. (underflow limit) c v = largest no. (overflow limit) c c brief outline of algorithm.. c c phase 1 scans zero components. c move to phase 2 when a component is nonzero and .le. cutlo c move to phase 3 when a component is .gt. cutlo c move to phase 4 when a component is .ge. cuthi/m c where m = n for x() real and m = 2*n for complex. c c values for cutlo and cuthi.. c from the environmental parameters listed in the imsl converter c document the limiting values are as follows.. c cutlo, s.p. u/eps = 2**(-102) for honeywell. close seconds are c univac and dec at 2**(-103) c thus cutlo = 2**(-51) = 4.44089e-16 c cuthi, s.p. v = 2**127 for univac, honeywell, and dec. c thus cuthi = 2**(63.5) = 1.30438e19 c cutlo, d.p. u/eps = 2**(-67) for honeywell and dec. c thus cutlo = 2**(-33.5) = 8.23181d-11 c cuthi, d.p. same as s.p. cuthi = 1.30438d19 c data cutlo, cuthi / 8.232d-11, 1.304d19 / c data cutlo, cuthi / 4.441e-16, 1.304e19 / data cutlo, cuthi / 8.232d-11, 1.304d19 / c if(n .gt. 0) go to 10 dnrm2 = zero go to 300 c 10 assign 30 to next sum = zero nn = n * incx c begin main loop i = 1 20 go to next,(30, 50, 70, 110) 30 if( dabs(dx(i)) .gt. cutlo) go to 85 assign 50 to next xmax = zero c c phase 1. sum is zero c 50 if( dx(i) .eq. zero) go to 200 if( dabs(dx(i)) .gt. cutlo) go to 85 c c prepare for phase 2. assign 70 to next go to 105 c c prepare for phase 4. c 100 i = j assign 110 to next sum = (sum / dx(i)) / dx(i) 105 xmax = dabs(dx(i)) go to 115 c c phase 2. sum is small. c scale to avoid destructive underflow. c 70 if( dabs(dx(i)) .gt. cutlo ) go to 75 c c common code for phases 2 and 4. c in phase 4 sum is large. scale to avoid overflow. c 110 if( dabs(dx(i)) .le. xmax ) go to 115 sum = one + sum * (xmax / dx(i))**2 xmax = dabs(dx(i)) go to 200 c 115 sum = sum + (dx(i)/xmax)**2 go to 200 c c c prepare for phase 3. c 75 sum = (sum * xmax) * xmax c c c for real or d.p. set hitest = cuthi/n c for complex set hitest = cuthi/(2*n) c 85 hitest = cuthi/float( n ) c c phase 3. sum is mid-range. no scaling. c do 95 j =i,nn,incx if(dabs(dx(j)) .ge. hitest) go to 100 95 sum = sum + dx(j)**2 dnrm2 = dsqrt( sum ) go to 300 c 200 continue i = i + incx if ( i .le. nn ) go to 20 c c end of main loop. c c compute square root and adjust for scaling. c dnrm2 = xmax * dsqrt(sum) 300 continue return end subroutine dscal(n,da,dx,incx) c c scales a vector by a constant. c uses unrolled loops for increment equal to one. c jack dongarra, linpack, 3/11/78. c integer i integer incx integer m integer mp1 integer n integer nincx double precision da double precision dx(n*incx) c if(n.le.0)return if(incx.eq.1)go to 20 c c code for increment not equal to 1 c nincx = n*incx do 10 i = 1,nincx,incx dx(i) = da*dx(i) 10 continue return c c code for increment equal to 1 c c c clean-up loop c 20 m = mod(n,5) if( m .eq. 0 ) go to 40 do 30 i = 1,m dx(i) = da*dx(i) 30 continue if( n .lt. 5 ) return 40 mp1 = m + 1 do 50 i = mp1,n,5 dx(i) = da*dx(i) dx(i + 1) = da*dx(i + 1) dx(i + 2) = da*dx(i + 2) dx(i + 3) = da*dx(i + 3) dx(i + 4) = da*dx(i + 4) 50 continue return end subroutine dswap (n,dx,incx,dy,incy) c c interchanges two vectors. c uses unrolled loops for increments equal one. c jack dongarra, linpack, 3/11/78. c integer i integer incx integer incy integer ix integer iy integer m integer mp1 integer n double precision dx(n*incx) double precision dy(n*incy) double precision dtemp c if(n.le.0)return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments not equal c to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n dtemp = dx(ix) dx(ix) = dy(iy) dy(iy) = dtemp ix = ix + incx iy = iy + incy 10 continue return c c code for both increments equal to 1 c c c clean-up loop c 20 m = mod(n,3) if( m .eq. 0 ) go to 40 do 30 i = 1,m dtemp = dx(i) dx(i) = dy(i) dy(i) = dtemp 30 continue if( n .lt. 3 ) return 40 mp1 = m + 1 do 50 i = mp1,n,3 dtemp = dx(i) dx(i) = dy(i) dy(i) = dtemp dtemp = dx(i + 1) dx(i + 1) = dy(i + 1) dy(i + 1) = dtemp dtemp = dx(i + 2) dx(i + 2) = dy(i + 2) dy(i + 2) = dtemp 50 continue return end integer function idamax(n,dx,incx) c c finds the index of element having max. absolute value. c jack dongarra, linpack, 3/11/78. c integer i integer incx integer ix integer n double precision dx(n*incx) double precision dmax c idamax = 0 if( n .lt. 1 ) return idamax = 1 if(n.eq.1)return if(incx.eq.1)go to 20 c c code for increment not equal to 1 c ix = 1 dmax = dabs(dx(1)) ix = ix + incx do 10 i = 2,n if(dabs(dx(ix)).le.dmax) go to 5 idamax = i dmax = dabs(dx(ix)) 5 ix = ix + incx 10 continue return c c code for increment equal to 1 c 20 dmax = dabs(dx(1)) do 30 i = 2,n if(dabs(dx(i)).le.dmax) go to 30 idamax = i dmax = dabs(dx(i)) 30 continue return end subroutine jac(n,nu,u,savf,v,z) integer n integer nu double precision u(nu),savf(n),v(n),z(n) return end subroutine pset(n, nu, u, savf, su, sf, wk, f, wmp, iwmp, ier) integer n integer nu integer iwmp integer ier double precision u double precision savf double precision su double precision sf double precision wk double precision wmp dimension u(nu) dimension savf(n) dimension su(n) dimension sf(n) dimension wk(n) dimension wmp(1) dimension iwmp(1) external f ier = 0 return end subroutine psol(n, nu, u, savf, f, jac, wk, wmp, iwmp, x, ier) integer n integer nu integer iwmp integer ier double precision u double precision savf double precision wk double precision wmp double precision x dimension u(nu) dimension savf(n) dimension wk(n) dimension wmp(1) dimension iwmp(1) dimension x(n) external f external jac return end subroutine boundy(u,nx,ny,type) integer i,j,nx,ny,type double precision u(0:nx+1,0:ny+1) if (type .eq. 1) then * Dirichlet: do 10 i=1,nx u(i,0) = 0.D0 u(i,ny+1) = 0.0D0 10 continue do 15 j=1,ny u(0,j) = 0.D0 u(nx+1,j) = 0.D0 15 continue endif if (type .eq. 2) then * mixed: do 20 i=1,nx u(i,0) = u(i,ny) u(i,ny+1) = u(i,1) 20 continue do 25 j=1,ny u(0,j) = 1.D0 u(nx+1,j) = 0.D0 25 continue endif end subroutine jacobm(n, nu, u, savf, v, z) * *************************************************** * optional routine to provide the jacobian to the nksol. * z(k) = sum_j df_k/du_j (v) * *************************************************** integer n,nu,i,j,k,nx,ny double precision u(nu), savf(n), v(n), z(n), + xlam,xeta,fretd common/epscal/xlam,xeta common/ijsize/nx,ny * first column z(1) = (1.D0 + 4.D0*xlam)*v(1) - + xlam*( v(ny+1) + v(2) + v(ny) ) - + xeta*fretd(u(1))*v(1) do 10 j=2,ny-1 z(j) = (1.D0 + 4.D0*xlam)*v(j) - + xlam*( v(ny+j) + v(j-1) + v(j+1) ) - + xeta*fretd(u(j))*v(j) 10 continue z(ny) = (1.D0 + 4.D0*xlam)*v(ny) - + xlam*( v(2*ny) + v(ny-1) + v(1) ) - + xeta*fretd(u(ny))*v(ny) * 2nd through the next to the last columns. k=ny do 20 i=2,nx-1 k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k+ny) + + v(k+1) + v(k+ny-1) ) - + xeta*fretd(u(k))*v(k) do 25 j=2,ny-1 k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k+ny) + + v(k-1) + v(k+1) ) - + xeta*fretd(u(k))*v(k) 25 continue k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k+ny) + + v(k-1) + v(k-ny+1) ) - + xeta*fretd(u(k))*v(k) 20 continue * last column k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k+1) + v(k+ny-1) ) - + xeta*fretd(u(k))*v(k) do 30 j=2,ny-1 k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k-1) + v(k+1) ) - + xeta*fretd(u(k))*v(k) 30 continue k=k+1 z(k) = (1.D0 + 4.D0*xlam)*v(k) - + xlam*( v(k-ny) + v(k-1) + v(k-ny+1) ) - + xeta*fretd(u(k))*v(k) end