Main Page | Directories | File List | File Members

linbcg.h

Go to the documentation of this file.
00001 /*
00002    This file belongs to Aeneas. Aeneas is a GNU package released under GPL 3.
00003    This code is a simulator for Submicron 3D Semiconductor Devices. 
00004    It implements the Monte Carlo transport in 3D tetrahedra meshes
00005    for the simulation of the semiclassical Boltzmann equation for both electrons.
00006    It also includes all the relevant quantum effects for nanodevices.
00007 
00008    Copyright (C) 2007 Jean Michel Sellier <sellier@dmi.unict.it>
00009  
00010    This program is free software; you can redistribute it and/or modify
00011    it under the terms of the GNU General Public License as published by
00012    the Free Software Foundation; either version 3, or (at your option)
00013    any later version.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    You should have received a copy of the GNU General Public License
00021    along with this program. If not, see <http://www.gnu.org/licenses/>.
00022 */
00023 
00024 // Created on : 07 june 2007, Siracusa, Jean Michel Sellier
00025 // Last modified : 16 september 2007, Siracusa, Jean Michel Sellier
00026 
00027 // Solves A.x = b for x[1..n], given b[1..n], by the iterative biconjugate
00028 // gradient method. On input x[1..n] should be set to an initial guess of
00029 // the solution (or all zeros); 
00030 // itol is 1,2,3 or 4, specifying which convergence test is applied (see below)
00031 // itmax is the maximum number of allowed iterations
00032 // tol is the desired convergence tollerance
00033 // On output, x[1..n] is reset to the improved solution
00034 // iter is the number of iterations actually taken
00035 // err is the estimated error
00036 // The matrix A is referenced only through the user-supplied routines atimes,
00037 // which computes the product of either A or its transpose on a vector;
00038 // and asolve.
00039 
00040 void linbcg(int n,double b[],double x[],int itol,double tol,int itmax)
00041 {
00042  int j;
00043  double ak,akden,bk,bkden,bknum,bnrm,dxnrm,xnrm,zminrm,znrm;
00044  int iter;
00045  double err=1.e12;
00046 
00047 // reset the allocated arrays
00048  for(j=1;j<=n;j++){
00049    p[j]=0.0;
00050    r[j]=0.0;
00051    z[j]=0.0;
00052    pp[j]=0.0;
00053    rr[j]=0.0;
00054    zz[j]=0.0;
00055  }
00056 
00057 // {int i; for(i=1;i<=Ng;i++) printf("b[%d]=%g\n",i,b[i]);}
00058 // calculate the initial residual
00059  iter=0;
00060  atimes(n,x,r,0);
00061  for(j=1;j<=n;j++){
00062     r[j]=b[j]-r[j];
00063     rr[j]=r[j];
00064 //    printf("j = %d r = %g b = %g rr = %g\n",j,r[j],b[j],rr[j]);
00065  } 
00066 
00067 // uncomment this line to get the "minimum residual" variant of the algorithm
00068 // atimes(n,r,rr,0); 
00069 
00070  if(itol==1){
00071    bnrm=snrm(n,b,itol);
00072    asolve(n,r,z,0);
00073  }
00074  else if(itol==2){
00075    asolve(n,b,z,0);
00076    bnrm=snrm(n,z,itol);
00077    asolve(n,r,z,0);
00078  }
00079  else if(itol==3 || itol==4){
00080    asolve(n,b,z,0);
00081    bnrm=snrm(n,z,itol);
00082    asolve(n,r,z,0);
00083    znrm=snrm(n,z,itol);
00084  } 
00085  else{
00086    printf("linbcg error : illegal itol\n");
00087    exit(0);
00088  }
00089 
00090 // printf("bnrm=%g\n",bnrm);
00091  while(iter<=itmax){ // <--- main loop
00092 // printf("iter = %d itmax = %d\n",iter,itmax);
00093    ++(iter);
00094    asolve(n,rr,zz,1); // final 1 indicates use of transpose matrix A tilde
00095    for(bknum=0.0,j=1;j<=n;j++) bknum+=z[j]*rr[j];
00096 //   printf("bknum=%g\n",bknum);
00097 // calculate coefficient bk and direction vectors p and pp
00098    if(iter==1){
00099         for(j=1;j<=n;j++){
00100             p[j]=z[j];
00101             pp[j]=zz[j];
00102         }
00103    }
00104    else{
00105      bk=bknum/bkden;
00106      for(j=1;j<=n;j++){
00107         p[j]=bk*p[j]+z[j];
00108         pp[j]=bk*pp[j]+zz[j];
00109      }   
00110    }
00111 
00112 //   for(j=1;j<=n;j++) printf("p[%d]=%g\n",j,p[j]);
00113 // calculate coefficient ak, new iterate x, and new residuals r and rr
00114    bkden=bknum;
00115    atimes(n,p,z,0);
00116    for(akden=0.0,j=1;j<=n;j++) akden+=z[j]*pp[j];
00117 //   printf("akden=%g\n",akden);
00118    ak=bknum/akden;
00119 //   printf("ak=%g\n",ak);
00120    atimes(n,pp,zz,1);
00121    for(j=1;j<=n;j++){
00122      x[j] += ak*p[j];
00123      r[j] -= ak*z[j];
00124      rr[j]-= ak*zz[j];
00125    }
00126 
00127 // solve A tilde . z = r and check stopping criterion
00128    asolve(n,r,z,0);
00129    if(itol==1) err=snrm(n,r,itol)/bnrm;
00130    else if(itol==2) err=snrm(n,z,itol)/bnrm;
00131    else if(itol==3 || itol==4){
00132           zminrm=znrm;
00133           znrm=snrm(n,z,itol);
00134           if(fabs(zminrm-znrm)>EPS*znrm){
00135             dxnrm=fabs(ak)*snrm(n,p,itol);
00136             err=znrm/fabs(zminrm-znrm)*dxnrm;
00137           }
00138           else{
00139             err=znrm/bnrm; // error may not be accurate, so loop again
00140 //            printf("iter=%4d err=%12.6f\n",iter,err);
00141             continue;
00142           }
00143           xnrm=snrm(n,x,itol);
00144           if(err<=0.5*xnrm) err/=xnrm;
00145           else{
00146             err=znrm/bnrm; // error may not be accurate, so loop again
00147 //            printf("iter=%4d err=%12.6f\n",iter,err);
00148             continue;
00149           }
00150    }
00151 //   printf("*** err=%g ",err);
00152    if(err<=tol){
00153      printf("\nerr = %g < tol = %g\n",err,tol);
00154      break;
00155    }
00156  }
00157 }

© sourcejam.com 2005-2008