00001 /* 00002 This file belongs to Aeneas. Aeneas is a GNU package released under GPL 3. 00003 This code is a simulator for Submicron 3D Semiconductor Devices. 00004 It implements the Monte Carlo transport in 3D tetrahedra meshes 00005 for the simulation of the semiclassical Boltzmann equation for both electrons. 00006 It also includes all the relevant quantum effects for nanodevices. 00007 00008 Copyright (C) 2007 Jean Michel Sellier <sellier@dmi.unict.it> 00009 00010 This program is free software; you can redistribute it and/or modify 00011 it under the terms of the GNU General Public License as published by 00012 the Free Software Foundation; either version 3, or (at your option) 00013 any later version. 00014 00015 This program is distributed in the hope that it will be useful, 00016 but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 GNU General Public License for more details. 00019 00020 You should have received a copy of the GNU General Public License 00021 along with this program. If not, see <http://www.gnu.org/licenses/>. 00022 */ 00023 00024 // Created on : 07 june 2007, Siracusa, Jean Michel Sellier 00025 // Last modified : 16 september 2007, Siracusa, Jean Michel Sellier 00026 00027 // Solves A.x = b for x[1..n], given b[1..n], by the iterative biconjugate 00028 // gradient method. On input x[1..n] should be set to an initial guess of 00029 // the solution (or all zeros); 00030 // itol is 1,2,3 or 4, specifying which convergence test is applied (see below) 00031 // itmax is the maximum number of allowed iterations 00032 // tol is the desired convergence tollerance 00033 // On output, x[1..n] is reset to the improved solution 00034 // iter is the number of iterations actually taken 00035 // err is the estimated error 00036 // The matrix A is referenced only through the user-supplied routines atimes, 00037 // which computes the product of either A or its transpose on a vector; 00038 // and asolve. 00039 00040 void linbcg(int n,double b[],double x[],int itol,double tol,int itmax) 00041 { 00042 int j; 00043 double ak,akden,bk,bkden,bknum,bnrm,dxnrm,xnrm,zminrm,znrm; 00044 int iter; 00045 double err=1.e12; 00046 00047 // reset the allocated arrays 00048 for(j=1;j<=n;j++){ 00049 p[j]=0.0; 00050 r[j]=0.0; 00051 z[j]=0.0; 00052 pp[j]=0.0; 00053 rr[j]=0.0; 00054 zz[j]=0.0; 00055 } 00056 00057 // {int i; for(i=1;i<=Ng;i++) printf("b[%d]=%g\n",i,b[i]);} 00058 // calculate the initial residual 00059 iter=0; 00060 atimes(n,x,r,0); 00061 for(j=1;j<=n;j++){ 00062 r[j]=b[j]-r[j]; 00063 rr[j]=r[j]; 00064 // printf("j = %d r = %g b = %g rr = %g\n",j,r[j],b[j],rr[j]); 00065 } 00066 00067 // uncomment this line to get the "minimum residual" variant of the algorithm 00068 // atimes(n,r,rr,0); 00069 00070 if(itol==1){ 00071 bnrm=snrm(n,b,itol); 00072 asolve(n,r,z,0); 00073 } 00074 else if(itol==2){ 00075 asolve(n,b,z,0); 00076 bnrm=snrm(n,z,itol); 00077 asolve(n,r,z,0); 00078 } 00079 else if(itol==3 || itol==4){ 00080 asolve(n,b,z,0); 00081 bnrm=snrm(n,z,itol); 00082 asolve(n,r,z,0); 00083 znrm=snrm(n,z,itol); 00084 } 00085 else{ 00086 printf("linbcg error : illegal itol\n"); 00087 exit(0); 00088 } 00089 00090 // printf("bnrm=%g\n",bnrm); 00091 while(iter<=itmax){ // <--- main loop 00092 // printf("iter = %d itmax = %d\n",iter,itmax); 00093 ++(iter); 00094 asolve(n,rr,zz,1); // final 1 indicates use of transpose matrix A tilde 00095 for(bknum=0.0,j=1;j<=n;j++) bknum+=z[j]*rr[j]; 00096 // printf("bknum=%g\n",bknum); 00097 // calculate coefficient bk and direction vectors p and pp 00098 if(iter==1){ 00099 for(j=1;j<=n;j++){ 00100 p[j]=z[j]; 00101 pp[j]=zz[j]; 00102 } 00103 } 00104 else{ 00105 bk=bknum/bkden; 00106 for(j=1;j<=n;j++){ 00107 p[j]=bk*p[j]+z[j]; 00108 pp[j]=bk*pp[j]+zz[j]; 00109 } 00110 } 00111 00112 // for(j=1;j<=n;j++) printf("p[%d]=%g\n",j,p[j]); 00113 // calculate coefficient ak, new iterate x, and new residuals r and rr 00114 bkden=bknum; 00115 atimes(n,p,z,0); 00116 for(akden=0.0,j=1;j<=n;j++) akden+=z[j]*pp[j]; 00117 // printf("akden=%g\n",akden); 00118 ak=bknum/akden; 00119 // printf("ak=%g\n",ak); 00120 atimes(n,pp,zz,1); 00121 for(j=1;j<=n;j++){ 00122 x[j] += ak*p[j]; 00123 r[j] -= ak*z[j]; 00124 rr[j]-= ak*zz[j]; 00125 } 00126 00127 // solve A tilde . z = r and check stopping criterion 00128 asolve(n,r,z,0); 00129 if(itol==1) err=snrm(n,r,itol)/bnrm; 00130 else if(itol==2) err=snrm(n,z,itol)/bnrm; 00131 else if(itol==3 || itol==4){ 00132 zminrm=znrm; 00133 znrm=snrm(n,z,itol); 00134 if(fabs(zminrm-znrm)>EPS*znrm){ 00135 dxnrm=fabs(ak)*snrm(n,p,itol); 00136 err=znrm/fabs(zminrm-znrm)*dxnrm; 00137 } 00138 else{ 00139 err=znrm/bnrm; // error may not be accurate, so loop again 00140 // printf("iter=%4d err=%12.6f\n",iter,err); 00141 continue; 00142 } 00143 xnrm=snrm(n,x,itol); 00144 if(err<=0.5*xnrm) err/=xnrm; 00145 else{ 00146 err=znrm/bnrm; // error may not be accurate, so loop again 00147 // printf("iter=%4d err=%12.6f\n",iter,err); 00148 continue; 00149 } 00150 } 00151 // printf("*** err=%g ",err); 00152 if(err<=tol){ 00153 printf("\nerr = %g < tol = %g\n",err,tol); 00154 break; 00155 } 00156 } 00157 }