Actual source code: bvcontour.c
 
   slepc-3.23.1 2025-05-01
   
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    BV developer functions needed in contour integral methods
 12: */
 14: #include <slepc/private/bvimpl.h>
 15: #include <slepcblaslapack.h>
 17: #define p_id(i) (i*subcomm->n + subcomm->color)
 19: /*@
 20:    BVScatter - Scatters the columns of a BV to another BV created in a
 21:    subcommunicator.
 23:    Collective
 25:    Input Parameters:
 26: +  Vin  - input basis vectors (defined on the whole communicator)
 27: .  scat - VecScatter object that contains the info for the communication
 28: -  xdup - an auxiliary vector
 30:    Output Parameter:
 31: .  Vout - output basis vectors (defined on the subcommunicator)
 33:    Notes:
 34:    Currently implemented as a loop for each the active column, where each
 35:    column is scattered independently. The vector xdup is defined on the
 36:    contiguous parent communicator and have enough space to store one
 37:    duplicate of the original vector per each subcommunicator.
 39:    Level: developer
 41: .seealso: BVGetColumn()
 42: @*/
 43: PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
 44: {
 45:   PetscInt          i;
 46:   Vec               v,z;
 47:   const PetscScalar *array;
 49:   PetscFunctionBegin;
 54:   PetscCall(BVCreateVec(Vout,&z));
 55:   for (i=Vin->l;i<Vin->k;i++) {
 56:     PetscCall(BVGetColumn(Vin,i,&v));
 57:     PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
 58:     PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
 59:     PetscCall(BVRestoreColumn(Vin,i,&v));
 60:     PetscCall(VecGetArrayRead(xdup,&array));
 61:     PetscCall(VecPlaceArray(z,array));
 62:     PetscCall(BVInsertVec(Vout,i,z));
 63:     PetscCall(VecResetArray(z));
 64:     PetscCall(VecRestoreArrayRead(xdup,&array));
 65:   }
 66:   PetscCall(VecDestroy(&z));
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }
 70: /*@
 71:    BVSumQuadrature - Computes the sum of terms required in the quadrature
 72:    rule to approximate the contour integral.
 74:    Collective
 76:    Input Parameters:
 77: +  Y       - input basis vectors
 78: .  M       - number of moments
 79: .  L       - block size
 80: .  L_max   - maximum block size
 81: .  w       - quadrature weights
 82: .  zn      - normalized quadrature points
 83: .  scat    - (optional) VecScatter object to communicate between subcommunicators
 84: .  subcomm - subcommunicator layout
 85: .  npoints - number of points to process by the subcommunicator
 86: -  useconj - whether conjugate points can be used or not
 88:    Output Parameter:
 89: .  S       - output basis vectors
 91:    Notes:
 92:    This is a generalization of BVMult(). The resulting matrix S consists of M
 93:    panels of L columns, and the following formula is computed for each panel
 94:    S_k = sum_j w_j*zn_j^k*Y_j, where Y_j is the j-th panel of Y containing
 95:    the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
 96:    the width of the panels in Y.
 98:    When using subcommunicators, Y is stored in the subcommunicators for a subset
 99:    of integration points. In that case, the computation is done in the subcomm
100:    and then scattered to the whole communicator in S using the VecScatter scat.
101:    The value npoints is the number of points to be processed in this subcomm
102:    and the flag useconj indicates whether symmetric points can be reused.
104:    Level: developer
106: .seealso: BVMult(), BVScatter(), BVDotQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
107: @*/
108: PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
109: {
110:   PetscInt       i,j,k,nloc;
111:   Vec            v,sj;
112:   PetscScalar    *ppk,*pv,one=1.0;
114:   PetscFunctionBegin;
119:   PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
120:   PetscCall(PetscMalloc1(npoints,&ppk));
121:   for (i=0;i<npoints;i++) ppk[i] = 1.0;
122:   PetscCall(BVCreateVec(Y,&v));
123:   for (k=0;k<M;k++) {
124:     for (j=0;j<L;j++) {
125:       PetscCall(VecSet(v,0.0));
126:       for (i=0;i<npoints;i++) {
127:         PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
128:         PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
129:       }
130:       if (PetscUnlikely(useconj)) {
131:         PetscCall(VecGetArray(v,&pv));
132:         for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
133:         PetscCall(VecRestoreArray(v,&pv));
134:       }
135:       PetscCall(BVGetColumn(S,k*L+j,&sj));
136:       if (PetscUnlikely(scat)) {
137:         PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
138:         PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
139:       } else PetscCall(VecCopy(v,sj));
140:       PetscCall(BVRestoreColumn(S,k*L+j,&sj));
141:     }
142:     for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
143:   }
144:   PetscCall(PetscFree(ppk));
145:   PetscCall(VecDestroy(&v));
146:   PetscFunctionReturn(PETSC_SUCCESS);
147: }
149: /*@
150:    BVDotQuadrature - Computes the projection terms required in the quadrature
151:    rule to approximate the contour integral.
153:    Collective
155:    Input Parameters:
156: +  Y       - first basis vectors
157: .  V       - second basis vectors
158: .  M       - number of moments
159: .  L       - block size
160: .  L_max   - maximum block size
161: .  w       - quadrature weights
162: .  zn      - normalized quadrature points
163: .  subcomm - subcommunicator layout
164: .  npoints - number of points to process by the subcommunicator
165: -  useconj - whether conjugate points can be used or not
167:    Output Parameter:
168: .  Mu      - computed result
170:    Notes:
171:    This is a generalization of BVDot(). The resulting matrix Mu consists of M
172:    blocks of size LxL (placed horizontally), each of them computed as
173:    Mu_k = sum_j w_j*zn_j^k*V'*Y_j, where Y_j is the j-th panel of Y containing
174:    the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
175:    the width of the panels in Y.
177:    When using subcommunicators, Y is stored in the subcommunicators for a subset
178:    of integration points. In that case, the computation is done in the subcomm
179:    and then the final result is combined via reduction.
180:    The value npoints is the number of points to be processed in this subcomm
181:    and the flag useconj indicates whether symmetric points can be reused.
183:    Level: developer
185: .seealso: BVDot(), BVScatter(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
186: @*/
187: PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar *Mu,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
188: {
189:   PetscMPIInt       sub_size,count;
190:   PetscInt          i,j,k,s;
191:   PetscScalar       *temp,*temp2,*ppk,alp;
192:   Mat               H;
193:   const PetscScalar *pH;
194:   MPI_Comm          child,parent;
196:   PetscFunctionBegin;
200:   PetscCall(PetscSubcommGetChild(subcomm,&child));
201:   PetscCallMPI(MPI_Comm_size(child,&sub_size));
202:   PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
203:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
204:   PetscCall(PetscArrayzero(temp2,2*M*L*L));
205:   PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
206:   PetscCall(BVSetActiveColumns(V,0,L));
207:   PetscCall(BVDot(Y,V,H));
208:   PetscCall(MatDenseGetArrayRead(H,&pH));
209:   for (i=0;i<npoints;i++) {
210:     for (j=0;j<L;j++) {
211:       for (k=0;k<L;k++) {
212:         temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
213:       }
214:     }
215:   }
216:   PetscCall(MatDenseRestoreArrayRead(H,&pH));
217:   for (i=0;i<npoints;i++) ppk[i] = 1;
218:   for (k=0;k<2*M;k++) {
219:     for (j=0;j<L;j++) {
220:       for (i=0;i<npoints;i++) {
221:         alp = ppk[i]*w[p_id(i)];
222:         for (s=0;s<L;s++) {
223:           if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
224:           else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
225:         }
226:       }
227:     }
228:     for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
229:   }
230:   for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
231:   PetscCall(PetscMPIIntCast(2*M*L*L,&count));
232:   PetscCall(PetscSubcommGetParent(subcomm,&parent));
233:   PetscCallMPI(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
234:   PetscCall(PetscFree3(temp,temp2,ppk));
235:   PetscCall(MatDestroy(&H));
236:   PetscFunctionReturn(PETSC_SUCCESS);
237: }
239: /*@
240:    BVTraceQuadrature - Computes an estimate of the number of eigenvalues
241:    inside a region via quantities computed in the quadrature rule of
242:    contour integral methods.
244:    Collective
246:    Input Parameters:
247: +  Y       - first basis vectors
248: .  V       - second basis vectors
249: .  L       - block size
250: .  L_max   - maximum block size
251: .  w       - quadrature weights
252: .  scat    - (optional) VecScatter object to communicate between subcommunicators
253: .  subcomm - subcommunicator layout
254: .  npoints - number of points to process by the subcommunicator
255: -  useconj - whether conjugate points can be used or not
257:    Output Parameter:
258: .  est_eig - estimated eigenvalue count
260:    Notes:
261:    This function returns an estimation of the number of eigenvalues in the
262:    region, computed as trace(V'*S_0), where S_0 is the first panel of S
263:    computed by BVSumQuadrature().
265:    When using subcommunicators, Y is stored in the subcommunicators for a subset
266:    of integration points. In that case, the computation is done in the subcomm
267:    and then scattered to the whole communicator in S using the VecScatter scat.
268:    The value npoints is the number of points to be processed in this subcomm
269:    and the flag useconj indicates whether symmetric points can be reused.
271:    Level: developer
273: .seealso: BVScatter(), BVDotQuadrature(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
274: @*/
275: PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar *w,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
276: {
277:   PetscInt       i,j;
278:   Vec            y,yall,vj;
279:   PetscScalar    dot,sum=0.0,one=1.0;
281:   PetscFunctionBegin;
286:   PetscCall(BVCreateVec(Y,&y));
287:   PetscCall(BVCreateVec(V,&yall));
288:   for (j=0;j<L;j++) {
289:     PetscCall(VecSet(y,0.0));
290:     for (i=0;i<npoints;i++) {
291:       PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
292:       PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
293:     }
294:     PetscCall(BVGetColumn(V,j,&vj));
295:     if (scat) {
296:       PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
297:       PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
298:       PetscCall(VecDot(vj,yall,&dot));
299:     } else PetscCall(VecDot(vj,y,&dot));
300:     PetscCall(BVRestoreColumn(V,j,&vj));
301:     if (useconj) sum += 2.0*PetscRealPart(dot);
302:     else sum += dot;
303:   }
304:   *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
305:   PetscCall(VecDestroy(&y));
306:   PetscCall(VecDestroy(&yall));
307:   PetscFunctionReturn(PETSC_SUCCESS);
308: }
310: static PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
311: {
312:   PetscInt       i,j,k,ml=S->k;
313:   PetscMPIInt    len;
314:   PetscScalar    *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
315:   PetscBLASInt   l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc,lds;
316: #if defined(PETSC_USE_COMPLEX)
317:   PetscReal      *rwork;
318: #endif
320:   PetscFunctionBegin;
321:   PetscCall(PetscBLASIntCast(S->ld,&lds));
322:   PetscCall(BVGetArray(S,&sarray));
323:   PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
324: #if defined(PETSC_USE_COMPLEX)
325:   PetscCall(PetscMalloc1(5*ml,&rwork));
326: #endif
327:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
329:   PetscCall(PetscArrayzero(B,ml*ml));
330:   for (i=0;i<ml;i++) B[i*ml+i]=1;
332:   for (k=0;k<2;k++) {
333:     PetscCall(PetscBLASIntCast(S->n,&m));
334:     PetscCall(PetscBLASIntCast(ml,&l));
335:     n = l; lda = m; ldb = m; ldc = l;
336:     if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lds,sarray,&lds,&beta,pA,&ldc));
337:     else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
338:     PetscCall(PetscArrayzero(temp2,ml*ml));
339:     PetscCall(PetscMPIIntCast(ml*ml,&len));
340:     PetscCallMPI(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));
342:     PetscCall(PetscBLASIntCast(ml,&m));
343:     n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
344: #if defined(PETSC_USE_COMPLEX)
345:     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
346: #else
347:     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
348: #endif
349:     SlepcCheckLapackInfo("gesvd",info);
351:     PetscCall(PetscBLASIntCast(S->n,&l));
352:     PetscCall(PetscBLASIntCast(ml,&n));
353:     m = n; lda = l; ldb = m; ldc = l;
354:     if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lds,temp2,&ldb,&beta,Q1,&ldc));
355:     else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
357:     PetscCall(PetscBLASIntCast(ml,&l));
358:     m = l; n = l; lda = l; ldb = m; ldc = l;
359:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
360:     for (i=0;i<ml;i++) {
361:       sigma[i] = PetscSqrtReal(sigma[i]);
362:       for (j=0;j<S->n;j++) {
363:         if (k%2) Q2[j+i*S->n] /= sigma[i];
364:         else Q1[j+i*S->n] /= sigma[i];
365:       }
366:       for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
367:     }
368:   }
370:   PetscCall(PetscBLASIntCast(ml,&m));
371:   n = m; lda = m; ldu=1; ldvt=1;
372: #if defined(PETSC_USE_COMPLEX)
373:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
374: #else
375:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
376: #endif
377:   SlepcCheckLapackInfo("gesvd",info);
379:   PetscCall(PetscBLASIntCast(S->n,&l));
380:   PetscCall(PetscBLASIntCast(ml,&n));
381:   m = n; lda = l; ldb = m;
382:   if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&lds));
383:   else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&lds));
385:   PetscCall(PetscFPTrapPop());
386:   PetscCall(BVRestoreArray(S,&sarray));
388:   if (rank) {
389:     (*rank) = 0;
390:     for (i=0;i<ml;i++) {
391:       if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
392:     }
393:   }
394:   PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
395: #if defined(PETSC_USE_COMPLEX)
396:   PetscCall(PetscFree(rwork));
397: #endif
398:   PetscFunctionReturn(PETSC_SUCCESS);
399: }
401: static PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
402: {
403:   PetscInt       i,n,ml=S->k;
404:   PetscBLASInt   m,lda,lwork,info;
405:   PetscScalar    *work;
406:   PetscReal      *rwork;
407:   Mat            A;
408:   Vec            v;
410:   PetscFunctionBegin;
411:   /* Compute QR factorizaton of S */
412:   PetscCall(BVGetSizes(S,NULL,&n,NULL));
413:   n = PetscMin(n,ml);
414:   PetscCall(BVSetActiveColumns(S,0,n));
415:   PetscCall(PetscArrayzero(pA,ml*n));
416:   PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
417:   PetscCall(BVOrthogonalize(S,A));
418:   if (n<ml) {
419:     /* the rest of the factorization */
420:     for (i=n;i<ml;i++) {
421:       PetscCall(BVGetColumn(S,i,&v));
422:       PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
423:       PetscCall(BVRestoreColumn(S,i,&v));
424:     }
425:   }
426:   PetscCall(PetscBLASIntCast(n,&lda));
427:   PetscCall(PetscBLASIntCast(ml,&m));
428:   PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
429:   lwork = 5*m;
430:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
431: #if !defined (PETSC_USE_COMPLEX)
432:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
433: #else
434:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
435: #endif
436:   SlepcCheckLapackInfo("gesvd",info);
437:   PetscCall(PetscFPTrapPop());
438:   *rank = 0;
439:   for (i=0;i<n;i++) {
440:     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
441:   }
442:   /* n first columns of A have the left singular vectors */
443:   PetscCall(BVMultInPlace(S,A,0,*rank));
444:   PetscCall(PetscFree2(work,rwork));
445:   PetscCall(MatDestroy(&A));
446:   PetscFunctionReturn(PETSC_SUCCESS);
447: }
449: static PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
450: {
451:   PetscInt       i,j,n,ml=S->k;
452:   PetscBLASInt   m,k_,lda,lwork,info;
453:   PetscScalar    *work,*T,*U,*R,sone=1.0,zero=0.0;
454:   PetscReal      *rwork;
455:   Mat            A;
457:   PetscFunctionBegin;
458:   /* Compute QR factorizaton of S */
459:   PetscCall(BVGetSizes(S,NULL,&n,NULL));
460:   PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
461:   PetscCall(BVSetActiveColumns(S,0,ml));
462:   PetscCall(PetscArrayzero(pA,ml*ml));
463:   PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
464:   PetscCall(BVOrthogonalize(S,A));
465:   PetscCall(MatDestroy(&A));
467:   /* SVD of first (M-1)*L diagonal block */
468:   PetscCall(PetscBLASIntCast((M-1)*L,&m));
469:   PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
470:   for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
471:   lwork = 5*m;
472:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
473: #if !defined (PETSC_USE_COMPLEX)
474:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
475: #else
476:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
477: #endif
478:   SlepcCheckLapackInfo("gesvd",info);
479:   PetscCall(PetscFPTrapPop());
480:   *rank = 0;
481:   for (i=0;i<m;i++) {
482:     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
483:   }
484:   PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
485:   PetscCall(BVSetActiveColumns(S,0,m));
486:   PetscCall(BVMultInPlace(S,A,0,*rank));
487:   PetscCall(MatDestroy(&A));
488:   /* Projected linear system */
489:   /* m first columns of A have the right singular vectors */
490:   PetscCall(PetscBLASIntCast(*rank,&k_));
491:   PetscCall(PetscBLASIntCast(ml,&lda));
492:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
493:   PetscCall(PetscArrayzero(pA,ml*ml));
494:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
495:   for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
496:   PetscCall(PetscFree5(T,R,U,work,rwork));
497:   PetscFunctionReturn(PETSC_SUCCESS);
498: }
500: /*@
501:    BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
502:    values) and determine the numerical rank according to a tolerance.
504:    Collective
506:    Input Parameters:
507: +  S     - the basis vectors
508: .  m     - the moment degree
509: .  l     - the block size
510: .  delta - the tolerance used to determine the rank
511: -  meth  - the method to be used
513:    Output Parameters:
514: +  A     - workspace, on output contains relevant values in the CAA method
515: .  sigma - computed singular values
516: -  rank  - estimated rank (optional)
518:    Notes:
519:    This function computes [U,Sigma,V] = svd(S) and replaces S with U.
520:    The current implementation computes this via S'*S, and it may include
521:    some kind of iterative refinement to improve accuracy in some cases.
523:    The parameters m and l refer to the moment and block size of contour
524:    integral methods. All columns up to m*l are modified, and the active
525:    columns are set to 0..m*l.
527:    The method is one of BV_SVD_METHOD_REFINE, BV_SVD_METHOD_QR, BV_SVD_METHOD_QR_CAA.
529:    The A workspace should be m*l*m*l in size.
531:    Once the decomposition is computed, the numerical rank is estimated
532:    by counting the number of singular values that are larger than the
533:    tolerance delta, relative to the first singular value.
535:    Level: developer
537: .seealso: BVSetActiveColumns()
538: @*/
539: PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar *A,PetscReal *sigma,PetscInt *rank)
540: {
541:   PetscFunctionBegin;
547:   PetscAssertPointer(A,6);
548:   PetscAssertPointer(sigma,7);
549:   PetscAssertPointer(rank,8);
551:   PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
552:   PetscCall(BVSetActiveColumns(S,0,m*l));
553:   switch (meth) {
554:     case BV_SVD_METHOD_REFINE:
555:       PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
556:       break;
557:     case BV_SVD_METHOD_QR:
558:       PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
559:       break;
560:     case BV_SVD_METHOD_QR_CAA:
561:       PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
562:       break;
563:   }
564:   PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
565:   PetscFunctionReturn(PETSC_SUCCESS);
566: }
568: /*@
569:    BVCISSResizeBases - Resize the bases involved in CISS solvers when the L grows.
571:    Logically Collective
573:    Input Parameters:
574: +  S      - basis of L*M columns
575: .  V      - basis of L columns (may be associated to subcommunicators)
576: .  Y      - basis of npoints*L columns
577: .  Lold   - old value of L
578: .  Lnew   - new value of L
579: .  M      - the moment size
580: -  npoints - number of integration points
582:    Level: developer
584: .seealso: BVResize()
585: @*/
586: PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
587: {
588:   PetscInt       i,j;
590:   PetscFunctionBegin;
599:   PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
600:   PetscCall(BVResize(V,Lnew,PETSC_TRUE));
601:   PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
602:   /* columns of Y are interleaved */
603:   for (i=npoints-1;i>=0;i--) {
604:     for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
605:   }
606:   PetscFunctionReturn(PETSC_SUCCESS);
607: }