LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
zgbtrf.f
Go to the documentation of this file.
1 *> \brief \b ZGBTRF
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download ZGBTRF + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgbtrf.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgbtrf.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgbtrf.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * RECURSIVE SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV,
22 * INFO )
23 *
24 * .. Scalar Arguments ..
25 * INTEGER INFO, KL, KU, LDAB, M, N
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * COMPLEX*16 AB( LDAB, * )
30 * ..
31 *
32 *
33 *> \par Purpose:
34 * =============
35 *>
36 *> \verbatim
37 *>
38 *> ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
39 *> using partial pivoting with row interchanges.
40 *>
41 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
42 *> \endverbatim
43 *
44 * Arguments:
45 * ==========
46 *
47 *> \param[in] M
48 *> \verbatim
49 *> M is INTEGER
50 *> The number of rows of the matrix A. M >= 0.
51 *> \endverbatim
52 *>
53 *> \param[in] N
54 *> \verbatim
55 *> N is INTEGER
56 *> The number of columns of the matrix A. N >= 0.
57 *> \endverbatim
58 *>
59 *> \param[in] KL
60 *> \verbatim
61 *> KL is INTEGER
62 *> The number of subdiagonals within the band of A. KL >= 0.
63 *> \endverbatim
64 *>
65 *> \param[in] KU
66 *> \verbatim
67 *> KU is INTEGER
68 *> The number of superdiagonals within the band of A. KU >= 0.
69 *> \endverbatim
70 *>
71 *> \param[in,out] AB
72 *> \verbatim
73 *> AB is COMPLEX*16 array, dimension (LDAB,N)
74 *> On entry, the matrix A in band storage, in rows KL+1 to
75 *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
76 *> The j-th column of A is stored in the j-th column of the
77 *> array AB as follows:
78 *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
79 *>
80 *> On exit, details of the factorization: U is stored as an
81 *> upper triangular band matrix with KL+KU superdiagonals in
82 *> rows 1 to KL+KU+1, and the multipliers used during the
83 *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
84 *> See below for further details.
85 *> \endverbatim
86 *>
87 *> \param[in] LDAB
88 *> \verbatim
89 *> LDAB is INTEGER
90 *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
91 *> \endverbatim
92 *>
93 *> \param[out] IPIV
94 *> \verbatim
95 *> IPIV is INTEGER array, dimension (min(M,N))
96 *> The pivot indices; for 1 <= i <= min(M,N), row i of the
97 *> matrix was interchanged with row IPIV(i).
98 *> \endverbatim
99 *>
100 *> \param[out] INFO
101 *> \verbatim
102 *> INFO is INTEGER
103 *> = 0: successful exit
104 *> < 0: if INFO = -i, the i-th argument had an illegal value
105 *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
106 *> has been completed, but the factor U is exactly
107 *> singular, and division by zero will occur if it is used
108 *> to solve a system of equations.
109 *> \endverbatim
110 *
111 * Authors:
112 * ========
113 *
114 *> \author Univ. of Tennessee
115 *> \author Univ. of California Berkeley
116 *> \author Univ. of Colorado Denver
117 *> \author NAG Ltd.
118 *
119 *> \date November 2011
120 *
121 *> \ingroup complex16GBcomputational
122 *
123 *> \par Further Details:
124 * =====================
125 *>
126 *> \verbatim
127 *>
128 *> The band storage scheme is illustrated by the following example, when
129 *> M = N = 6, KL = 2, KU = 1:
130 *>
131 *> On entry: On exit:
132 *>
133 *> * * * + + + * * * u14 u25 u36
134 *> * * + + + + * * u13 u24 u35 u46
135 *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
136 *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
137 *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
138 *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
139 *>
140 *> Array elements marked * are not used by the routine; elements marked
141 *> + need not be set on entry, but are required by the routine to store
142 *> elements of U because of fill-in resulting from the row interchanges.
143 *> \endverbatim
144 *>
145 * =====================================================================
146  RECURSIVE SUBROUTINE zgbtrf( M, N, KL, KU, AB, LDAB, IPIV, INFO )
147 *
148 * -- LAPACK computational routine (version 3.4.0) --
149 * -- LAPACK is a software package provided by Univ. of Tennessee, --
150 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
151 * November 2011
152 *
153 * .. Scalar Arguments ..
154  INTEGER info, kl, ku, ldab, m, n
155 * ..
156 * .. Array Arguments ..
157  INTEGER ipiv( * )
158  COMPLEX*16 ab( ldab, * )
159 * ..
160 *
161 * =====================================================================
162 *
163 * .. Parameters ..
164  COMPLEX*16 one, zero
165  parameter( one = ( 1.0d+0, 0.0d+0 ),
166  $ zero = ( 0.0d+0, 0.0d+0 ) )
167  INTEGER nbmax, ldwork
168  parameter( nbmax = 64, ldwork = nbmax+1 )
169 * ..
170 * .. Local Scalars ..
171  INTEGER i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp,
172  $ ju, k2, km, kv, nb, nw
173  COMPLEX*16 temp
174 * ..
175 * .. Local Arrays ..
176  COMPLEX*16 work13( ldwork, nbmax ),
177  $ work31( ldwork, nbmax )
178 * ..
179 * .. External Functions ..
180  INTEGER ilaenv, izamax
181  EXTERNAL ilaenv, izamax
182 * ..
183 * .. External Subroutines ..
184  EXTERNAL xerbla, zcopy, zgbtf2, zgemm, zgeru, zlaswp,
185  $ zscal, zswap, ztrsm
186 * ..
187 * .. Intrinsic Functions ..
188  INTRINSIC max, min
189 * ..
190 * .. Executable Statements ..
191 *
192 * KV is the number of superdiagonals in the factor U, allowing for
193 * fill-in
194 *
195  kv = ku + kl
196 *
197 * Test the input parameters.
198 *
199  info = 0
200  IF( m.LT.0 ) THEN
201  info = -1
202  ELSE IF( n.LT.0 ) THEN
203  info = -2
204  ELSE IF( kl.LT.0 ) THEN
205  info = -3
206  ELSE IF( ku.LT.0 ) THEN
207  info = -4
208  ELSE IF( ldab.LT.kl+kv+1 ) THEN
209  info = -6
210  END IF
211  IF( info.NE.0 ) THEN
212  CALL xerbla( 'ZGBTRF', -info )
213  RETURN
214  END IF
215 *
216 * Quick return if possible
217 *
218  IF( m.EQ.0 .OR. n.EQ.0 )
219  $ RETURN
220 *
221 * Determine the block size for this environment
222 *
223  nb = ilaenv( 1, 'ZGBTRF', ' ', m, n, kl, ku )
224 *
225 * The block size must not exceed the limit set by the size of the
226 * local arrays WORK13 and WORK31.
227 *
228  nb = min( nb, nbmax )
229 *
230  IF( nb.LE.1 .OR. nb.GT.kl ) THEN
231 *
232 * Use unblocked code
233 *
234  CALL zgbtf2( m, n, kl, ku, ab, ldab, ipiv, info )
235  ELSE
236 *
237 * Use blocked code
238 *
239 * Zero the superdiagonal elements of the work array WORK13
240 *
241  DO 20 j = 1, nb
242  DO 10 i = 1, j - 1
243  work13( i, j ) = zero
244  10 CONTINUE
245  20 CONTINUE
246 *
247 * Zero the subdiagonal elements of the work array WORK31
248 *
249  DO 40 j = 1, nb
250  DO 30 i = j + 1, nb
251  work31( i, j ) = zero
252  30 CONTINUE
253  40 CONTINUE
254 *
255 * Gaussian elimination with partial pivoting
256 *
257 * Set fill-in elements in columns KU+2 to KV to zero
258 *
259  DO 60 j = ku + 2, min( kv, n )
260  DO 50 i = kv - j + 2, kl
261  ab( i, j ) = zero
262  50 CONTINUE
263  60 CONTINUE
264 *
265 * JU is the index of the last column affected by the current
266 * stage of the factorization
267 *
268  ju = 1
269 *
270  DO 180 j = 1, min( m, n ), nb
271  jb = min( nb, min( m, n )-j+1 )
272 *
273 * The active part of the matrix is partitioned
274 *
275 * A11 A12 A13
276 * A21 A22 A23
277 * A31 A32 A33
278 *
279 * Here A11, A21 and A31 denote the current block of JB columns
280 * which is about to be factorized. The number of rows in the
281 * partitioning are JB, I2, I3 respectively, and the numbers
282 * of columns are JB, J2, J3. The superdiagonal elements of A13
283 * and the subdiagonal elements of A31 lie outside the band.
284 *
285  i2 = min( kl-jb, m-j-jb+1 )
286  i3 = min( jb, m-j-kl+1 )
287 *
288 * J2 and J3 are computed after JU has been updated.
289 *
290 * Factorize the current block of JB columns
291 *
292  DO 80 jj = j, j + jb - 1
293 *
294 * Set fill-in elements in column JJ+KV to zero
295 *
296  IF( jj+kv.LE.n ) THEN
297  DO 70 i = 1, kl
298  ab( i, jj+kv ) = zero
299  70 CONTINUE
300  END IF
301 *
302 * Find pivot and test for singularity. KM is the number of
303 * subdiagonal elements in the current column.
304 *
305  km = min( kl, m-jj )
306  jp = izamax( km+1, ab( kv+1, jj ), 1 )
307  ipiv( jj ) = jp + jj - j
308  IF( ab( kv+jp, jj ).NE.zero ) THEN
309  ju = max( ju, min( jj+ku+jp-1, n ) )
310  IF( jp.NE.1 ) THEN
311 *
312 * Apply interchange to columns J to J+JB-1
313 *
314  IF( jp+jj-1.LT.j+kl ) THEN
315 *
316  CALL zswap( jb, ab( kv+1+jj-j, j ), ldab-1,
317  $ ab( kv+jp+jj-j, j ), ldab-1 )
318  ELSE
319 *
320 * The interchange affects columns J to JJ-1 of A31
321 * which are stored in the work array WORK31
322 *
323  CALL zswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
324  $ work31( jp+jj-j-kl, 1 ), ldwork )
325  CALL zswap( j+jb-jj, ab( kv+1, jj ), ldab-1,
326  $ ab( kv+jp, jj ), ldab-1 )
327  END IF
328  END IF
329 *
330 * Compute multipliers
331 *
332  CALL zscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
333  $ 1 )
334 *
335 * Update trailing submatrix within the band and within
336 * the current block. JM is the index of the last column
337 * which needs to be updated.
338 *
339  jm = min( ju, j+jb-1 )
340  IF( jm.GT.jj )
341  $ CALL zgeru( km, jm-jj, -one, ab( kv+2, jj ), 1,
342  $ ab( kv, jj+1 ), ldab-1,
343  $ ab( kv+1, jj+1 ), ldab-1 )
344  ELSE
345 *
346 * If pivot is zero, set INFO to the index of the pivot
347 * unless a zero pivot has already been found.
348 *
349  IF( info.EQ.0 )
350  $ info = jj
351  END IF
352 *
353 * Copy current column of A31 into the work array WORK31
354 *
355  nw = min( jj-j+1, i3 )
356  IF( nw.GT.0 )
357  $ CALL zcopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
358  $ work31( 1, jj-j+1 ), 1 )
359  80 CONTINUE
360  IF( j+jb.LE.n ) THEN
361 *
362 * Apply the row interchanges to the other blocks.
363 *
364  j2 = min( ju-j+1, kv ) - jb
365  j3 = max( 0, ju-j-kv+1 )
366 *
367 * Use ZLASWP to apply the row interchanges to A12, A22, and
368 * A32.
369 *
370  CALL zlaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,
371  $ ipiv( j ), 1 )
372 *
373 * Adjust the pivot indices.
374 *
375  DO 90 i = j, j + jb - 1
376  ipiv( i ) = ipiv( i ) + j - 1
377  90 CONTINUE
378 *
379 * Apply the row interchanges to A13, A23, and A33
380 * columnwise.
381 *
382  k2 = j - 1 + jb + j2
383  DO 110 i = 1, j3
384  jj = k2 + i
385  DO 100 ii = j + i - 1, j + jb - 1
386  ip = ipiv( ii )
387  IF( ip.NE.ii ) THEN
388  temp = ab( kv+1+ii-jj, jj )
389  ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj )
390  ab( kv+1+ip-jj, jj ) = temp
391  END IF
392  100 CONTINUE
393  110 CONTINUE
394 *
395 * Update the relevant part of the trailing submatrix
396 *
397  IF( j2.GT.0 ) THEN
398 *
399 * Update A12
400 *
401  CALL ztrsm( 'Left', 'Lower', 'No transpose', 'Unit',
402  $ jb, j2, one, ab( kv+1, j ), ldab-1,
403  $ ab( kv+1-jb, j+jb ), ldab-1 )
404 *
405  IF( i2.GT.0 ) THEN
406 *
407 * Update A22
408 *
409  CALL zgemm( 'No transpose', 'No transpose', i2, j2,
410  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
411  $ ab( kv+1-jb, j+jb ), ldab-1, one,
412  $ ab( kv+1, j+jb ), ldab-1 )
413  END IF
414 *
415  IF( i3.GT.0 ) THEN
416 *
417 * Update A32
418 *
419  CALL zgemm( 'No transpose', 'No transpose', i3, j2,
420  $ jb, -one, work31, ldwork,
421  $ ab( kv+1-jb, j+jb ), ldab-1, one,
422  $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
423  END IF
424  END IF
425 *
426  IF( j3.GT.0 ) THEN
427 *
428 * Copy the lower triangle of A13 into the work array
429 * WORK13
430 *
431  DO 130 jj = 1, j3
432  DO 120 ii = jj, jb
433  work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
434  120 CONTINUE
435  130 CONTINUE
436 *
437 * Update A13 in the work array
438 *
439  CALL ztrsm( 'Left', 'Lower', 'No transpose', 'Unit',
440  $ jb, j3, one, ab( kv+1, j ), ldab-1,
441  $ work13, ldwork )
442 *
443  IF( i2.GT.0 ) THEN
444 *
445 * Update A23
446 *
447  CALL zgemm( 'No transpose', 'No transpose', i2, j3,
448  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
449  $ work13, ldwork, one, ab( 1+jb, j+kv ),
450  $ ldab-1 )
451  END IF
452 *
453  IF( i3.GT.0 ) THEN
454 *
455 * Update A33
456 *
457  CALL zgemm( 'No transpose', 'No transpose', i3, j3,
458  $ jb, -one, work31, ldwork, work13,
459  $ ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
460  END IF
461 *
462 * Copy the lower triangle of A13 back into place
463 *
464  DO 150 jj = 1, j3
465  DO 140 ii = jj, jb
466  ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
467  140 CONTINUE
468  150 CONTINUE
469  END IF
470  ELSE
471 *
472 * Adjust the pivot indices.
473 *
474  DO 160 i = j, j + jb - 1
475  ipiv( i ) = ipiv( i ) + j - 1
476  160 CONTINUE
477  END IF
478 *
479 * Partially undo the interchanges in the current block to
480 * restore the upper triangular form of A31 and copy the upper
481 * triangle of A31 back into place
482 *
483  DO 170 jj = j + jb - 1, j, -1
484  jp = ipiv( jj ) - jj + 1
485  IF( jp.NE.1 ) THEN
486 *
487 * Apply interchange to columns J to JJ-1
488 *
489  IF( jp+jj-1.LT.j+kl ) THEN
490 *
491 * The interchange does not affect A31
492 *
493  CALL zswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
494  $ ab( kv+jp+jj-j, j ), ldab-1 )
495  ELSE
496 *
497 * The interchange does affect A31
498 *
499  CALL zswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
500  $ work31( jp+jj-j-kl, 1 ), ldwork )
501  END IF
502  END IF
503 *
504 * Copy the current column of A31 back into place
505 *
506  nw = min( i3, jj-j+1 )
507  IF( nw.GT.0 )
508  $ CALL zcopy( nw, work31( 1, jj-j+1 ), 1,
509  $ ab( kv+kl+1-jj+j, jj ), 1 )
510  170 CONTINUE
511  180 CONTINUE
512  END IF
513 *
514  RETURN
515 *
516 * End of ZGBTRF
517 *
518  END