LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
dgbtrf.f
Go to the documentation of this file.
1 *> \brief \b DGBTRF
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DGBTRF + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtrf.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtrf.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtrf.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * RECURSIVE SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV,
22 * INFO )
23 *
24 * .. Scalar Arguments ..
25 * INTEGER INFO, KL, KU, LDAB, M, N
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * DOUBLE PRECISION AB( LDAB, * )
30 * ..
31 *
32 *
33 *> \par Purpose:
34 * =============
35 *>
36 *> \verbatim
37 *>
38 *> DGBTRF computes an LU factorization of a real m-by-n band matrix A
39 *> using partial pivoting with row interchanges.
40 *>
41 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
42 *> \endverbatim
43 *
44 * Arguments:
45 * ==========
46 *
47 *> \param[in] M
48 *> \verbatim
49 *> M is INTEGER
50 *> The number of rows of the matrix A. M >= 0.
51 *> \endverbatim
52 *>
53 *> \param[in] N
54 *> \verbatim
55 *> N is INTEGER
56 *> The number of columns of the matrix A. N >= 0.
57 *> \endverbatim
58 *>
59 *> \param[in] KL
60 *> \verbatim
61 *> KL is INTEGER
62 *> The number of subdiagonals within the band of A. KL >= 0.
63 *> \endverbatim
64 *>
65 *> \param[in] KU
66 *> \verbatim
67 *> KU is INTEGER
68 *> The number of superdiagonals within the band of A. KU >= 0.
69 *> \endverbatim
70 *>
71 *> \param[in,out] AB
72 *> \verbatim
73 *> AB is DOUBLE PRECISION array, dimension (LDAB,N)
74 *> On entry, the matrix A in band storage, in rows KL+1 to
75 *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
76 *> The j-th column of A is stored in the j-th column of the
77 *> array AB as follows:
78 *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
79 *>
80 *> On exit, details of the factorization: U is stored as an
81 *> upper triangular band matrix with KL+KU superdiagonals in
82 *> rows 1 to KL+KU+1, and the multipliers used during the
83 *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
84 *> See below for further details.
85 *> \endverbatim
86 *>
87 *> \param[in] LDAB
88 *> \verbatim
89 *> LDAB is INTEGER
90 *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
91 *> \endverbatim
92 *>
93 *> \param[out] IPIV
94 *> \verbatim
95 *> IPIV is INTEGER array, dimension (min(M,N))
96 *> The pivot indices; for 1 <= i <= min(M,N), row i of the
97 *> matrix was interchanged with row IPIV(i).
98 *> \endverbatim
99 *>
100 *> \param[out] INFO
101 *> \verbatim
102 *> INFO is INTEGER
103 *> = 0: successful exit
104 *> < 0: if INFO = -i, the i-th argument had an illegal value
105 *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
106 *> has been completed, but the factor U is exactly
107 *> singular, and division by zero will occur if it is used
108 *> to solve a system of equations.
109 *> \endverbatim
110 *
111 * Authors:
112 * ========
113 *
114 *> \author Univ. of Tennessee
115 *> \author Univ. of California Berkeley
116 *> \author Univ. of Colorado Denver
117 *> \author NAG Ltd.
118 *
119 *> \date November 2011
120 *
121 *> \ingroup doubleGBcomputational
122 *
123 *> \par Further Details:
124 * =====================
125 *>
126 *> \verbatim
127 *>
128 *> The band storage scheme is illustrated by the following example, when
129 *> M = N = 6, KL = 2, KU = 1:
130 *>
131 *>     On entry:                       On exit:
132 *>
133 *>      *    *    *    +    +    +       *    *    *   u14  u25  u36
134 *>      *    *    +    +    +    +       *    *   u13  u24  u35  u46
135 *>      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
136 *>     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
137 *>     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
138 *>     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
139 *>
140 *> Array elements marked * are not used by the routine; elements marked
141 *> + need not be set on entry, but are required by the routine to store
142 *> elements of U because of fill-in resulting from the row interchanges.
143 *> \endverbatim
144 *>
145 * =====================================================================
146  RECURSIVE SUBROUTINE dgbtrf( M, N, KL, KU, AB, LDAB, IPIV, INFO )
! Purpose: LU-factorize the M-by-N band matrix held in AB (KL subdiagonals,
! KU superdiagonals) using partial pivoting with row interchanges.
! Arguments:
!   M, N   (in)     row and column counts; negative values are rejected.
!   KL, KU (in)     sub-/superdiagonal counts; negative values are rejected.
!   AB     (in,out) band storage, overwritten in place by the factors.
!   LDAB   (in)     leading dimension of AB; must be >= 2*KL+KU+1.
!   IPIV   (out)    pivot row index recorded for each factored column.
!   INFO   (out)    0 on success, -i if argument i is invalid, or the
!                   index of the first exactly-zero pivot that was found.
! Flow: validate arguments, pick a block size NB, then either delegate to
! the unblocked DGBTF2 or run the blocked loop over panels of JB columns.
147 *
148 * -- LAPACK computational routine (version 3.4.0) --
149 * -- LAPACK is a software package provided by Univ. of Tennessee, --
150 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
151 * November 2011
152 *
153 * .. Scalar Arguments ..
154  INTEGER info, kl, ku, ldab, m, n
155 * ..
156 * .. Array Arguments ..
157  INTEGER ipiv( * )
158  DOUBLE PRECISION ab( ldab, * )
159 * ..
160 *
161 * =====================================================================
162 *
163 * .. Parameters ..
! NBMAX bounds the block size so that a panel always fits in the fixed-size
! local work arrays; LDWORK is their leading dimension.
164  DOUBLE PRECISION one, zero
165  parameter( one = 1.0d+0, zero = 0.0d+0 )
166  INTEGER nbmax, ldwork
167  parameter( nbmax = 64, ldwork = nbmax+1 )
168 * ..
169 * .. Local Scalars ..
170  INTEGER i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp,
171  $ ju, k2, km, kv, nb, nw
172  DOUBLE PRECISION temp
173 * ..
174 * .. Local Arrays ..
! WORK13 stages the lower triangle of block A13 and WORK31 stages the upper
! triangle of block A31 while those blocks are being updated.
175  DOUBLE PRECISION work13( ldwork, nbmax ),
176  $ work31( ldwork, nbmax )
177 * ..
178 * .. External Functions ..
179  INTEGER idamax, ilaenv
180  EXTERNAL idamax, ilaenv
181 * ..
182 * .. External Subroutines ..
183  EXTERNAL dcopy, dgbtf2, dgemm, dger, dlaswp, dscal,
184  $ dswap, dtrsm, xerbla
185 * ..
186 * .. Intrinsic Functions ..
187  INTRINSIC max, min
188 * ..
189 * .. Executable Statements ..
190 *
191 * KV is the number of superdiagonals in the factor U, allowing for
192 * fill-in
193 *
194  kv = ku + kl
195 *
196 * Test the input parameters.
197 *
! INFO = -6 is reported when LDAB < KL+KV+1, i.e. LDAB < 2*KL+KU+1.
198  info = 0
199  IF( m.LT.0 ) THEN
200  info = -1
201  ELSE IF( n.LT.0 ) THEN
202  info = -2
203  ELSE IF( kl.LT.0 ) THEN
204  info = -3
205  ELSE IF( ku.LT.0 ) THEN
206  info = -4
207  ELSE IF( ldab.LT.kl+kv+1 ) THEN
208  info = -6
209  END IF
210  IF( info.NE.0 ) THEN
211  CALL xerbla( 'DGBTRF', -info )
212  RETURN
213  END IF
214 *
215 * Quick return if possible
216 *
217  IF( m.EQ.0 .OR. n.EQ.0 )
218  $ RETURN
219 *
220 * Determine the block size for this environment
221 *
222  nb = ilaenv( 1, 'DGBTRF', ' ', m, n, kl, ku )
223 *
224 * The block size must not exceed the limit set by the size of the
225 * local arrays WORK13 and WORK31.
226 *
227  nb = min( nb, nbmax )
228 *
! The blocked path is taken only when 1 < NB <= KL; in every other case
! the whole factorization is delegated to DGBTF2.
229  IF( nb.LE.1 .OR. nb.GT.kl ) THEN
230 *
231 * Use unblocked code
232 *
233  CALL dgbtf2( m, n, kl, ku, ab, ldab, ipiv, info )
234  ELSE
235 *
236 * Use blocked code
237 *
238 * Zero the superdiagonal elements of the work array WORK13
239 *
240  DO 20 j = 1, nb
241  DO 10 i = 1, j - 1
242  work13( i, j ) = zero
243  10 CONTINUE
244  20 CONTINUE
245 *
246 * Zero the subdiagonal elements of the work array WORK31
247 *
248  DO 40 j = 1, nb
249  DO 30 i = j + 1, nb
250  work31( i, j ) = zero
251  30 CONTINUE
252  40 CONTINUE
253 *
254 * Gaussian elimination with partial pivoting
255 *
256 * Set fill-in elements in columns KU+2 to KV to zero
257 *
258  DO 60 j = ku + 2, min( kv, n )
259  DO 50 i = kv - j + 2, kl
260  ab( i, j ) = zero
261  50 CONTINUE
262  60 CONTINUE
263 *
264 * JU is the index of the last column affected by the current
265 * stage of the factorization
266 *
267  ju = 1
268 *
! Main loop: one iteration per panel of JB columns starting at column J.
269  DO 180 j = 1, min( m, n ), nb
270  jb = min( nb, min( m, n )-j+1 )
271 *
272 * The active part of the matrix is partitioned
273 *
274 * A11 A12 A13
275 * A21 A22 A23
276 * A31 A32 A33
277 *
278 * Here A11, A21 and A31 denote the current block of JB columns
279 * which is about to be factorized. The number of rows in the
280 * partitioning are JB, I2, I3 respectively, and the numbers
281 * of columns are JB, J2, J3. The superdiagonal elements of A13
282 * and the subdiagonal elements of A31 lie outside the band.
283 *
284  i2 = min( kl-jb, m-j-jb+1 )
285  i3 = min( jb, m-j-kl+1 )
286 *
287 * J2 and J3 are computed after JU has been updated.
288 *
289 * Factorize the current block of JB columns
290 *
291  DO 80 jj = j, j + jb - 1
292 *
293 * Set fill-in elements in column JJ+KV to zero
294 *
295  IF( jj+kv.LE.n ) THEN
296  DO 70 i = 1, kl
297  ab( i, jj+kv ) = zero
298  70 CONTINUE
299  END IF
300 *
301 * Find pivot and test for singularity. KM is the number of
302 * subdiagonal elements in the current column.
303 *
304  km = min( kl, m-jj )
305  jp = idamax( km+1, ab( kv+1, jj ), 1 )
! The pivot is stored relative to the first row of the panel (row J counts
! as 1); it is shifted to a global row index once the panel is finished.
306  ipiv( jj ) = jp + jj - j
307  IF( ab( kv+jp, jj ).NE.zero ) THEN
308  ju = max( ju, min( jj+ku+jp-1, n ) )
309  IF( jp.NE.1 ) THEN
310 *
311 * Apply interchange to columns J to J+JB-1
312 *
! A stride of LDAB-1 walks along one matrix row in band storage: stepping
! one column to the right lowers the band row index by one.
313  IF( jp+jj-1.LT.j+kl ) THEN
314 *
315  CALL dswap( jb, ab( kv+1+jj-j, j ), ldab-1,
316  $ ab( kv+jp+jj-j, j ), ldab-1 )
317  ELSE
318 *
319 * The interchange affects columns J to JJ-1 of A31
320 * which are stored in the work array WORK31
321 *
322  CALL dswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
323  $ work31( jp+jj-j-kl, 1 ), ldwork )
324  CALL dswap( j+jb-jj, ab( kv+1, jj ), ldab-1,
325  $ ab( kv+jp, jj ), ldab-1 )
326  END IF
327  END IF
328 *
329 * Compute multipliers
330 *
! After the interchange the pivot sits on the diagonal (band row KV+1) and
! is known to be nonzero inside this branch.
331  CALL dscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
332  $ 1 )
333 *
334 * Update trailing submatrix within the band and within
335 * the current block. JM is the index of the last column
336 * which needs to be updated.
337 *
338  jm = min( ju, j+jb-1 )
339  IF( jm.GT.jj )
340  $ CALL dger( km, jm-jj, -one, ab( kv+2, jj ), 1,
341  $ ab( kv, jj+1 ), ldab-1,
342  $ ab( kv+1, jj+1 ), ldab-1 )
343  ELSE
344 *
345 * If pivot is zero, set INFO to the index of the pivot
346 * unless a zero pivot has already been found.
347 *
348  IF( info.EQ.0 )
349  $ info = jj
350  END IF
351 *
352 * Copy current column of A31 into the work array WORK31
353 *
354  nw = min( jj-j+1, i3 )
355  IF( nw.GT.0 )
356  $ CALL dcopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
357  $ work31( 1, jj-j+1 ), 1 )
358  80 CONTINUE
! Columns to the right of the panel exist only when J+JB <= N; otherwise
! only the pivot indices need adjusting (see the ELSE branch below).
359  IF( j+jb.LE.n ) THEN
360 *
361 * Apply the row interchanges to the other blocks.
362 *
363  j2 = min( ju-j+1, kv ) - jb
364  j3 = max( 0, ju-j-kv+1 )
365 *
366 * Use DLASWP to apply the row interchanges to A12, A22, and
367 * A32.
368 *
! DLASWP is handed the pivots while they are still panel-relative, which is
! why the shift to global row indices happens only after this call.
369  CALL dlaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,
370  $ ipiv( j ), 1 )
371 *
372 * Adjust the pivot indices.
373 *
374  DO 90 i = j, j + jb - 1
375  ipiv( i ) = ipiv( i ) + j - 1
376  90 CONTINUE
377 *
378 * Apply the row interchanges to A13, A23, and A33
379 * columnwise.
380 *
381  k2 = j - 1 + jb + j2
382  DO 110 i = 1, j3
383  jj = k2 + i
384  DO 100 ii = j + i - 1, j + jb - 1
385  ip = ipiv( ii )
386  IF( ip.NE.ii ) THEN
387  temp = ab( kv+1+ii-jj, jj )
388  ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj )
389  ab( kv+1+ip-jj, jj ) = temp
390  END IF
391  100 CONTINUE
392  110 CONTINUE
393 *
394 * Update the relevant part of the trailing submatrix
395 *
396  IF( j2.GT.0 ) THEN
397 *
398 * Update A12
399 *
400  CALL dtrsm( 'Left', 'Lower', 'No transpose', 'Unit',
401  $ jb, j2, one, ab( kv+1, j ), ldab-1,
402  $ ab( kv+1-jb, j+jb ), ldab-1 )
403 *
404  IF( i2.GT.0 ) THEN
405 *
406 * Update A22
407 *
408  CALL dgemm( 'No transpose', 'No transpose', i2, j2,
409  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
410  $ ab( kv+1-jb, j+jb ), ldab-1, one,
411  $ ab( kv+1, j+jb ), ldab-1 )
412  END IF
413 *
414  IF( i3.GT.0 ) THEN
415 *
416 * Update A32
417 *
418  CALL dgemm( 'No transpose', 'No transpose', i3, j2,
419  $ jb, -one, work31, ldwork,
420  $ ab( kv+1-jb, j+jb ), ldab-1, one,
421  $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
422  END IF
423  END IF
424 *
425  IF( j3.GT.0 ) THEN
426 *
427 * Copy the lower triangle of A13 into the work array
428 * WORK13
429 *
430  DO 130 jj = 1, j3
431  DO 120 ii = jj, jb
432  work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
433  120 CONTINUE
434  130 CONTINUE
435 *
436 * Update A13 in the work array
437 *
438  CALL dtrsm( 'Left', 'Lower', 'No transpose', 'Unit',
439  $ jb, j3, one, ab( kv+1, j ), ldab-1,
440  $ work13, ldwork )
441 *
442  IF( i2.GT.0 ) THEN
443 *
444 * Update A23
445 *
446  CALL dgemm( 'No transpose', 'No transpose', i2, j3,
447  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
448  $ work13, ldwork, one, ab( 1+jb, j+kv ),
449  $ ldab-1 )
450  END IF
451 *
452  IF( i3.GT.0 ) THEN
453 *
454 * Update A33
455 *
456  CALL dgemm( 'No transpose', 'No transpose', i3, j3,
457  $ jb, -one, work31, ldwork, work13,
458  $ ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
459  END IF
460 *
461 * Copy the lower triangle of A13 back into place
462 *
463  DO 150 jj = 1, j3
464  DO 140 ii = jj, jb
465  ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
466  140 CONTINUE
467  150 CONTINUE
468  END IF
469  ELSE
470 *
471 * Adjust the pivot indices.
472 *
473  DO 160 i = j, j + jb - 1
474  ipiv( i ) = ipiv( i ) + j - 1
475  160 CONTINUE
476  END IF
477 *
478 * Partially undo the interchanges in the current block to
479 * restore the upper triangular form of A31 and copy the upper
480 * triangle of A31 back into place
481 *
! The panel columns are visited in reverse order, and JP is recomputed from
! the (now global) pivot index as an offset relative to row JJ.
482  DO 170 jj = j + jb - 1, j, -1
483  jp = ipiv( jj ) - jj + 1
484  IF( jp.NE.1 ) THEN
485 *
486 * Apply interchange to columns J to JJ-1
487 *
488  IF( jp+jj-1.LT.j+kl ) THEN
489 *
490 * The interchange does not affect A31
491 *
492  CALL dswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
493  $ ab( kv+jp+jj-j, j ), ldab-1 )
494  ELSE
495 *
496 * The interchange does affect A31
497 *
498  CALL dswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
499  $ work31( jp+jj-j-kl, 1 ), ldwork )
500  END IF
501  END IF
502 *
503 * Copy the current column of A31 back into place
504 *
505  nw = min( i3, jj-j+1 )
506  IF( nw.GT.0 )
507  $ CALL dcopy( nw, work31( 1, jj-j+1 ), 1,
508  $ ab( kv+kl+1-jj+j, jj ), 1 )
509  170 CONTINUE
510  180 CONTINUE
511  END IF
512 *
513  RETURN
514 *
515 * End of DGBTRF
516 *
517  END