96 #include "klu2_internal.h"
97 #include "klu2_kernel.hpp"
98 #include "klu2_memory.hpp"
100 template <
typename Entry,
typename Int>
101 size_t KLU_kernel_factor
136 KLU_common<Entry, Int> *Common
139 double maxlnz, dunits ;
141 Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
142 Int lsize, usize, anz, ok ;
144 ASSERT (Common != NULL) ;
151 anz =
Ap [n+k1] -
Ap [k1] ;
156 Lsize = MAX (Lsize, 1.0) ;
157 lsize = (Int) Lsize * anz + n ;
161 lsize = (Int) Lsize ;
166 lsize = MAX (n+1, lsize) ;
167 usize = MAX (n+1, usize) ;
169 maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
170 maxlnz = MIN (maxlnz, ((
double) INT_MAX)) ;
171 lsize = (Int) MIN (maxlnz, lsize) ;
172 usize = (Int) MIN (maxlnz, usize) ;
174 PRINTF ((
"Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
175 n, anz, k1, lsize, usize, maxlnz)) ;
182 *p_LU = (Unit *) NULL ;
186 Pinv = (Int *) W ; W += n ;
187 Stack = (Int *) W ; W += n ;
188 Flag = (Int *) W ; W += n ;
189 Lpend = (Int *) W ; W += n ;
190 Ap_pos = (Int *) W ; W += n ;
192 dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
193 DUNITS (Int, usize) + DUNITS (Entry, usize) ;
194 lusize = (size_t) dunits ;
195 ok = !INT_OVERFLOW (dunits) ;
196 LU = (Unit *) (ok ? KLU_malloc (lusize,
sizeof (Unit), Common) : NULL) ;
200 Common->status = KLU_OUT_OF_MEMORY ;
210 lusize = KLU_kernel<Entry> (n,
Ap,
Ai, Ax, Q, lusize,
211 Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
212 X, Stack, Flag, Ap_pos, Lpend,
213 k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
219 if (Common->status < KLU_OK)
221 LU = (Unit *) KLU_free (LU, lusize,
sizeof (Unit), Common) ;
225 PRINTF ((
" in klu noffdiag %d\n", Common->noffdiag)) ;
238 template <
typename Entry,
typename Int>
260 for (k = 0 ; k < n ; k++)
263 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
265 for (p = 0 ; p < len ; p++)
268 MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
275 for (k = 0 ; k < n ; k++)
278 x [1] = X [2*k + 1] ;
279 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
280 for (p = 0 ; p < len ; p++)
284 MULT_SUB (X [2*i], lik, x [0]) ;
285 MULT_SUB (X [2*i + 1], lik, x [1]) ;
292 for (k = 0 ; k < n ; k++)
295 x [1] = X [3*k + 1] ;
296 x [2] = X [3*k + 2] ;
297 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
298 for (p = 0 ; p < len ; p++)
302 MULT_SUB (X [3*i], lik, x [0]) ;
303 MULT_SUB (X [3*i + 1], lik, x [1]) ;
304 MULT_SUB (X [3*i + 2], lik, x [2]) ;
311 for (k = 0 ; k < n ; k++)
314 x [1] = X [4*k + 1] ;
315 x [2] = X [4*k + 2] ;
316 x [3] = X [4*k + 3] ;
317 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
318 for (p = 0 ; p < len ; p++)
322 MULT_SUB (X [4*i], lik, x [0]) ;
323 MULT_SUB (X [4*i + 1], lik, x [1]) ;
324 MULT_SUB (X [4*i + 2], lik, x [2]) ;
325 MULT_SUB (X [4*i + 3], lik, x [3]) ;
331 throw std::domain_error(
"More than 4 right-hand sides is not supported.");
345 template <
typename Entry,
typename Int>
359 Entry x [4], uik, ukk ;
369 for (k = n-1 ; k >= 0 ; k--)
371 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
373 DIV (x [0], X [k], Udiag [k]) ;
375 for (p = 0 ; p < len ; p++)
378 MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
387 for (k = n-1 ; k >= 0 ; k--)
389 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
393 DIV (x [0], X [2*k], ukk) ;
394 DIV (x [1], X [2*k + 1], ukk) ;
397 X [2*k + 1] = x [1] ;
398 for (p = 0 ; p < len ; p++)
404 MULT_SUB (X [2*i], uik, x [0]) ;
405 MULT_SUB (X [2*i + 1], uik, x [1]) ;
413 for (k = n-1 ; k >= 0 ; k--)
415 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
418 DIV (x [0], X [3*k], ukk) ;
419 DIV (x [1], X [3*k + 1], ukk) ;
420 DIV (x [2], X [3*k + 2], ukk) ;
423 X [3*k + 1] = x [1] ;
424 X [3*k + 2] = x [2] ;
425 for (p = 0 ; p < len ; p++)
429 MULT_SUB (X [3*i], uik, x [0]) ;
430 MULT_SUB (X [3*i + 1], uik, x [1]) ;
431 MULT_SUB (X [3*i + 2], uik, x [2]) ;
439 for (k = n-1 ; k >= 0 ; k--)
441 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
444 DIV (x [0], X [4*k], ukk) ;
445 DIV (x [1], X [4*k + 1], ukk) ;
446 DIV (x [2], X [4*k + 2], ukk) ;
447 DIV (x [3], X [4*k + 3], ukk) ;
450 X [4*k + 1] = x [1] ;
451 X [4*k + 2] = x [2] ;
452 X [4*k + 3] = x [3] ;
453 for (p = 0 ; p < len ; p++)
458 MULT_SUB (X [4*i], uik, x [0]) ;
459 MULT_SUB (X [4*i + 1], uik, x [1]) ;
460 MULT_SUB (X [4*i + 2], uik, x [2]) ;
461 MULT_SUB (X [4*i + 3], uik, x [3]) ;
479 template <
typename Entry,
typename Int>
505 for (k = n-1 ; k >= 0 ; k--)
507 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
509 for (p = 0 ; p < len ; p++)
515 MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
521 MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
530 for (k = n-1 ; k >= 0 ; k--)
533 x [1] = X [2*k + 1] ;
534 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
535 for (p = 0 ; p < len ; p++)
548 MULT_SUB (x [0], lik, X [2*i]) ;
549 MULT_SUB (x [1], lik, X [2*i + 1]) ;
552 X [2*k + 1] = x [1] ;
558 for (k = n-1 ; k >= 0 ; k--)
561 x [1] = X [3*k + 1] ;
562 x [2] = X [3*k + 2] ;
563 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
564 for (p = 0 ; p < len ; p++)
577 MULT_SUB (x [0], lik, X [3*i]) ;
578 MULT_SUB (x [1], lik, X [3*i + 1]) ;
579 MULT_SUB (x [2], lik, X [3*i + 2]) ;
582 X [3*k + 1] = x [1] ;
583 X [3*k + 2] = x [2] ;
589 for (k = n-1 ; k >= 0 ; k--)
592 x [1] = X [4*k + 1] ;
593 x [2] = X [4*k + 2] ;
594 x [3] = X [4*k + 3] ;
595 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
596 for (p = 0 ; p < len ; p++)
609 MULT_SUB (x [0], lik, X [4*i]) ;
610 MULT_SUB (x [1], lik, X [4*i + 1]) ;
611 MULT_SUB (x [2], lik, X [4*i + 2]) ;
612 MULT_SUB (x [3], lik, X [4*i + 3]) ;
615 X [4*k + 1] = x [1] ;
616 X [4*k + 2] = x [2] ;
617 X [4*k + 3] = x [3] ;
633 template <
typename Entry,
typename Int>
641 const Entry Udiag [ ],
650 Entry x [4], uik, ukk ;
660 for (k = 0 ; k < n ; k++)
662 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
664 for (p = 0 ; p < len ; p++)
670 MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
676 MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
682 CONJ (ukk, Udiag [k]) ;
689 DIV (X [k], x [0], ukk) ;
695 for (k = 0 ; k < n ; k++)
697 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
699 x [1] = X [2*k + 1] ;
700 for (p = 0 ; p < len ; p++)
713 MULT_SUB (x [0], uik, X [2*i]) ;
714 MULT_SUB (x [1], uik, X [2*i + 1]) ;
719 CONJ (ukk, Udiag [k]) ;
726 DIV (X [2*k], x [0], ukk) ;
727 DIV (X [2*k + 1], x [1], ukk) ;
733 for (k = 0 ; k < n ; k++)
735 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
737 x [1] = X [3*k + 1] ;
738 x [2] = X [3*k + 2] ;
739 for (p = 0 ; p < len ; p++)
752 MULT_SUB (x [0], uik, X [3*i]) ;
753 MULT_SUB (x [1], uik, X [3*i + 1]) ;
754 MULT_SUB (x [2], uik, X [3*i + 2]) ;
759 CONJ (ukk, Udiag [k]) ;
766 DIV (X [3*k], x [0], ukk) ;
767 DIV (X [3*k + 1], x [1], ukk) ;
768 DIV (X [3*k + 2], x [2], ukk) ;
774 for (k = 0 ; k < n ; k++)
776 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
778 x [1] = X [4*k + 1] ;
779 x [2] = X [4*k + 2] ;
780 x [3] = X [4*k + 3] ;
781 for (p = 0 ; p < len ; p++)
794 MULT_SUB (x [0], uik, X [4*i]) ;
795 MULT_SUB (x [1], uik, X [4*i + 1]) ;
796 MULT_SUB (x [2], uik, X [4*i + 2]) ;
797 MULT_SUB (x [3], uik, X [4*i + 3]) ;
802 CONJ (ukk, Udiag [k]) ;
809 DIV (X [4*k], x [0], ukk) ;
810 DIV (X [4*k + 1], x [1], ukk) ;
811 DIV (X [4*k + 2], x [2], ukk) ;
812 DIV (X [4*k + 3], x [3], ukk) ;
int Ap[]
Column offsets.
Definition: klu2_simple.cpp:76
int Ai[]
Row values.
Definition: klu2_simple.cpp:78