Actual source code: sell.c
  2: /*
  3:   Defines the basic matrix operations for the SELL matrix storage format.
  4: */
  5: #include <../src/mat/impls/sell/seq/sell.h>
  6: #include <petscblaslapack.h>
  7: #include <petsc/private/kernels/blocktranspose.h>
  9: static PetscBool  cited      = PETSC_FALSE;
 10: static const char citation[] = "@inproceedings{ZhangELLPACK2018,\n"
 11:                                " author = {Hong Zhang and Richard T. Mills and Karl Rupp and Barry F. Smith},\n"
 12:                                " title = {Vectorized Parallel Sparse Matrix-Vector Multiplication in {PETSc} Using {AVX-512}},\n"
 13:                                " booktitle = {Proceedings of the 47th International Conference on Parallel Processing},\n"
 14:                                " year = 2018\n"
 15:                                "}\n";
 17: #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
 19:   #include <immintrin.h>
 21:   #if !defined(_MM_SCALE_8)
 22:     #define _MM_SCALE_8 8
 23:   #endif
 25:   #if defined(__AVX512F__)
 26:     /* these do not work
 27:    vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
 28:    vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
 29:   */
 30:     #define AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y) \
 31:       /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
 32:       vec_idx  = _mm256_loadu_si256((__m256i const *)acolidx); \
 33:       vec_vals = _mm512_loadu_pd(aval); \
 34:       vec_x    = _mm512_i32gather_pd(vec_idx, x, _MM_SCALE_8); \
 35:       vec_y    = _mm512_fmadd_pd(vec_x, vec_vals, vec_y)
 36:   #elif defined(__AVX2__) && defined(__FMA__)
 37:     #define AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y) \
 38:       vec_vals = _mm256_loadu_pd(aval); \
 39:       vec_idx  = _mm_loadu_si128((__m128i const *)acolidx); /* SSE2 */ \
 40:       vec_x    = _mm256_i32gather_pd(x, vec_idx, _MM_SCALE_8); \
 41:       vec_y    = _mm256_fmadd_pd(vec_x, vec_vals, vec_y)
 42:   #endif
 43: #endif /* PETSC_HAVE_IMMINTRIN_H */
 45: /*@C
 46:  MatSeqSELLSetPreallocation - For good matrix assembly performance
 47:  the user should preallocate the matrix storage by setting the parameter `nz`
 48:  (or the array `nnz`).
 50:  Collective
 52:  Input Parameters:
 53: +  B - The `MATSEQSELL` matrix
 54: .  rlenmax - number of nonzeros per row (same for all rows), ignored if `rlen` is provided
 55: -  rlen - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`
 57:  Level: intermediate
 59:  Notes:
 60:  Specify the preallocated storage with either `rlenmax` or `rlen` (not both).
 61:  Set `rlenmax` = `PETSC_DEFAULT` and `rlen` = `NULL` for PETSc to control dynamic memory
 62:  allocation.
 64:  You can call `MatGetInfo()` to get information on how effective the preallocation was;
 65:  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
 66:  You can also run with the option `-info` and look for messages with the string
 67:  malloc in them to see if additional memory allocation was needed.
 69:  Developer Note:
 70:  Use `rlenmax` of `MAT_SKIP_ALLOCATION` to not allocate any space for the matrix
 71:  entries or columns indices.
 73:  The maximum number of nonzeos in any row should be as accurate as possible.
 74:  If it is underestimated, you will get bad performance due to reallocation
 75:  (`MatSeqXSELLReallocateSELL()`).
 77: .seealso: `Mat`, `MATSEQSELL`, `MATSELL`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MatGetInfo()`
 78:  @*/
 79: PetscErrorCode MatSeqSELLSetPreallocation(Mat B, PetscInt rlenmax, const PetscInt rlen[])
 80: {
 81:   PetscFunctionBegin;
 84:   PetscTryMethod(B, "MatSeqSELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[]), (B, rlenmax, rlen));
 85:   PetscFunctionReturn(PETSC_SUCCESS);
 86: }
 88: PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B, PetscInt maxallocrow, const PetscInt rlen[])
 89: {
 90:   Mat_SeqSELL *b;
 91:   PetscInt     i, j, totalslices;
 92:   PetscBool    skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
 94:   PetscFunctionBegin;
 95:   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
 96:   if (maxallocrow == MAT_SKIP_ALLOCATION) {
 97:     skipallocation = PETSC_TRUE;
 98:     maxallocrow    = 0;
 99:   }
101:   PetscCall(PetscLayoutSetUp(B->rmap));
102:   PetscCall(PetscLayoutSetUp(B->cmap));
104:   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
105:   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
106:   PetscCheck(maxallocrow >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "maxallocrow cannot be less than 0: value %" PetscInt_FMT, maxallocrow);
107:   if (rlen) {
108:     for (i = 0; i < B->rmap->n; i++) {
109:       PetscCheck(rlen[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "rlen cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, rlen[i]);
110:       PetscCheck(rlen[i] <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "rlen cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, rlen[i], B->cmap->n);
111:     }
112:   }
114:   B->preallocated = PETSC_TRUE;
116:   b = (Mat_SeqSELL *)B->data;
118:   totalslices    = PetscCeilInt(B->rmap->n, 8);
119:   b->totalslices = totalslices;
120:   if (!skipallocation) {
121:     if (B->rmap->n & 0x07) PetscCall(PetscInfo(B, "Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %" PetscInt_FMT ")\n", B->rmap->n));
123:     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
124:       PetscCall(PetscMalloc1(totalslices + 1, &b->sliidx));
125:     }
126:     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
127:       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
128:       else if (maxallocrow < 0) maxallocrow = 1;
129:       for (i = 0; i <= totalslices; i++) b->sliidx[i] = i * 8 * maxallocrow;
130:     } else {
131:       maxallocrow  = 0;
132:       b->sliidx[0] = 0;
133:       for (i = 1; i < totalslices; i++) {
134:         b->sliidx[i] = 0;
135:         for (j = 0; j < 8; j++) b->sliidx[i] = PetscMax(b->sliidx[i], rlen[8 * (i - 1) + j]);
136:         maxallocrow = PetscMax(b->sliidx[i], maxallocrow);
137:         PetscCall(PetscIntSumError(b->sliidx[i - 1], 8 * b->sliidx[i], &b->sliidx[i]));
138:       }
139:       /* last slice */
140:       b->sliidx[totalslices] = 0;
141:       for (j = (totalslices - 1) * 8; j < B->rmap->n; j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices], rlen[j]);
142:       maxallocrow            = PetscMax(b->sliidx[totalslices], maxallocrow);
143:       b->sliidx[totalslices] = b->sliidx[totalslices - 1] + 8 * b->sliidx[totalslices];
144:     }
146:     /* allocate space for val, colidx, rlen */
147:     /* FIXME: should B's old memory be unlogged? */
148:     PetscCall(MatSeqXSELLFreeSELL(B, &b->val, &b->colidx));
149:     /* FIXME: assuming an element of the bit array takes 8 bits */
150:     PetscCall(PetscMalloc2(b->sliidx[totalslices], &b->val, b->sliidx[totalslices], &b->colidx));
151:     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
152:     PetscCall(PetscCalloc1(8 * totalslices, &b->rlen));
154:     b->singlemalloc = PETSC_TRUE;
155:     b->free_val     = PETSC_TRUE;
156:     b->free_colidx  = PETSC_TRUE;
157:   } else {
158:     b->free_val    = PETSC_FALSE;
159:     b->free_colidx = PETSC_FALSE;
160:   }
162:   b->nz               = 0;
163:   b->maxallocrow      = maxallocrow;
164:   b->rlenmax          = maxallocrow;
165:   b->maxallocmat      = b->sliidx[totalslices];
166:   B->info.nz_unneeded = (double)b->maxallocmat;
167:   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
168:   PetscFunctionReturn(PETSC_SUCCESS);
169: }
171: PetscErrorCode MatGetRow_SeqSELL(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
172: {
173:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
174:   PetscInt     shift;
176:   PetscFunctionBegin;
177:   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
178:   if (nz) *nz = a->rlen[row];
179:   shift = a->sliidx[row >> 3] + (row & 0x07);
180:   if (!a->getrowcols) PetscCall(PetscMalloc2(a->rlenmax, &a->getrowcols, a->rlenmax, &a->getrowvals));
181:   if (idx) {
182:     PetscInt j;
183:     for (j = 0; j < a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift + 8 * j];
184:     *idx = a->getrowcols;
185:   }
186:   if (v) {
187:     PetscInt j;
188:     for (j = 0; j < a->rlen[row]; j++) a->getrowvals[j] = a->val[shift + 8 * j];
189:     *v = a->getrowvals;
190:   }
191:   PetscFunctionReturn(PETSC_SUCCESS);
192: }
194: PetscErrorCode MatRestoreRow_SeqSELL(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
195: {
196:   PetscFunctionBegin;
197:   PetscFunctionReturn(PETSC_SUCCESS);
198: }
200: PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
201: {
202:   Mat          B;
203:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
204:   PetscInt     i;
206:   PetscFunctionBegin;
207:   if (reuse == MAT_REUSE_MATRIX) {
208:     B = *newmat;
209:     PetscCall(MatZeroEntries(B));
210:   } else {
211:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
212:     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
213:     PetscCall(MatSetType(B, MATSEQAIJ));
214:     PetscCall(MatSeqAIJSetPreallocation(B, 0, a->rlen));
215:   }
217:   for (i = 0; i < A->rmap->n; i++) {
218:     PetscInt     nz = 0, *cols = NULL;
219:     PetscScalar *vals = NULL;
221:     PetscCall(MatGetRow_SeqSELL(A, i, &nz, &cols, &vals));
222:     PetscCall(MatSetValues(B, 1, &i, nz, cols, vals, INSERT_VALUES));
223:     PetscCall(MatRestoreRow_SeqSELL(A, i, &nz, &cols, &vals));
224:   }
226:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
227:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
228:   B->rmap->bs = A->rmap->bs;
230:   if (reuse == MAT_INPLACE_MATRIX) {
231:     PetscCall(MatHeaderReplace(A, &B));
232:   } else {
233:     *newmat = B;
234:   }
235:   PetscFunctionReturn(PETSC_SUCCESS);
236: }
238: #include <../src/mat/impls/aij/seq/aij.h>
240: PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
241: {
242:   Mat                B;
243:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
244:   PetscInt          *ai = a->i, m = A->rmap->N, n = A->cmap->N, i, *rowlengths, row, ncols;
245:   const PetscInt    *cols;
246:   const PetscScalar *vals;
248:   PetscFunctionBegin;
250:   if (reuse == MAT_REUSE_MATRIX) {
251:     B = *newmat;
252:   } else {
253:     if (PetscDefined(USE_DEBUG) || !a->ilen) {
254:       PetscCall(PetscMalloc1(m, &rowlengths));
255:       for (i = 0; i < m; i++) rowlengths[i] = ai[i + 1] - ai[i];
256:     }
257:     if (PetscDefined(USE_DEBUG) && a->ilen) {
258:       PetscBool eq;
259:       PetscCall(PetscMemcmp(rowlengths, a->ilen, m * sizeof(PetscInt), &eq));
260:       PetscCheck(eq, PETSC_COMM_SELF, PETSC_ERR_PLIB, "SeqAIJ ilen array incorrect");
261:       PetscCall(PetscFree(rowlengths));
262:       rowlengths = a->ilen;
263:     } else if (a->ilen) rowlengths = a->ilen;
264:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
265:     PetscCall(MatSetSizes(B, m, n, m, n));
266:     PetscCall(MatSetType(B, MATSEQSELL));
267:     PetscCall(MatSeqSELLSetPreallocation(B, 0, rowlengths));
268:     if (rowlengths != a->ilen) PetscCall(PetscFree(rowlengths));
269:   }
271:   for (row = 0; row < m; row++) {
272:     PetscCall(MatGetRow_SeqAIJ(A, row, &ncols, (PetscInt **)&cols, (PetscScalar **)&vals));
273:     PetscCall(MatSetValues_SeqSELL(B, 1, &row, ncols, cols, vals, INSERT_VALUES));
274:     PetscCall(MatRestoreRow_SeqAIJ(A, row, &ncols, (PetscInt **)&cols, (PetscScalar **)&vals));
275:   }
276:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
277:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
278:   B->rmap->bs = A->rmap->bs;
280:   if (reuse == MAT_INPLACE_MATRIX) {
281:     PetscCall(MatHeaderReplace(A, &B));
282:   } else {
283:     *newmat = B;
284:   }
285:   PetscFunctionReturn(PETSC_SUCCESS);
286: }
288: PetscErrorCode MatMult_SeqSELL(Mat A, Vec xx, Vec yy)
289: {
290:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
291:   PetscScalar       *y;
292:   const PetscScalar *x;
293:   const MatScalar   *aval        = a->val;
294:   PetscInt           totalslices = a->totalslices;
295:   const PetscInt    *acolidx     = a->colidx;
296:   PetscInt           i, j;
297: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
298:   __m512d  vec_x, vec_y, vec_vals;
299:   __m256i  vec_idx;
300:   __mmask8 mask;
301:   __m512d  vec_x2, vec_y2, vec_vals2, vec_x3, vec_y3, vec_vals3, vec_x4, vec_y4, vec_vals4;
302:   __m256i  vec_idx2, vec_idx3, vec_idx4;
303: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
304:   __m128i   vec_idx;
305:   __m256d   vec_x, vec_y, vec_y2, vec_vals;
306:   MatScalar yval;
307:   PetscInt  r, rows_left, row, nnz_in_row;
308: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
309:   __m128d   vec_x_tmp;
310:   __m256d   vec_x, vec_y, vec_y2, vec_vals;
311:   MatScalar yval;
312:   PetscInt  r, rows_left, row, nnz_in_row;
313: #else
314:   PetscScalar sum[8];
315: #endif
317: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
318:   #pragma disjoint(*x, *y, *aval)
319: #endif
321:   PetscFunctionBegin;
322:   PetscCall(VecGetArrayRead(xx, &x));
323:   PetscCall(VecGetArray(yy, &y));
324: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
325:   for (i = 0; i < totalslices; i++) { /* loop over slices */
326:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
327:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
329:     vec_y  = _mm512_setzero_pd();
330:     vec_y2 = _mm512_setzero_pd();
331:     vec_y3 = _mm512_setzero_pd();
332:     vec_y4 = _mm512_setzero_pd();
334:     j = a->sliidx[i] >> 3; /* 8 bytes are read at each time, corresponding to a slice column */
335:     switch ((a->sliidx[i + 1] - a->sliidx[i]) / 8 & 3) {
336:     case 3:
337:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
338:       acolidx += 8;
339:       aval += 8;
340:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
341:       acolidx += 8;
342:       aval += 8;
343:       AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3);
344:       acolidx += 8;
345:       aval += 8;
346:       j += 3;
347:       break;
348:     case 2:
349:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
350:       acolidx += 8;
351:       aval += 8;
352:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
353:       acolidx += 8;
354:       aval += 8;
355:       j += 2;
356:       break;
357:     case 1:
358:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
359:       acolidx += 8;
360:       aval += 8;
361:       j += 1;
362:       break;
363:     }
364:   #pragma novector
365:     for (; j < (a->sliidx[i + 1] >> 3); j += 4) {
366:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
367:       acolidx += 8;
368:       aval += 8;
369:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
370:       acolidx += 8;
371:       aval += 8;
372:       AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3);
373:       acolidx += 8;
374:       aval += 8;
375:       AVX512_Mult_Private(vec_idx4, vec_x4, vec_vals4, vec_y4);
376:       acolidx += 8;
377:       aval += 8;
378:     }
380:     vec_y = _mm512_add_pd(vec_y, vec_y2);
381:     vec_y = _mm512_add_pd(vec_y, vec_y3);
382:     vec_y = _mm512_add_pd(vec_y, vec_y4);
383:     if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
384:       mask = (__mmask8)(0xff >> (8 - (A->rmap->n & 0x07)));
385:       _mm512_mask_storeu_pd(&y[8 * i], mask, vec_y);
386:     } else {
387:       _mm512_storeu_pd(&y[8 * i], vec_y);
388:     }
389:   }
390: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
391:   for (i = 0; i < totalslices; i++) { /* loop over full slices */
392:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
393:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
395:     /* last slice may have padding rows. Don't use vectorization. */
396:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
397:       rows_left = A->rmap->n - 8 * i;
398:       for (r = 0; r < rows_left; ++r) {
399:         yval       = (MatScalar)0;
400:         row        = 8 * i + r;
401:         nnz_in_row = a->rlen[row];
402:         for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]];
403:         y[row] = yval;
404:       }
405:       break;
406:     }
408:     vec_y  = _mm256_setzero_pd();
409:     vec_y2 = _mm256_setzero_pd();
411:   /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
412:   #pragma novector
413:   #pragma unroll(2)
414:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
415:       AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
416:       aval += 4;
417:       acolidx += 4;
418:       AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y2);
419:       aval += 4;
420:       acolidx += 4;
421:     }
423:     _mm256_storeu_pd(y + i * 8, vec_y);
424:     _mm256_storeu_pd(y + i * 8 + 4, vec_y2);
425:   }
426: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
427:   for (i = 0; i < totalslices; i++) { /* loop over full slices */
428:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
429:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
431:     vec_y  = _mm256_setzero_pd();
432:     vec_y2 = _mm256_setzero_pd();
434:     /* last slice may have padding rows. Don't use vectorization. */
435:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
436:       rows_left = A->rmap->n - 8 * i;
437:       for (r = 0; r < rows_left; ++r) {
438:         yval       = (MatScalar)0;
439:         row        = 8 * i + r;
440:         nnz_in_row = a->rlen[row];
441:         for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]];
442:         y[row] = yval;
443:       }
444:       break;
445:     }
447:   /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
448:   #pragma novector
449:   #pragma unroll(2)
450:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
451:       vec_vals  = _mm256_loadu_pd(aval);
452:       vec_x_tmp = _mm_setzero_pd();
453:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
454:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
455:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0);
456:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
457:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
458:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1);
459:       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y);
460:       aval += 4;
462:       vec_vals  = _mm256_loadu_pd(aval);
463:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
464:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
465:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0);
466:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
467:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
468:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1);
469:       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y2);
470:       aval += 4;
471:     }
473:     _mm256_storeu_pd(y + i * 8, vec_y);
474:     _mm256_storeu_pd(y + i * 8 + 4, vec_y2);
475:   }
476: #else
477:   for (i = 0; i < totalslices; i++) { /* loop over slices */
478:     for (j = 0; j < 8; j++) sum[j] = 0.0;
479:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
480:       sum[0] += aval[j] * x[acolidx[j]];
481:       sum[1] += aval[j + 1] * x[acolidx[j + 1]];
482:       sum[2] += aval[j + 2] * x[acolidx[j + 2]];
483:       sum[3] += aval[j + 3] * x[acolidx[j + 3]];
484:       sum[4] += aval[j + 4] * x[acolidx[j + 4]];
485:       sum[5] += aval[j + 5] * x[acolidx[j + 5]];
486:       sum[6] += aval[j + 6] * x[acolidx[j + 6]];
487:       sum[7] += aval[j + 7] * x[acolidx[j + 7]];
488:     }
489:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
490:       for (j = 0; j < (A->rmap->n & 0x07); j++) y[8 * i + j] = sum[j];
491:     } else {
492:       for (j = 0; j < 8; j++) y[8 * i + j] = sum[j];
493:     }
494:   }
495: #endif
497:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); /* theoretical minimal FLOPs */
498:   PetscCall(VecRestoreArrayRead(xx, &x));
499:   PetscCall(VecRestoreArray(yy, &y));
500:   PetscFunctionReturn(PETSC_SUCCESS);
501: }
503: #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
504: PetscErrorCode MatMultAdd_SeqSELL(Mat A, Vec xx, Vec yy, Vec zz)
505: {
506:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
507:   PetscScalar       *y, *z;
508:   const PetscScalar *x;
509:   const MatScalar   *aval        = a->val;
510:   PetscInt           totalslices = a->totalslices;
511:   const PetscInt    *acolidx     = a->colidx;
512:   PetscInt           i, j;
513: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
514:   __m512d  vec_x, vec_y, vec_vals;
515:   __m256i  vec_idx;
516:   __mmask8 mask;
517:   __m512d  vec_x2, vec_y2, vec_vals2, vec_x3, vec_y3, vec_vals3, vec_x4, vec_y4, vec_vals4;
518:   __m256i  vec_idx2, vec_idx3, vec_idx4;
519: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
520:   __m128d   vec_x_tmp;
521:   __m256d   vec_x, vec_y, vec_y2, vec_vals;
522:   MatScalar yval;
523:   PetscInt  r, row, nnz_in_row;
524: #else
525:   PetscScalar sum[8];
526: #endif
528: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
529:   #pragma disjoint(*x, *y, *aval)
530: #endif
532:   PetscFunctionBegin;
533:   PetscCall(VecGetArrayRead(xx, &x));
534:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
535: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
536:   for (i = 0; i < totalslices; i++) { /* loop over slices */
537:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
538:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
540:     if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
541:       mask  = (__mmask8)(0xff >> (8 - (A->rmap->n & 0x07)));
542:       vec_y = _mm512_mask_loadu_pd(vec_y, mask, &y[8 * i]);
543:     } else {
544:       vec_y = _mm512_loadu_pd(&y[8 * i]);
545:     }
546:     vec_y2 = _mm512_setzero_pd();
547:     vec_y3 = _mm512_setzero_pd();
548:     vec_y4 = _mm512_setzero_pd();
550:     j = a->sliidx[i] >> 3; /* 8 bytes are read at each time, corresponding to a slice column */
551:     switch ((a->sliidx[i + 1] - a->sliidx[i]) / 8 & 3) {
552:     case 3:
553:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
554:       acolidx += 8;
555:       aval += 8;
556:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
557:       acolidx += 8;
558:       aval += 8;
559:       AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3);
560:       acolidx += 8;
561:       aval += 8;
562:       j += 3;
563:       break;
564:     case 2:
565:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
566:       acolidx += 8;
567:       aval += 8;
568:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
569:       acolidx += 8;
570:       aval += 8;
571:       j += 2;
572:       break;
573:     case 1:
574:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
575:       acolidx += 8;
576:       aval += 8;
577:       j += 1;
578:       break;
579:     }
580:   #pragma novector
581:     for (; j < (a->sliidx[i + 1] >> 3); j += 4) {
582:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
583:       acolidx += 8;
584:       aval += 8;
585:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
586:       acolidx += 8;
587:       aval += 8;
588:       AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3);
589:       acolidx += 8;
590:       aval += 8;
591:       AVX512_Mult_Private(vec_idx4, vec_x4, vec_vals4, vec_y4);
592:       acolidx += 8;
593:       aval += 8;
594:     }
596:     vec_y = _mm512_add_pd(vec_y, vec_y2);
597:     vec_y = _mm512_add_pd(vec_y, vec_y3);
598:     vec_y = _mm512_add_pd(vec_y, vec_y4);
599:     if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
600:       _mm512_mask_storeu_pd(&z[8 * i], mask, vec_y);
601:     } else {
602:       _mm512_storeu_pd(&z[8 * i], vec_y);
603:     }
604:   }
605: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
606:   for (i = 0; i < totalslices; i++) { /* loop over full slices */
607:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
608:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
610:     /* last slice may have padding rows. Don't use vectorization. */
611:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
612:       for (r = 0; r < (A->rmap->n & 0x07); ++r) {
613:         row        = 8 * i + r;
614:         yval       = (MatScalar)0.0;
615:         nnz_in_row = a->rlen[row];
616:         for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]];
617:         z[row] = y[row] + yval;
618:       }
619:       break;
620:     }
622:     vec_y  = _mm256_loadu_pd(y + 8 * i);
623:     vec_y2 = _mm256_loadu_pd(y + 8 * i + 4);
625:     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
626:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
627:       vec_vals  = _mm256_loadu_pd(aval);
628:       vec_x_tmp = _mm_setzero_pd();
629:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
630:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
631:       vec_x     = _mm256_setzero_pd();
632:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0);
633:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
634:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
635:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1);
636:       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y);
637:       aval += 4;
639:       vec_vals  = _mm256_loadu_pd(aval);
640:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
641:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
642:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0);
643:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
644:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
645:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1);
646:       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y2);
647:       aval += 4;
648:     }
650:     _mm256_storeu_pd(z + i * 8, vec_y);
651:     _mm256_storeu_pd(z + i * 8 + 4, vec_y2);
652:   }
653: #else
654:   for (i = 0; i < totalslices; i++) { /* loop over slices */
655:     for (j = 0; j < 8; j++) sum[j] = 0.0;
656:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
657:       sum[0] += aval[j] * x[acolidx[j]];
658:       sum[1] += aval[j + 1] * x[acolidx[j + 1]];
659:       sum[2] += aval[j + 2] * x[acolidx[j + 2]];
660:       sum[3] += aval[j + 3] * x[acolidx[j + 3]];
661:       sum[4] += aval[j + 4] * x[acolidx[j + 4]];
662:       sum[5] += aval[j + 5] * x[acolidx[j + 5]];
663:       sum[6] += aval[j + 6] * x[acolidx[j + 6]];
664:       sum[7] += aval[j + 7] * x[acolidx[j + 7]];
665:     }
666:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
667:       for (j = 0; j < (A->rmap->n & 0x07); j++) z[8 * i + j] = y[8 * i + j] + sum[j];
668:     } else {
669:       for (j = 0; j < 8; j++) z[8 * i + j] = y[8 * i + j] + sum[j];
670:     }
671:   }
672: #endif
674:   PetscCall(PetscLogFlops(2.0 * a->nz));
675:   PetscCall(VecRestoreArrayRead(xx, &x));
676:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
677:   PetscFunctionReturn(PETSC_SUCCESS);
678: }
680: PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A, Vec xx, Vec zz, Vec yy)
681: {
682:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
683:   PetscScalar       *y;
684:   const PetscScalar *x;
685:   const MatScalar   *aval    = a->val;
686:   const PetscInt    *acolidx = a->colidx;
687:   PetscInt           i, j, r, row, nnz_in_row, totalslices = a->totalslices;
689: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
690:   #pragma disjoint(*x, *y, *aval)
691: #endif
693:   PetscFunctionBegin;
694:   if (A->symmetric == PETSC_BOOL3_TRUE) {
695:     PetscCall(MatMultAdd_SeqSELL(A, xx, zz, yy));
696:     PetscFunctionReturn(PETSC_SUCCESS);
697:   }
698:   if (zz != yy) PetscCall(VecCopy(zz, yy));
699:   PetscCall(VecGetArrayRead(xx, &x));
700:   PetscCall(VecGetArray(yy, &y));
701:   for (i = 0; i < a->totalslices; i++) { /* loop over slices */
702:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
703:       for (r = 0; r < (A->rmap->n & 0x07); ++r) {
704:         row        = 8 * i + r;
705:         nnz_in_row = a->rlen[row];
706:         for (j = 0; j < nnz_in_row; ++j) y[acolidx[8 * j + r]] += aval[8 * j + r] * x[row];
707:       }
708:       break;
709:     }
710:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
711:       y[acolidx[j]] += aval[j] * x[8 * i];
712:       y[acolidx[j + 1]] += aval[j + 1] * x[8 * i + 1];
713:       y[acolidx[j + 2]] += aval[j + 2] * x[8 * i + 2];
714:       y[acolidx[j + 3]] += aval[j + 3] * x[8 * i + 3];
715:       y[acolidx[j + 4]] += aval[j + 4] * x[8 * i + 4];
716:       y[acolidx[j + 5]] += aval[j + 5] * x[8 * i + 5];
717:       y[acolidx[j + 6]] += aval[j + 6] * x[8 * i + 6];
718:       y[acolidx[j + 7]] += aval[j + 7] * x[8 * i + 7];
719:     }
720:   }
721:   PetscCall(PetscLogFlops(2.0 * a->sliidx[a->totalslices]));
722:   PetscCall(VecRestoreArrayRead(xx, &x));
723:   PetscCall(VecRestoreArray(yy, &y));
724:   PetscFunctionReturn(PETSC_SUCCESS);
725: }
727: PetscErrorCode MatMultTranspose_SeqSELL(Mat A, Vec xx, Vec yy)
728: {
729:   PetscFunctionBegin;
730:   if (A->symmetric == PETSC_BOOL3_TRUE) {
731:     PetscCall(MatMult_SeqSELL(A, xx, yy));
732:   } else {
733:     PetscCall(VecSet(yy, 0.0));
734:     PetscCall(MatMultTransposeAdd_SeqSELL(A, xx, yy, yy));
735:   }
736:   PetscFunctionReturn(PETSC_SUCCESS);
737: }
739: /*
740:      Checks for missing diagonals
741: */
742: PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A, PetscBool *missing, PetscInt *d)
743: {
744:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
745:   PetscInt    *diag, i;
747:   PetscFunctionBegin;
748:   *missing = PETSC_FALSE;
749:   if (A->rmap->n > 0 && !(a->colidx)) {
750:     *missing = PETSC_TRUE;
751:     if (d) *d = 0;
752:     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
753:   } else {
754:     diag = a->diag;
755:     for (i = 0; i < A->rmap->n; i++) {
756:       if (diag[i] == -1) {
757:         *missing = PETSC_TRUE;
758:         if (d) *d = i;
759:         PetscCall(PetscInfo(A, "Matrix is missing diagonal number %" PetscInt_FMT "\n", i));
760:         break;
761:       }
762:     }
763:   }
764:   PetscFunctionReturn(PETSC_SUCCESS);
765: }
767: PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
768: {
769:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
770:   PetscInt     i, j, m = A->rmap->n, shift;
772:   PetscFunctionBegin;
773:   if (!a->diag) {
774:     PetscCall(PetscMalloc1(m, &a->diag));
775:     a->free_diag = PETSC_TRUE;
776:   }
777:   for (i = 0; i < m; i++) {                      /* loop over rows */
778:     shift      = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
779:     a->diag[i] = -1;
780:     for (j = 0; j < a->rlen[i]; j++) {
781:       if (a->colidx[shift + j * 8] == i) {
782:         a->diag[i] = shift + j * 8;
783:         break;
784:       }
785:     }
786:   }
787:   PetscFunctionReturn(PETSC_SUCCESS);
788: }
790: /*
791:   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
792: */
793: PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A, PetscScalar omega, PetscScalar fshift)
794: {
795:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
796:   PetscInt     i, *diag, m = A->rmap->n;
797:   MatScalar   *val = a->val;
798:   PetscScalar *idiag, *mdiag;
800:   PetscFunctionBegin;
801:   if (a->idiagvalid) PetscFunctionReturn(PETSC_SUCCESS);
802:   PetscCall(MatMarkDiagonal_SeqSELL(A));
803:   diag = a->diag;
804:   if (!a->idiag) {
805:     PetscCall(PetscMalloc3(m, &a->idiag, m, &a->mdiag, m, &a->ssor_work));
806:     val = a->val;
807:   }
808:   mdiag = a->mdiag;
809:   idiag = a->idiag;
811:   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
812:     for (i = 0; i < m; i++) {
813:       mdiag[i] = val[diag[i]];
814:       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
815:         PetscCheck(PetscRealPart(fshift), PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Zero diagonal on row %" PetscInt_FMT, i);
816:         PetscCall(PetscInfo(A, "Zero diagonal on row %" PetscInt_FMT "\n", i));
817:         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
818:         A->factorerror_zeropivot_value = 0.0;
819:         A->factorerror_zeropivot_row   = i;
820:       }
821:       idiag[i] = 1.0 / val[diag[i]];
822:     }
823:     PetscCall(PetscLogFlops(m));
824:   } else {
825:     for (i = 0; i < m; i++) {
826:       mdiag[i] = val[diag[i]];
827:       idiag[i] = omega / (fshift + val[diag[i]]);
828:     }
829:     PetscCall(PetscLogFlops(2.0 * m));
830:   }
831:   a->idiagvalid = PETSC_TRUE;
832:   PetscFunctionReturn(PETSC_SUCCESS);
833: }
835: PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
836: {
837:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
839:   PetscFunctionBegin;
840:   PetscCall(PetscArrayzero(a->val, a->sliidx[a->totalslices]));
841:   PetscCall(MatSeqSELLInvalidateDiagonal(A));
842:   PetscFunctionReturn(PETSC_SUCCESS);
843: }
845: PetscErrorCode MatDestroy_SeqSELL(Mat A)
846: {
847:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
849:   PetscFunctionBegin;
850: #if defined(PETSC_USE_LOG)
851:   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->n, A->cmap->n, a->nz));
852: #endif
853:   PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx));
854:   PetscCall(ISDestroy(&a->row));
855:   PetscCall(ISDestroy(&a->col));
856:   PetscCall(PetscFree(a->diag));
857:   PetscCall(PetscFree(a->rlen));
858:   PetscCall(PetscFree(a->sliidx));
859:   PetscCall(PetscFree3(a->idiag, a->mdiag, a->ssor_work));
860:   PetscCall(PetscFree(a->solve_work));
861:   PetscCall(ISDestroy(&a->icol));
862:   PetscCall(PetscFree(a->saved_values));
863:   PetscCall(PetscFree2(a->getrowcols, a->getrowvals));
865:   PetscCall(PetscFree(A->data));
867:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
868:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
869:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
870:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", NULL));
871:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLGetArray_C", NULL));
872:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLRestoreArray_C", NULL));
873:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqsell_seqaij_C", NULL));
874:   PetscFunctionReturn(PETSC_SUCCESS);
875: }
877: PetscErrorCode MatSetOption_SeqSELL(Mat A, MatOption op, PetscBool flg)
878: {
879:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
881:   PetscFunctionBegin;
882:   switch (op) {
883:   case MAT_ROW_ORIENTED:
884:     a->roworiented = flg;
885:     break;
886:   case MAT_KEEP_NONZERO_PATTERN:
887:     a->keepnonzeropattern = flg;
888:     break;
889:   case MAT_NEW_NONZERO_LOCATIONS:
890:     a->nonew = (flg ? 0 : 1);
891:     break;
892:   case MAT_NEW_NONZERO_LOCATION_ERR:
893:     a->nonew = (flg ? -1 : 0);
894:     break;
895:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
896:     a->nonew = (flg ? -2 : 0);
897:     break;
898:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
899:     a->nounused = (flg ? -1 : 0);
900:     break;
901:   case MAT_FORCE_DIAGONAL_ENTRIES:
902:   case MAT_IGNORE_OFF_PROC_ENTRIES:
903:   case MAT_USE_HASH_TABLE:
904:   case MAT_SORTED_FULL:
905:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
906:     break;
907:   case MAT_SPD:
908:   case MAT_SYMMETRIC:
909:   case MAT_STRUCTURALLY_SYMMETRIC:
910:   case MAT_HERMITIAN:
911:   case MAT_SYMMETRY_ETERNAL:
912:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
913:   case MAT_SPD_ETERNAL:
914:     /* These options are handled directly by MatSetOption() */
915:     break;
916:   default:
917:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
918:   }
919:   PetscFunctionReturn(PETSC_SUCCESS);
920: }
922: PetscErrorCode MatGetDiagonal_SeqSELL(Mat A, Vec v)
923: {
924:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
925:   PetscInt     i, j, n, shift;
926:   PetscScalar *x, zero = 0.0;
928:   PetscFunctionBegin;
929:   PetscCall(VecGetLocalSize(v, &n));
930:   PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
932:   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
933:     PetscInt *diag = a->diag;
934:     PetscCall(VecGetArray(v, &x));
935:     for (i = 0; i < n; i++) x[i] = 1.0 / a->val[diag[i]];
936:     PetscCall(VecRestoreArray(v, &x));
937:     PetscFunctionReturn(PETSC_SUCCESS);
938:   }
940:   PetscCall(VecSet(v, zero));
941:   PetscCall(VecGetArray(v, &x));
942:   for (i = 0; i < n; i++) {                 /* loop over rows */
943:     shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
944:     x[i]  = 0;
945:     for (j = 0; j < a->rlen[i]; j++) {
946:       if (a->colidx[shift + j * 8] == i) {
947:         x[i] = a->val[shift + j * 8];
948:         break;
949:       }
950:     }
951:   }
952:   PetscCall(VecRestoreArray(v, &x));
953:   PetscFunctionReturn(PETSC_SUCCESS);
954: }
956: PetscErrorCode MatDiagonalScale_SeqSELL(Mat A, Vec ll, Vec rr)
957: {
958:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
959:   const PetscScalar *l, *r;
960:   PetscInt           i, j, m, n, row;
962:   PetscFunctionBegin;
963:   if (ll) {
964:     /* The local size is used so that VecMPI can be passed to this routine
965:        by MatDiagonalScale_MPISELL */
966:     PetscCall(VecGetLocalSize(ll, &m));
967:     PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector wrong length");
968:     PetscCall(VecGetArrayRead(ll, &l));
969:     for (i = 0; i < a->totalslices; i++) {                  /* loop over slices */
970:       if (i == a->totalslices - 1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
971:         for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
972:           if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8 * i + row];
973:         }
974:       } else {
975:         for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) a->val[j] *= l[8 * i + row];
976:       }
977:     }
978:     PetscCall(VecRestoreArrayRead(ll, &l));
979:     PetscCall(PetscLogFlops(a->nz));
980:   }
981:   if (rr) {
982:     PetscCall(VecGetLocalSize(rr, &n));
983:     PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vector wrong length");
984:     PetscCall(VecGetArrayRead(rr, &r));
985:     for (i = 0; i < a->totalslices; i++) {                  /* loop over slices */
986:       if (i == a->totalslices - 1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
987:         for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
988:           if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]];
989:         }
990:       } else {
991:         for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j++) a->val[j] *= r[a->colidx[j]];
992:       }
993:     }
994:     PetscCall(VecRestoreArrayRead(rr, &r));
995:     PetscCall(PetscLogFlops(a->nz));
996:   }
997:   PetscCall(MatSeqSELLInvalidateDiagonal(A));
998:   PetscFunctionReturn(PETSC_SUCCESS);
999: }
1001: PetscErrorCode MatGetValues_SeqSELL(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
1002: {
1003:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1004:   PetscInt    *cp, i, k, low, high, t, row, col, l;
1005:   PetscInt     shift;
1006:   MatScalar   *vp;
1008:   PetscFunctionBegin;
1009:   for (k = 0; k < m; k++) { /* loop over requested rows */
1010:     row = im[k];
1011:     if (row < 0) continue;
1012:     PetscCheck(row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->n - 1);
1013:     shift = a->sliidx[row >> 3] + (row & 0x07); /* starting index of the row */
1014:     cp    = a->colidx + shift;                  /* pointer to the row */
1015:     vp    = a->val + shift;                     /* pointer to the row */
1016:     for (l = 0; l < n; l++) {                   /* loop over requested columns */
1017:       col = in[l];
1018:       if (col < 0) continue;
1019:       PetscCheck(col < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: row %" PetscInt_FMT " max %" PetscInt_FMT, col, A->cmap->n - 1);
1020:       high = a->rlen[row];
1021:       low  = 0; /* assume unsorted */
1022:       while (high - low > 5) {
1023:         t = (low + high) / 2;
1024:         if (*(cp + t * 8) > col) high = t;
1025:         else low = t;
1026:       }
1027:       for (i = low; i < high; i++) {
1028:         if (*(cp + 8 * i) > col) break;
1029:         if (*(cp + 8 * i) == col) {
1030:           *v++ = *(vp + 8 * i);
1031:           goto finished;
1032:         }
1033:       }
1034:       *v++ = 0.0;
1035:     finished:;
1036:     }
1037:   }
1038:   PetscFunctionReturn(PETSC_SUCCESS);
1039: }
1041: PetscErrorCode MatView_SeqSELL_ASCII(Mat A, PetscViewer viewer)
1042: {
1043:   Mat_SeqSELL      *a = (Mat_SeqSELL *)A->data;
1044:   PetscInt          i, j, m = A->rmap->n, shift;
1045:   const char       *name;
1046:   PetscViewerFormat format;
1048:   PetscFunctionBegin;
1049:   PetscCall(PetscViewerGetFormat(viewer, &format));
1050:   if (format == PETSC_VIEWER_ASCII_MATLAB) {
1051:     PetscInt nofinalvalue = 0;
1052:     /*
1053:     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1054:       nofinalvalue = 1;
1055:     }
1056:     */
1057:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1058:     PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", m, A->cmap->n));
1059:     PetscCall(PetscViewerASCIIPrintf(viewer, "%% Nonzeros = %" PetscInt_FMT " \n", a->nz));
1060: #if defined(PETSC_USE_COMPLEX)
1061:     PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = zeros(%" PetscInt_FMT ",4);\n", a->nz + nofinalvalue));
1062: #else
1063:     PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = zeros(%" PetscInt_FMT ",3);\n", a->nz + nofinalvalue));
1064: #endif
1065:     PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = [\n"));
1067:     for (i = 0; i < m; i++) {
1068:       shift = a->sliidx[i >> 3] + (i & 0x07);
1069:       for (j = 0; j < a->rlen[i]; j++) {
1070: #if defined(PETSC_USE_COMPLEX)
1071:         PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e %18.16e\n", i + 1, a->colidx[shift + 8 * j] + 1, (double)PetscRealPart(a->val[shift + 8 * j]), (double)PetscImaginaryPart(a->val[shift + 8 * j])));
1072: #else
1073:         PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n", i + 1, a->colidx[shift + 8 * j] + 1, (double)a->val[shift + 8 * j]));
1074: #endif
1075:       }
1076:     }
1077:     /*
1078:     if (nofinalvalue) {
1079: #if defined(PETSC_USE_COMPLEX)
1080:       PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e %18.16e\n",m,A->cmap->n,0.,0.));
1081: #else
1082:       PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",m,A->cmap->n,0.0));
1083: #endif
1084:     }
1085:     */
1086:     PetscCall(PetscObjectGetName((PetscObject)A, &name));
1087:     PetscCall(PetscViewerASCIIPrintf(viewer, "];\n %s = spconvert(zzz);\n", name));
1088:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1089:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1090:     PetscFunctionReturn(PETSC_SUCCESS);
1091:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1092:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1093:     for (i = 0; i < m; i++) {
1094:       PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
1095:       shift = a->sliidx[i >> 3] + (i & 0x07);
1096:       for (j = 0; j < a->rlen[i]; j++) {
1097: #if defined(PETSC_USE_COMPLEX)
1098:         if (PetscImaginaryPart(a->val[shift + 8 * j]) > 0.0 && PetscRealPart(a->val[shift + 8 * j]) != 0.0) {
1099:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j]), (double)PetscImaginaryPart(a->val[shift + 8 * j])));
1100:         } else if (PetscImaginaryPart(a->val[shift + 8 * j]) < 0.0 && PetscRealPart(a->val[shift + 8 * j]) != 0.0) {
1101:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j]), (double)-PetscImaginaryPart(a->val[shift + 8 * j])));
1102:         } else if (PetscRealPart(a->val[shift + 8 * j]) != 0.0) {
1103:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j])));
1104:         }
1105: #else
1106:         if (a->val[shift + 8 * j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + 8 * j], (double)a->val[shift + 8 * j]));
1107: #endif
1108:       }
1109:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1110:     }
1111:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1112:   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1113:     PetscInt    cnt = 0, jcnt;
1114:     PetscScalar value;
1115: #if defined(PETSC_USE_COMPLEX)
1116:     PetscBool realonly = PETSC_TRUE;
1117:     for (i = 0; i < a->sliidx[a->totalslices]; i++) {
1118:       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1119:         realonly = PETSC_FALSE;
1120:         break;
1121:       }
1122:     }
1123: #endif
1125:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1126:     for (i = 0; i < m; i++) {
1127:       jcnt  = 0;
1128:       shift = a->sliidx[i >> 3] + (i & 0x07);
1129:       for (j = 0; j < A->cmap->n; j++) {
1130:         if (jcnt < a->rlen[i] && j == a->colidx[shift + 8 * j]) {
1131:           value = a->val[cnt++];
1132:           jcnt++;
1133:         } else {
1134:           value = 0.0;
1135:         }
1136: #if defined(PETSC_USE_COMPLEX)
1137:         if (realonly) {
1138:           PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e ", (double)PetscRealPart(value)));
1139:         } else {
1140:           PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e+%7.5e i ", (double)PetscRealPart(value), (double)PetscImaginaryPart(value)));
1141:         }
1142: #else
1143:         PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e ", (double)value));
1144: #endif
1145:       }
1146:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1147:     }
1148:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1149:   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1150:     PetscInt fshift = 1;
1151:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1152: #if defined(PETSC_USE_COMPLEX)
1153:     PetscCall(PetscViewerASCIIPrintf(viewer, "%%%%MatrixMarket matrix coordinate complex general\n"));
1154: #else
1155:     PetscCall(PetscViewerASCIIPrintf(viewer, "%%%%MatrixMarket matrix coordinate real general\n"));
1156: #endif
1157:     PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", m, A->cmap->n, a->nz));
1158:     for (i = 0; i < m; i++) {
1159:       shift = a->sliidx[i >> 3] + (i & 0x07);
1160:       for (j = 0; j < a->rlen[i]; j++) {
1161: #if defined(PETSC_USE_COMPLEX)
1162:         PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %g %g\n", i + fshift, a->colidx[shift + 8 * j] + fshift, (double)PetscRealPart(a->val[shift + 8 * j]), (double)PetscImaginaryPart(a->val[shift + 8 * j])));
1163: #else
1164:         PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %g\n", i + fshift, a->colidx[shift + 8 * j] + fshift, (double)a->val[shift + 8 * j]));
1165: #endif
1166:       }
1167:     }
1168:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1169:   } else if (format == PETSC_VIEWER_NATIVE) {
1170:     for (i = 0; i < a->totalslices; i++) { /* loop over slices */
1171:       PetscInt row;
1172:       PetscCall(PetscViewerASCIIPrintf(viewer, "slice %" PetscInt_FMT ": %" PetscInt_FMT " %" PetscInt_FMT "\n", i, a->sliidx[i], a->sliidx[i + 1]));
1173:       for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
1174: #if defined(PETSC_USE_COMPLEX)
1175:         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1176:           PetscCall(PetscViewerASCIIPrintf(viewer, "  %" PetscInt_FMT " %" PetscInt_FMT " %g + %g i\n", 8 * i + row, a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j])));
1177:         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1178:           PetscCall(PetscViewerASCIIPrintf(viewer, "  %" PetscInt_FMT " %" PetscInt_FMT " %g - %g i\n", 8 * i + row, a->colidx[j], (double)PetscRealPart(a->val[j]), -(double)PetscImaginaryPart(a->val[j])));
1179:         } else {
1180:           PetscCall(PetscViewerASCIIPrintf(viewer, "  %" PetscInt_FMT " %" PetscInt_FMT " %g\n", 8 * i + row, a->colidx[j], (double)PetscRealPart(a->val[j])));
1181:         }
1182: #else
1183:         PetscCall(PetscViewerASCIIPrintf(viewer, "  %" PetscInt_FMT " %" PetscInt_FMT " %g\n", 8 * i + row, a->colidx[j], (double)a->val[j]));
1184: #endif
1185:       }
1186:     }
1187:   } else {
1188:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1189:     if (A->factortype) {
1190:       for (i = 0; i < m; i++) {
1191:         shift = a->sliidx[i >> 3] + (i & 0x07);
1192:         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
1193:         /* L part */
1194:         for (j = shift; j < a->diag[i]; j += 8) {
1195: #if defined(PETSC_USE_COMPLEX)
1196:           if (PetscImaginaryPart(a->val[shift + 8 * j]) > 0.0) {
1197:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j])));
1198:           } else if (PetscImaginaryPart(a->val[shift + 8 * j]) < 0.0) {
1199:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)(-PetscImaginaryPart(a->val[j]))));
1200:           } else {
1201:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(a->val[j])));
1202:           }
1203: #else
1204:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)a->val[j]));
1205: #endif
1206:         }
1207:         /* diagonal */
1208:         j = a->diag[i];
1209: #if defined(PETSC_USE_COMPLEX)
1210:         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1211:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j]), (double)PetscImaginaryPart(1.0 / a->val[j])));
1212:         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1213:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j]), (double)(-PetscImaginaryPart(1.0 / a->val[j]))));
1214:         } else {
1215:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j])));
1216:         }
1217: #else
1218:         PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)(1.0 / a->val[j])));
1219: #endif
1221:         /* U part */
1222:         for (j = a->diag[i] + 1; j < shift + 8 * a->rlen[i]; j += 8) {
1223: #if defined(PETSC_USE_COMPLEX)
1224:           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1225:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j])));
1226:           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1227:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)(-PetscImaginaryPart(a->val[j]))));
1228:           } else {
1229:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(a->val[j])));
1230:           }
1231: #else
1232:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)a->val[j]));
1233: #endif
1234:         }
1235:         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1236:       }
1237:     } else {
1238:       for (i = 0; i < m; i++) {
1239:         shift = a->sliidx[i >> 3] + (i & 0x07);
1240:         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
1241:         for (j = 0; j < a->rlen[i]; j++) {
1242: #if defined(PETSC_USE_COMPLEX)
1243:           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1244:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j]), (double)PetscImaginaryPart(a->val[shift + 8 * j])));
1245:           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1246:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j]), (double)-PetscImaginaryPart(a->val[shift + 8 * j])));
1247:           } else {
1248:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j])));
1249:           }
1250: #else
1251:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + 8 * j], (double)a->val[shift + 8 * j]));
1252: #endif
1253:         }
1254:         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1255:       }
1256:     }
1257:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1258:   }
1259:   PetscCall(PetscViewerFlush(viewer));
1260:   PetscFunctionReturn(PETSC_SUCCESS);
1261: }
1263: #include <petscdraw.h>
1264: PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw, void *Aa)
1265: {
1266:   Mat               A = (Mat)Aa;
1267:   Mat_SeqSELL      *a = (Mat_SeqSELL *)A->data;
1268:   PetscInt          i, j, m = A->rmap->n, shift;
1269:   int               color;
1270:   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1271:   PetscViewer       viewer;
1272:   PetscViewerFormat format;
1274:   PetscFunctionBegin;
1275:   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1276:   PetscCall(PetscViewerGetFormat(viewer, &format));
1277:   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1279:   /* loop over matrix elements drawing boxes */
1281:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1282:     PetscDrawCollectiveBegin(draw);
1283:     /* Blue for negative, Cyan for zero and  Red for positive */
1284:     color = PETSC_DRAW_BLUE;
1285:     for (i = 0; i < m; i++) {
1286:       shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
1287:       y_l   = m - i - 1.0;
1288:       y_r   = y_l + 1.0;
1289:       for (j = 0; j < a->rlen[i]; j++) {
1290:         x_l = a->colidx[shift + j * 8];
1291:         x_r = x_l + 1.0;
1292:         if (PetscRealPart(a->val[shift + 8 * j]) >= 0.) continue;
1293:         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1294:       }
1295:     }
1296:     color = PETSC_DRAW_CYAN;
1297:     for (i = 0; i < m; i++) {
1298:       shift = a->sliidx[i >> 3] + (i & 0x07);
1299:       y_l   = m - i - 1.0;
1300:       y_r   = y_l + 1.0;
1301:       for (j = 0; j < a->rlen[i]; j++) {
1302:         x_l = a->colidx[shift + j * 8];
1303:         x_r = x_l + 1.0;
1304:         if (a->val[shift + 8 * j] != 0.) continue;
1305:         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1306:       }
1307:     }
1308:     color = PETSC_DRAW_RED;
1309:     for (i = 0; i < m; i++) {
1310:       shift = a->sliidx[i >> 3] + (i & 0x07);
1311:       y_l   = m - i - 1.0;
1312:       y_r   = y_l + 1.0;
1313:       for (j = 0; j < a->rlen[i]; j++) {
1314:         x_l = a->colidx[shift + j * 8];
1315:         x_r = x_l + 1.0;
1316:         if (PetscRealPart(a->val[shift + 8 * j]) <= 0.) continue;
1317:         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1318:       }
1319:     }
1320:     PetscDrawCollectiveEnd(draw);
1321:   } else {
1322:     /* use contour shading to indicate magnitude of values */
1323:     /* first determine max of all nonzero values */
1324:     PetscReal minv = 0.0, maxv = 0.0;
1325:     PetscInt  count = 0;
1326:     PetscDraw popup;
1327:     for (i = 0; i < a->sliidx[a->totalslices]; i++) {
1328:       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1329:     }
1330:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1331:     PetscCall(PetscDrawGetPopup(draw, &popup));
1332:     PetscCall(PetscDrawScalePopup(popup, minv, maxv));
1334:     PetscDrawCollectiveBegin(draw);
1335:     for (i = 0; i < m; i++) {
1336:       shift = a->sliidx[i >> 3] + (i & 0x07);
1337:       y_l   = m - i - 1.0;
1338:       y_r   = y_l + 1.0;
1339:       for (j = 0; j < a->rlen[i]; j++) {
1340:         x_l   = a->colidx[shift + j * 8];
1341:         x_r   = x_l + 1.0;
1342:         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]), minv, maxv);
1343:         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1344:         count++;
1345:       }
1346:     }
1347:     PetscDrawCollectiveEnd(draw);
1348:   }
1349:   PetscFunctionReturn(PETSC_SUCCESS);
1350: }
1352: #include <petscdraw.h>
1353: PetscErrorCode MatView_SeqSELL_Draw(Mat A, PetscViewer viewer)
1354: {
1355:   PetscDraw draw;
1356:   PetscReal xr, yr, xl, yl, h, w;
1357:   PetscBool isnull;
1359:   PetscFunctionBegin;
1360:   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1361:   PetscCall(PetscDrawIsNull(draw, &isnull));
1362:   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1364:   xr = A->cmap->n;
1365:   yr = A->rmap->n;
1366:   h  = yr / 10.0;
1367:   w  = xr / 10.0;
1368:   xr += w;
1369:   yr += h;
1370:   xl = -w;
1371:   yl = -h;
1372:   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
1373:   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
1374:   PetscCall(PetscDrawZoom(draw, MatView_SeqSELL_Draw_Zoom, A));
1375:   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
1376:   PetscCall(PetscDrawSave(draw));
1377:   PetscFunctionReturn(PETSC_SUCCESS);
1378: }
1380: PetscErrorCode MatView_SeqSELL(Mat A, PetscViewer viewer)
1381: {
1382:   PetscBool iascii, isbinary, isdraw;
1384:   PetscFunctionBegin;
1385:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1386:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1387:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1388:   if (iascii) {
1389:     PetscCall(MatView_SeqSELL_ASCII(A, viewer));
1390:   } else if (isbinary) {
1391:     /* PetscCall(MatView_SeqSELL_Binary(A,viewer)); */
1392:   } else if (isdraw) PetscCall(MatView_SeqSELL_Draw(A, viewer));
1393:   PetscFunctionReturn(PETSC_SUCCESS);
1394: }
1396: PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A, MatAssemblyType mode)
1397: {
1398:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1399:   PetscInt     i, shift, row_in_slice, row, nrow, *cp, lastcol, j, k;
1400:   MatScalar   *vp;
1402:   PetscFunctionBegin;
1403:   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
1404:   /* To do: compress out the unused elements */
1405:   PetscCall(MatMarkDiagonal_SeqSELL(A));
1406:   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " allocated %" PetscInt_FMT " used (%" PetscInt_FMT " nonzeros+%" PetscInt_FMT " paddedzeros)\n", A->rmap->n, A->cmap->n, a->maxallocmat, a->sliidx[a->totalslices], a->nz, a->sliidx[a->totalslices] - a->nz));
1407:   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
1408:   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", a->rlenmax));
1409:   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1410:   for (i = 0; i < a->totalslices; ++i) {
1411:     shift = a->sliidx[i];                                      /* starting index of the slice */
1412:     cp    = a->colidx + shift;                                 /* pointer to the column indices of the slice */
1413:     vp    = a->val + shift;                                    /* pointer to the nonzero values of the slice */
1414:     for (row_in_slice = 0; row_in_slice < 8; ++row_in_slice) { /* loop over rows in the slice */
1415:       row  = 8 * i + row_in_slice;
1416:       nrow = a->rlen[row]; /* number of nonzeros in row */
1417:       /*
1418:         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1419:         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1420:       */
1421:       lastcol = 0;
1422:       if (nrow > 0) {                                /* nonempty row */
1423:         lastcol = cp[8 * (nrow - 1) + row_in_slice]; /* use the index from the last nonzero at current row */
1424:       } else if (!row_in_slice) {                    /* first row of the correct slice is empty */
1425:         for (j = 1; j < 8; j++) {
1426:           if (a->rlen[8 * i + j]) {
1427:             lastcol = cp[j];
1428:             break;
1429:           }
1430:         }
1431:       } else {
1432:         if (a->sliidx[i + 1] != shift) lastcol = cp[row_in_slice - 1]; /* use the index from the previous row */
1433:       }
1435:       for (k = nrow; k < (a->sliidx[i + 1] - shift) / 8; ++k) {
1436:         cp[8 * k + row_in_slice] = lastcol;
1437:         vp[8 * k + row_in_slice] = (MatScalar)0;
1438:       }
1439:     }
1440:   }
1442:   A->info.mallocs += a->reallocs;
1443:   a->reallocs = 0;
1445:   PetscCall(MatSeqSELLInvalidateDiagonal(A));
1446:   PetscFunctionReturn(PETSC_SUCCESS);
1447: }
1449: PetscErrorCode MatGetInfo_SeqSELL(Mat A, MatInfoType flag, MatInfo *info)
1450: {
1451:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1453:   PetscFunctionBegin;
1454:   info->block_size   = 1.0;
1455:   info->nz_allocated = a->maxallocmat;
1456:   info->nz_used      = a->sliidx[a->totalslices]; /* include padding zeros */
1457:   info->nz_unneeded  = (a->maxallocmat - a->sliidx[a->totalslices]);
1458:   info->assemblies   = A->num_ass;
1459:   info->mallocs      = A->info.mallocs;
1460:   info->memory       = 0; /* REVIEW ME */
1461:   if (A->factortype) {
1462:     info->fill_ratio_given  = A->info.fill_ratio_given;
1463:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1464:     info->factor_mallocs    = A->info.factor_mallocs;
1465:   } else {
1466:     info->fill_ratio_given  = 0;
1467:     info->fill_ratio_needed = 0;
1468:     info->factor_mallocs    = 0;
1469:   }
1470:   PetscFunctionReturn(PETSC_SUCCESS);
1471: }
1473: PetscErrorCode MatSetValues_SeqSELL(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
1474: {
1475:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1476:   PetscInt     shift, i, k, l, low, high, t, ii, row, col, nrow;
1477:   PetscInt    *cp, nonew = a->nonew, lastcol = -1;
1478:   MatScalar   *vp, value;
1480:   PetscFunctionBegin;
1481:   for (k = 0; k < m; k++) { /* loop over added rows */
1482:     row = im[k];
1483:     if (row < 0) continue;
1484:     PetscCheck(row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->n - 1);
1485:     shift = a->sliidx[row >> 3] + (row & 0x07); /* starting index of the row */
1486:     cp    = a->colidx + shift;                  /* pointer to the row */
1487:     vp    = a->val + shift;                     /* pointer to the row */
1488:     nrow  = a->rlen[row];
1489:     low   = 0;
1490:     high  = nrow;
1492:     for (l = 0; l < n; l++) { /* loop over added columns */
1493:       col = in[l];
1494:       if (col < 0) continue;
1495:       PetscCheck(col < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT, col, A->cmap->n - 1);
1496:       if (a->roworiented) {
1497:         value = v[l + k * n];
1498:       } else {
1499:         value = v[k + l * m];
1500:       }
1501:       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1503:       /* search in this row for the specified column, i indicates the column to be set */
1504:       if (col <= lastcol) low = 0;
1505:       else high = nrow;
1506:       lastcol = col;
1507:       while (high - low > 5) {
1508:         t = (low + high) / 2;
1509:         if (*(cp + t * 8) > col) high = t;
1510:         else low = t;
1511:       }
1512:       for (i = low; i < high; i++) {
1513:         if (*(cp + i * 8) > col) break;
1514:         if (*(cp + i * 8) == col) {
1515:           if (is == ADD_VALUES) *(vp + i * 8) += value;
1516:           else *(vp + i * 8) = value;
1517:           low = i + 1;
1518:           goto noinsert;
1519:         }
1520:       }
1521:       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1522:       if (nonew == 1) goto noinsert;
1523:       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
1524:       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1525:       MatSeqXSELLReallocateSELL(A, A->rmap->n, 1, nrow, a->sliidx, row / 8, row, col, a->colidx, a->val, cp, vp, nonew, MatScalar);
1526:       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1527:       for (ii = nrow - 1; ii >= i; ii--) {
1528:         *(cp + (ii + 1) * 8) = *(cp + ii * 8);
1529:         *(vp + (ii + 1) * 8) = *(vp + ii * 8);
1530:       }
1531:       a->rlen[row]++;
1532:       *(cp + i * 8) = col;
1533:       *(vp + i * 8) = value;
1534:       a->nz++;
1535:       A->nonzerostate++;
1536:       low = i + 1;
1537:       high++;
1538:       nrow++;
1539:     noinsert:;
1540:     }
1541:     a->rlen[row] = nrow;
1542:   }
1543:   PetscFunctionReturn(PETSC_SUCCESS);
1544: }
1546: PetscErrorCode MatCopy_SeqSELL(Mat A, Mat B, MatStructure str)
1547: {
1548:   PetscFunctionBegin;
1549:   /* If the two matrices have the same copy implementation, use fast copy. */
1550:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1551:     Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1552:     Mat_SeqSELL *b = (Mat_SeqSELL *)B->data;
1554:     PetscCheck(a->sliidx[a->totalslices] == b->sliidx[b->totalslices], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzeros in two matrices are different");
1555:     PetscCall(PetscArraycpy(b->val, a->val, a->sliidx[a->totalslices]));
1556:   } else {
1557:     PetscCall(MatCopy_Basic(A, B, str));
1558:   }
1559:   PetscFunctionReturn(PETSC_SUCCESS);
1560: }
1562: PetscErrorCode MatSetUp_SeqSELL(Mat A)
1563: {
1564:   PetscFunctionBegin;
1565:   PetscCall(MatSeqSELLSetPreallocation(A, PETSC_DEFAULT, NULL));
1566:   PetscFunctionReturn(PETSC_SUCCESS);
1567: }
1569: PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A, PetscScalar *array[])
1570: {
1571:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1573:   PetscFunctionBegin;
1574:   *array = a->val;
1575:   PetscFunctionReturn(PETSC_SUCCESS);
1576: }
1578: PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A, PetscScalar *array[])
1579: {
1580:   PetscFunctionBegin;
1581:   PetscFunctionReturn(PETSC_SUCCESS);
1582: }
1584: PetscErrorCode MatRealPart_SeqSELL(Mat A)
1585: {
1586:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1587:   PetscInt     i;
1588:   MatScalar   *aval = a->val;
1590:   PetscFunctionBegin;
1591:   for (i = 0; i < a->sliidx[a->totalslices]; i++) aval[i] = PetscRealPart(aval[i]);
1592:   PetscFunctionReturn(PETSC_SUCCESS);
1593: }
1595: PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1596: {
1597:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1598:   PetscInt     i;
1599:   MatScalar   *aval = a->val;
1601:   PetscFunctionBegin;
1602:   for (i = 0; i < a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1603:   PetscCall(MatSeqSELLInvalidateDiagonal(A));
1604:   PetscFunctionReturn(PETSC_SUCCESS);
1605: }
1607: PetscErrorCode MatScale_SeqSELL(Mat inA, PetscScalar alpha)
1608: {
1609:   Mat_SeqSELL *a      = (Mat_SeqSELL *)inA->data;
1610:   MatScalar   *aval   = a->val;
1611:   PetscScalar  oalpha = alpha;
1612:   PetscBLASInt one    = 1, size;
1614:   PetscFunctionBegin;
1615:   PetscCall(PetscBLASIntCast(a->sliidx[a->totalslices], &size));
1616:   PetscCallBLAS("BLASscal", BLASscal_(&size, &oalpha, aval, &one));
1617:   PetscCall(PetscLogFlops(a->nz));
1618:   PetscCall(MatSeqSELLInvalidateDiagonal(inA));
1619:   PetscFunctionReturn(PETSC_SUCCESS);
1620: }
1622: PetscErrorCode MatShift_SeqSELL(Mat Y, PetscScalar a)
1623: {
1624:   Mat_SeqSELL *y = (Mat_SeqSELL *)Y->data;
1626:   PetscFunctionBegin;
1627:   if (!Y->preallocated || !y->nz) PetscCall(MatSeqSELLSetPreallocation(Y, 1, NULL));
1628:   PetscCall(MatShift_Basic(Y, a));
1629:   PetscFunctionReturn(PETSC_SUCCESS);
1630: }
1632: PetscErrorCode MatSOR_SeqSELL(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1633: {
1634:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
1635:   PetscScalar       *x, sum, *t;
1636:   const MatScalar   *idiag = NULL, *mdiag;
1637:   const PetscScalar *b, *xb;
1638:   PetscInt           n, m = A->rmap->n, i, j, shift;
1639:   const PetscInt    *diag;
1641:   PetscFunctionBegin;
1642:   its = its * lits;
1644:   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1645:   if (!a->idiagvalid) PetscCall(MatInvertDiagonal_SeqSELL(A, omega, fshift));
1646:   a->fshift = fshift;
1647:   a->omega  = omega;
1649:   diag  = a->diag;
1650:   t     = a->ssor_work;
1651:   idiag = a->idiag;
1652:   mdiag = a->mdiag;
1654:   PetscCall(VecGetArray(xx, &x));
1655:   PetscCall(VecGetArrayRead(bb, &b));
1656:   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1657:   PetscCheck(flag != SOR_APPLY_UPPER, PETSC_COMM_SELF, PETSC_ERR_SUP, "SOR_APPLY_UPPER is not implemented");
1658:   PetscCheck(flag != SOR_APPLY_LOWER, PETSC_COMM_SELF, PETSC_ERR_SUP, "SOR_APPLY_LOWER is not implemented");
1659:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
1661:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1662:     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1663:       for (i = 0; i < m; i++) {
1664:         shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
1665:         sum   = b[i];
1666:         n     = (diag[i] - shift) / 8;
1667:         for (j = 0; j < n; j++) sum -= a->val[shift + j * 8] * x[a->colidx[shift + j * 8]];
1668:         t[i] = sum;
1669:         x[i] = sum * idiag[i];
1670:       }
1671:       xb = t;
1672:       PetscCall(PetscLogFlops(a->nz));
1673:     } else xb = b;
1674:     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1675:       for (i = m - 1; i >= 0; i--) {
1676:         shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
1677:         sum   = xb[i];
1678:         n     = a->rlen[i] - (diag[i] - shift) / 8 - 1;
1679:         for (j = 1; j <= n; j++) sum -= a->val[diag[i] + j * 8] * x[a->colidx[diag[i] + j * 8]];
1680:         if (xb == b) {
1681:           x[i] = sum * idiag[i];
1682:         } else {
1683:           x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */
1684:         }
1685:       }
1686:       PetscCall(PetscLogFlops(a->nz)); /* assumes 1/2 in upper */
1687:     }
1688:     its--;
1689:   }
1690:   while (its--) {
1691:     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1692:       for (i = 0; i < m; i++) {
1693:         /* lower */
1694:         shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
1695:         sum   = b[i];
1696:         n     = (diag[i] - shift) / 8;
1697:         for (j = 0; j < n; j++) sum -= a->val[shift + j * 8] * x[a->colidx[shift + j * 8]];
1698:         t[i] = sum; /* save application of the lower-triangular part */
1699:         /* upper */
1700:         n = a->rlen[i] - (diag[i] - shift) / 8 - 1;
1701:         for (j = 1; j <= n; j++) sum -= a->val[diag[i] + j * 8] * x[a->colidx[diag[i] + j * 8]];
1702:         x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */
1703:       }
1704:       xb = t;
1705:       PetscCall(PetscLogFlops(2.0 * a->nz));
1706:     } else xb = b;
1707:     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1708:       for (i = m - 1; i >= 0; i--) {
1709:         shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
1710:         sum   = xb[i];
1711:         if (xb == b) {
1712:           /* whole matrix (no checkpointing available) */
1713:           n = a->rlen[i];
1714:           for (j = 0; j < n; j++) sum -= a->val[shift + j * 8] * x[a->colidx[shift + j * 8]];
1715:           x[i] = (1. - omega) * x[i] + (sum + mdiag[i] * x[i]) * idiag[i];
1716:         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1717:           n = a->rlen[i] - (diag[i] - shift) / 8 - 1;
1718:           for (j = 1; j <= n; j++) sum -= a->val[diag[i] + j * 8] * x[a->colidx[diag[i] + j * 8]];
1719:           x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */
1720:         }
1721:       }
1722:       if (xb == b) {
1723:         PetscCall(PetscLogFlops(2.0 * a->nz));
1724:       } else {
1725:         PetscCall(PetscLogFlops(a->nz)); /* assumes 1/2 in upper */
1726:       }
1727:     }
1728:   }
1729:   PetscCall(VecRestoreArray(xx, &x));
1730:   PetscCall(VecRestoreArrayRead(bb, &b));
1731:   PetscFunctionReturn(PETSC_SUCCESS);
1732: }
1734: static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1735:                                        MatGetRow_SeqSELL,
1736:                                        MatRestoreRow_SeqSELL,
1737:                                        MatMult_SeqSELL,
1738:                                        /* 4*/ MatMultAdd_SeqSELL,
1739:                                        MatMultTranspose_SeqSELL,
1740:                                        MatMultTransposeAdd_SeqSELL,
1741:                                        NULL,
1742:                                        NULL,
1743:                                        NULL,
1744:                                        /* 10*/ NULL,
1745:                                        NULL,
1746:                                        NULL,
1747:                                        MatSOR_SeqSELL,
1748:                                        NULL,
1749:                                        /* 15*/ MatGetInfo_SeqSELL,
1750:                                        MatEqual_SeqSELL,
1751:                                        MatGetDiagonal_SeqSELL,
1752:                                        MatDiagonalScale_SeqSELL,
1753:                                        NULL,
1754:                                        /* 20*/ NULL,
1755:                                        MatAssemblyEnd_SeqSELL,
1756:                                        MatSetOption_SeqSELL,
1757:                                        MatZeroEntries_SeqSELL,
1758:                                        /* 24*/ NULL,
1759:                                        NULL,
1760:                                        NULL,
1761:                                        NULL,
1762:                                        NULL,
1763:                                        /* 29*/ MatSetUp_SeqSELL,
1764:                                        NULL,
1765:                                        NULL,
1766:                                        NULL,
1767:                                        NULL,
1768:                                        /* 34*/ MatDuplicate_SeqSELL,
1769:                                        NULL,
1770:                                        NULL,
1771:                                        NULL,
1772:                                        NULL,
1773:                                        /* 39*/ NULL,
1774:                                        NULL,
1775:                                        NULL,
1776:                                        MatGetValues_SeqSELL,
1777:                                        MatCopy_SeqSELL,
1778:                                        /* 44*/ NULL,
1779:                                        MatScale_SeqSELL,
1780:                                        MatShift_SeqSELL,
1781:                                        NULL,
1782:                                        NULL,
1783:                                        /* 49*/ NULL,
1784:                                        NULL,
1785:                                        NULL,
1786:                                        NULL,
1787:                                        NULL,
1788:                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
1789:                                        NULL,
1790:                                        NULL,
1791:                                        NULL,
1792:                                        NULL,
1793:                                        /* 59*/ NULL,
1794:                                        MatDestroy_SeqSELL,
1795:                                        MatView_SeqSELL,
1796:                                        NULL,
1797:                                        NULL,
1798:                                        /* 64*/ NULL,
1799:                                        NULL,
1800:                                        NULL,
1801:                                        NULL,
1802:                                        NULL,
1803:                                        /* 69*/ NULL,
1804:                                        NULL,
1805:                                        NULL,
1806:                                        NULL,
1807:                                        NULL,
1808:                                        /* 74*/ NULL,
1809:                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1810:                                        NULL,
1811:                                        NULL,
1812:                                        NULL,
1813:                                        /* 79*/ NULL,
1814:                                        NULL,
1815:                                        NULL,
1816:                                        NULL,
1817:                                        NULL,
1818:                                        /* 84*/ NULL,
1819:                                        NULL,
1820:                                        NULL,
1821:                                        NULL,
1822:                                        NULL,
1823:                                        /* 89*/ NULL,
1824:                                        NULL,
1825:                                        NULL,
1826:                                        NULL,
1827:                                        NULL,
1828:                                        /* 94*/ NULL,
1829:                                        NULL,
1830:                                        NULL,
1831:                                        NULL,
1832:                                        NULL,
1833:                                        /* 99*/ NULL,
1834:                                        NULL,
1835:                                        NULL,
1836:                                        MatConjugate_SeqSELL,
1837:                                        NULL,
1838:                                        /*104*/ NULL,
1839:                                        NULL,
1840:                                        NULL,
1841:                                        NULL,
1842:                                        NULL,
1843:                                        /*109*/ NULL,
1844:                                        NULL,
1845:                                        NULL,
1846:                                        NULL,
1847:                                        MatMissingDiagonal_SeqSELL,
1848:                                        /*114*/ NULL,
1849:                                        NULL,
1850:                                        NULL,
1851:                                        NULL,
1852:                                        NULL,
1853:                                        /*119*/ NULL,
1854:                                        NULL,
1855:                                        NULL,
1856:                                        NULL,
1857:                                        NULL,
1858:                                        /*124*/ NULL,
1859:                                        NULL,
1860:                                        NULL,
1861:                                        NULL,
1862:                                        NULL,
1863:                                        /*129*/ NULL,
1864:                                        NULL,
1865:                                        NULL,
1866:                                        NULL,
1867:                                        NULL,
1868:                                        /*134*/ NULL,
1869:                                        NULL,
1870:                                        NULL,
1871:                                        NULL,
1872:                                        NULL,
1873:                                        /*139*/ NULL,
1874:                                        NULL,
1875:                                        NULL,
1876:                                        MatFDColoringSetUp_SeqXAIJ,
1877:                                        NULL,
1878:                                        /*144*/ NULL,
1879:                                        NULL,
1880:                                        NULL,
1881:                                        NULL,
1882:                                        NULL,
1883:                                        NULL,
1884:                                        /*150*/ NULL,
1885:                                        NULL};
1887: PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1888: {
1889:   Mat_SeqSELL *a = (Mat_SeqSELL *)mat->data;
1891:   PetscFunctionBegin;
1892:   PetscCheck(a->nonew, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1894:   /* allocate space for values if not already there */
1895:   if (!a->saved_values) PetscCall(PetscMalloc1(a->sliidx[a->totalslices] + 1, &a->saved_values));
1897:   /* copy values over */
1898:   PetscCall(PetscArraycpy(a->saved_values, a->val, a->sliidx[a->totalslices]));
1899:   PetscFunctionReturn(PETSC_SUCCESS);
1900: }
1902: PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1903: {
1904:   Mat_SeqSELL *a = (Mat_SeqSELL *)mat->data;
1906:   PetscFunctionBegin;
1907:   PetscCheck(a->nonew, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1908:   PetscCheck(a->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
1909:   PetscCall(PetscArraycpy(a->val, a->saved_values, a->sliidx[a->totalslices]));
1910:   PetscFunctionReturn(PETSC_SUCCESS);
1911: }
1913: /*@C
1914:  MatSeqSELLRestoreArray - returns access to the array where the data for a `MATSEQSELL` matrix is stored obtained by `MatSeqSELLGetArray()`
1916:  Not Collective
1918:  Input Parameters:
1919: +  mat - a `MATSEQSELL` matrix
1920: -  array - pointer to the data
1922:  Level: intermediate
1924: .seealso: `Mat`, `MATSEQSELL`, `MatSeqSELLGetArray()`, `MatSeqSELLRestoreArrayF90()`
1925:  @*/
1926: PetscErrorCode MatSeqSELLRestoreArray(Mat A, PetscScalar **array)
1927: {
1928:   PetscFunctionBegin;
1929:   PetscUseMethod(A, "MatSeqSELLRestoreArray_C", (Mat, PetscScalar **), (A, array));
1930:   PetscFunctionReturn(PETSC_SUCCESS);
1931: }
1933: PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1934: {
1935:   Mat_SeqSELL *b;
1936:   PetscMPIInt  size;
1938:   PetscFunctionBegin;
1939:   PetscCall(PetscCitationsRegister(citation, &cited));
1940:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1941:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Comm must be of size 1");
1943:   PetscCall(PetscNew(&b));
1945:   B->data = (void *)b;
1947:   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
1949:   b->row                = NULL;
1950:   b->col                = NULL;
1951:   b->icol               = NULL;
1952:   b->reallocs           = 0;
1953:   b->ignorezeroentries  = PETSC_FALSE;
1954:   b->roworiented        = PETSC_TRUE;
1955:   b->nonew              = 0;
1956:   b->diag               = NULL;
1957:   b->solve_work         = NULL;
1958:   B->spptr              = NULL;
1959:   b->saved_values       = NULL;
1960:   b->idiag              = NULL;
1961:   b->mdiag              = NULL;
1962:   b->ssor_work          = NULL;
1963:   b->omega              = 1.0;
1964:   b->fshift             = 0.0;
1965:   b->idiagvalid         = PETSC_FALSE;
1966:   b->keepnonzeropattern = PETSC_FALSE;
1968:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQSELL));
1969:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLGetArray_C", MatSeqSELLGetArray_SeqSELL));
1970:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLRestoreArray_C", MatSeqSELLRestoreArray_SeqSELL));
1971:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqSELL));
1972:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqSELL));
1973:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLSetPreallocation_C", MatSeqSELLSetPreallocation_SeqSELL));
1974:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqsell_seqaij_C", MatConvert_SeqSELL_SeqAIJ));
1975:   PetscFunctionReturn(PETSC_SUCCESS);
1976: }
1978: /*
1979:  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1980:  */
1981: PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
1982: {
1983:   Mat_SeqSELL *c = (Mat_SeqSELL *)C->data, *a = (Mat_SeqSELL *)A->data;
1984:   PetscInt     i, m                           = A->rmap->n;
1985:   PetscInt     totalslices = a->totalslices;
1987:   PetscFunctionBegin;
1988:   C->factortype = A->factortype;
1989:   c->row        = NULL;
1990:   c->col        = NULL;
1991:   c->icol       = NULL;
1992:   c->reallocs   = 0;
1993:   C->assembled  = PETSC_TRUE;
1995:   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
1996:   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
1998:   PetscCall(PetscMalloc1(8 * totalslices, &c->rlen));
1999:   PetscCall(PetscMalloc1(totalslices + 1, &c->sliidx));
2001:   for (i = 0; i < m; i++) c->rlen[i] = a->rlen[i];
2002:   for (i = 0; i < totalslices + 1; i++) c->sliidx[i] = a->sliidx[i];
2004:   /* allocate the matrix space */
2005:   if (mallocmatspace) {
2006:     PetscCall(PetscMalloc2(a->maxallocmat, &c->val, a->maxallocmat, &c->colidx));
2008:     c->singlemalloc = PETSC_TRUE;
2010:     if (m > 0) {
2011:       PetscCall(PetscArraycpy(c->colidx, a->colidx, a->maxallocmat));
2012:       if (cpvalues == MAT_COPY_VALUES) {
2013:         PetscCall(PetscArraycpy(c->val, a->val, a->maxallocmat));
2014:       } else {
2015:         PetscCall(PetscArrayzero(c->val, a->maxallocmat));
2016:       }
2017:     }
2018:   }
2020:   c->ignorezeroentries = a->ignorezeroentries;
2021:   c->roworiented       = a->roworiented;
2022:   c->nonew             = a->nonew;
2023:   if (a->diag) {
2024:     PetscCall(PetscMalloc1(m, &c->diag));
2025:     for (i = 0; i < m; i++) c->diag[i] = a->diag[i];
2026:   } else c->diag = NULL;
2028:   c->solve_work         = NULL;
2029:   c->saved_values       = NULL;
2030:   c->idiag              = NULL;
2031:   c->ssor_work          = NULL;
2032:   c->keepnonzeropattern = a->keepnonzeropattern;
2033:   c->free_val           = PETSC_TRUE;
2034:   c->free_colidx        = PETSC_TRUE;
2036:   c->maxallocmat  = a->maxallocmat;
2037:   c->maxallocrow  = a->maxallocrow;
2038:   c->rlenmax      = a->rlenmax;
2039:   c->nz           = a->nz;
2040:   C->preallocated = PETSC_TRUE;
2042:   c->nonzerorowcnt = a->nonzerorowcnt;
2043:   C->nonzerostate  = A->nonzerostate;
2045:   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
2046:   PetscFunctionReturn(PETSC_SUCCESS);
2047: }
2049: PetscErrorCode MatDuplicate_SeqSELL(Mat A, MatDuplicateOption cpvalues, Mat *B)
2050: {
2051:   PetscFunctionBegin;
2052:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
2053:   PetscCall(MatSetSizes(*B, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
2054:   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) PetscCall(MatSetBlockSizesFromMats(*B, A, A));
2055:   PetscCall(MatSetType(*B, ((PetscObject)A)->type_name));
2056:   PetscCall(MatDuplicateNoCreate_SeqSELL(*B, A, cpvalues, PETSC_TRUE));
2057:   PetscFunctionReturn(PETSC_SUCCESS);
2058: }
2060: /*MC
2061:    MATSEQSELL - MATSEQSELL = "seqsell" - A matrix type to be used for sequential sparse matrices,
2062:    based on the sliced Ellpack format
2064:    Options Database Key:
2065: . -mat_type seqsell - sets the matrix type to "`MATSEQELL` during a call to `MatSetFromOptions()`
2067:    Level: beginner
2069: .seealso: `Mat`, `MatCreateSeqSell()`, `MATSELL`, `MATMPISELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ`
2070: M*/
2072: /*MC
2073:    MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices.
2075:    This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator,
2076:    and `MATMPISELL` otherwise.  As a result, for single process communicators,
2077:   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
2078:   for communicators controlling multiple processes.  It is recommended that you call both of
2079:   the above preallocation routines for simplicity.
2081:    Options Database Key:
2082: . -mat_type sell - sets the matrix type to "sell" during a call to MatSetFromOptions()
2084:   Level: beginner
2086:   Notes:
2087:    This format is only supported for real scalars, double precision, and 32-bit indices (the defaults).
2089:    It can provide better performance on Intel and AMD processes with AVX2 or AVX512 support for matrices that have a similar number of
2090:    non-zeros in contiguous groups of rows. However if the computation is memory bandwidth limited it may not provide much improvement.
2092:   Developer Notes:
2093:    On Intel (and AMD) systems some of the matrix operations use SIMD (AVX) instructions to achieve higher performance.
2095:    The sparse matrix format is as follows. For simplicity we assume a slice size of 2, it is actually 8
2096: .vb
2097:                             (2 0  3 4)
2098:    Consider the matrix A =  (5 0  6 0)
2099:                             (0 0  7 8)
2100:                             (0 0  9 9)
2102:    symbolically the Ellpack format can be written as
2104:         (2 3 4 |)           (0 2 3 |)
2105:    v =  (5 6 0 |)  colidx = (0 2 2 |)
2106:         --------            ---------
2107:         (7 8 |)             (2 3 |)
2108:         (9 9 |)             (2 3 |)
2110:     The data for 2 contiguous rows of the matrix are stored together (in column-major format) (with any left-over rows handled as a special case).
2111:     Any of the rows in a slice fewer columns than the rest of the slice (row 1 above) are padded with a previous valid column in their "extra" colidx[] locations and
2112:     zeros in their "extra" v locations so that the matrix operations do not need special code to handle different length rows within the 2 rows in a slice.
2114:     The one-dimensional representation of v used in the code is (2 5 3 6 4 0 7 9 8 9)  and for colidx is (0 0 2 2 3 2 2 2 3 3)
2116: .ve
2118:       See MatMult_SeqSELL() for how this format is used with the SIMD operations to achieve high performance.
2120:  References:
2121: . * - Hong Zhang, Richard T. Mills, Karl Rupp, and Barry F. Smith, Vectorized Parallel Sparse Matrix-Vector Multiplication in {PETSc} Using {AVX-512},
2122:    Proceedings of the 47th International Conference on Parallel Processing, 2018.
2124: .seealso: `Mat`, `MatCreateSeqSELL()`, `MatCreateSeqAIJ()`, `MatCreateSell()`, `MATSEQSELL`, `MATMPISELL`, `MATSEQAIJ`, `MATMPIAIJ`, `MATAIJ`
2125: M*/
2127: /*@C
2128:        MatCreateSeqSELL - Creates a sparse matrix in `MATSEQSELL` format.
2130:  Collective
2132:  Input Parameters:
2133: +  comm - MPI communicator, set to `PETSC_COMM_SELF`
2134: .  m - number of rows
2135: .  n - number of columns
2136: .  rlenmax - maximum number of nonzeros in a row, ignored if `rlen` is provided
2137: -  rlen - array containing the number of nonzeros in the various rows (possibly different for each row) or NULL
2139:  Output Parameter:
2140: .  A - the matrix
2142:  Level: intermediate
2144:  Notes:
2145:  It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2146:  MatXXXXSetPreallocation() paradigm instead of this routine directly.
2147:  [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`]
2149:  Specify the preallocated storage with either `rlenmax` or `rlen` (not both).
2150:  Set `rlenmax` = `PETSC_DEFAULT` and `rlen` = `NULL` for PETSc to control dynamic memory
2151:  allocation.
2153: .seealso: `Mat`, `MATSEQSELL`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MatSeqSELLSetPreallocation()`, `MATSELL`, `MATSEQSELL`, `MATMPISELL`
2154:  @*/
2155: PetscErrorCode MatCreateSeqSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt rlenmax, const PetscInt rlen[], Mat *A)
2156: {
2157:   PetscFunctionBegin;
2158:   PetscCall(MatCreate(comm, A));
2159:   PetscCall(MatSetSizes(*A, m, n, m, n));
2160:   PetscCall(MatSetType(*A, MATSEQSELL));
2161:   PetscCall(MatSeqSELLSetPreallocation_SeqSELL(*A, rlenmax, rlen));
2162:   PetscFunctionReturn(PETSC_SUCCESS);
2163: }
2165: PetscErrorCode MatEqual_SeqSELL(Mat A, Mat B, PetscBool *flg)
2166: {
2167:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data, *b = (Mat_SeqSELL *)B->data;
2168:   PetscInt     totalslices = a->totalslices;
2170:   PetscFunctionBegin;
2171:   /* If the  matrix dimensions are not equal,or no of nonzeros */
2172:   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) || (a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2173:     *flg = PETSC_FALSE;
2174:     PetscFunctionReturn(PETSC_SUCCESS);
2175:   }
2176:   /* if the a->colidx are the same */
2177:   PetscCall(PetscArraycmp(a->colidx, b->colidx, a->sliidx[totalslices], flg));
2178:   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
2179:   /* if a->val are the same */
2180:   PetscCall(PetscArraycmp(a->val, b->val, a->sliidx[totalslices], flg));
2181:   PetscFunctionReturn(PETSC_SUCCESS);
2182: }
2184: PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2185: {
2186:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
2188:   PetscFunctionBegin;
2189:   a->idiagvalid = PETSC_FALSE;
2190:   PetscFunctionReturn(PETSC_SUCCESS);
2191: }
2193: PetscErrorCode MatConjugate_SeqSELL(Mat A)
2194: {
2195: #if defined(PETSC_USE_COMPLEX)
2196:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
2197:   PetscInt     i;
2198:   PetscScalar *val = a->val;
2200:   PetscFunctionBegin;
2201:   for (i = 0; i < a->sliidx[a->totalslices]; i++) val[i] = PetscConj(val[i]);
2202: #else
2203:   PetscFunctionBegin;
2204: #endif
2205:   PetscFunctionReturn(PETSC_SUCCESS);
2206: }