Millepede-II V04-17-04
mpbits.f90
Go to the documentation of this file.
1
38
39#ifdef SCOREP_USER_ENABLE
40#include "scorep/SCOREP_User.inc"
41#endif
42
44MODULE mpbits
45 USE mpdef
46 IMPLICIT NONE
47
48 INTEGER(mpl) :: ndimb
49 INTEGER(mpl) :: ndimb2
50 INTEGER(mpi) :: n
51 INTEGER(mpi) :: nar
52 INTEGER(mpi) :: nac
53 INTEGER(mpi) :: n2
54 INTEGER(mpi) :: ibfw
55 INTEGER(mpi) :: ireqpe
56 INTEGER(mpi) :: isngpe
57 INTEGER(mpi) :: iextnd
58 INTEGER(mpi) :: nspc
59 INTEGER(mpi) :: mxcnt
60 INTEGER(mpi) :: nthrd
61 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: bitfieldcounters
62 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: bitmap
63 INTEGER(mpi), PARAMETER :: bs = bit_size(1_mpi)
64
65END MODULE mpbits
66
73SUBROUTINE inbits(im,jm,inc) ! include element (I,J)
74 USE mpbits
75 IMPLICIT NONE
76
77 INTEGER(mpi), INTENT(IN) :: im
78 INTEGER(mpi), INTENT(IN) :: jm
79 INTEGER(mpi), INTENT(IN) :: inc
80
81 INTEGER(mpl) :: l
82 INTEGER(mpi) :: i
83 INTEGER(mpi) :: j
84 INTEGER(mpi) :: noffj
85 INTEGER(mpi) :: nout
86 INTEGER(mpi) :: m
87 INTEGER(mpi) :: icount
88 INTEGER(mpi) :: jcount
89 INTEGER(mpl) :: noffi
90
91 ! diagonal included now !
92 !IF(im == jm) RETURN ! diagonal
93 j=min(im,jm)
94 i=max(im,jm)
95 IF(j <= 0) RETURN ! out low
96 IF(i > n) RETURN ! out high
97 IF(iextnd >= 0) THEN
98 ! lower triangle, row 'i', col 'j'
99 noffi=int(i-1,mpl)*int(i,mpl)*int(ibfw,mpl)/2 ! row offset for J=1
100 noffj=(j-1)*ibfw
101 l=noffi/bs+i+noffj/bs ! row offset + column offset
102 ! add I instead of 1 to keep bit maps of different rows in different words (openMP !)
103 ELSE
104 ! upper triangle, row 'j', col 'i'
105 noffi=int(j-1,mpl)*int(2*n+2-j,mpl)/2 ! row offset for I=J
106 noffj=i-j
107 l=noffi/bs+j+noffj/bs ! row offset + column offset
108 ! add J instead of 1 to keep bit maps of different rows in different words (openMP !)
109 ENDIF
110 m=mod(noffj,bs)
111 IF (ibfw <= 1) THEN
113 ELSE
114 ! get counter from bit field
115 icount=0
116 nout=m+ibfw-bs ! number of bits outside word
117 IF (nout <= 0) THEN
118 ! inside single word
119 CALL mvbits(bitfieldcounters(l),m,ibfw,icount,0)
120 ELSE
121 ! spread over two words
122 CALL mvbits(bitfieldcounters(l),m,ibfw-nout,icount,0)
123 CALL mvbits(bitfieldcounters(l+1),0,nout,icount,ibfw-nout)
124 ENDIF
125 ! increment
126 jcount=icount
127 icount=min(icount+inc,mxcnt)
128 ! store counter into bit field
129 IF (icount /= jcount) THEN
130 IF (nout <= 0) THEN
131 ! inside single word
132 CALL mvbits(icount,0,ibfw,bitfieldcounters(l),m)
133 ELSE
134 ! spread over two words
135 CALL mvbits(icount,0,ibfw-nout,bitfieldcounters(l),m)
136 CALL mvbits(icount,ibfw-nout,nout,bitfieldcounters(l+1),0)
137 ENDIF
138 END IF
139 END IF
140 RETURN
141
142END SUBROUTINE inbits
143
149SUBROUTINE irbits(i,j) ! include element (I,J)
150 USE mpbits
151 IMPLICIT NONE
152
153 INTEGER(mpi), INTENT(IN) :: i
154 INTEGER(mpi), INTENT(IN) :: j
155
156 INTEGER(mpl) :: l
157 INTEGER(mpi) :: noffj
158 INTEGER(mpi) :: m
159 INTEGER(mpl) :: noffi
160
161 IF(i > nar.OR.j > nac) RETURN ! out high
162 ! upper triangle, rectangular part
163 noffi=ndimb+int(i-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
164 noffj=j-1
165 l=noffi+noffj/bs ! row offset + column offset
166 m=mod(noffj,bs)
168 RETURN
169
170END SUBROUTINE irbits
171
182SUBROUTINE clbits(in,jreqpe,jhispe,jsngpe,jextnd,idimb,ispc)
183 USE mpbits
184 USE mpdalc
185 IMPLICIT NONE
186
187 INTEGER(mpi), INTENT(IN) :: in
188 INTEGER(mpi), INTENT(IN) :: jreqpe
189 INTEGER(mpi), INTENT(IN) :: jhispe
190 INTEGER(mpi), INTENT(IN) :: jsngpe
191 INTEGER(mpi), INTENT(IN) :: jextnd
192 INTEGER(mpl), INTENT(OUT) :: idimb
193 INTEGER(mpi), INTENT(OUT) :: ispc
194
195 INTEGER(mpl) :: noffd
196 INTEGER(mpi) :: i
197 INTEGER(mpi) :: icount
198 INTEGER(mpi) :: mb
199 !$ INTEGER(mpi) :: OMP_GET_MAX_THREADS
200 ! save input parameter
201 n=in
202 nar=0
203 nac=0
204 ireqpe=jreqpe
205 isngpe=jsngpe
206 iextnd=max(0,jextnd)
207 ! number of precision types (D, F)
208 ispc=1
209 ! bit field size
210 ibfw=1 ! number of bits needed to count up to ICOUNT
211 mxcnt=1
212 IF (jextnd >= 0) THEN
213 ! optional larger bit fields for lower triangle
214 icount=max(jsngpe+1,jhispe)
215 icount=max(jreqpe,icount)
216 DO i=1,30
217 IF (icount > mxcnt) THEN
218 ibfw=ibfw+1
219 mxcnt=mxcnt*2+1
220 END IF
221 END DO
222 IF (jsngpe>0) ispc=2
223 END IF
224 ! bit field array size
225 noffd=int(n,mpl)*int(n+1,mpl)*int(ibfw,mpl)/2
226 ndimb=noffd/bs+n
227 idimb=ndimb
228 nspc=ispc
229 mb=int(4.0e-6*real(ndimb,mps),mpi)
230 WRITE(*,*) ' '
231 WRITE(*,*) 'CLBITS: symmetric matrix of dimension',n
232 WRITE(*,*) 'CLBITS: off-diagonal elements',noffd
233 IF (mb > 0) THEN
234 WRITE(*,*) 'CLBITS: dimension of bit-array',ndimb , '(',mb,'MB)'
235 ELSE
236 WRITE(*,*) 'CLBITS: dimension of bit-array',ndimb , '(< 1 MB)'
237 END IF
238 CALL mpalloc(bitfieldcounters,ndimb,'INBITS: bit storage')
240 nthrd=1
241 !$ NTHRD=OMP_GET_MAX_THREADS()
242 RETURN
243END SUBROUTINE clbits
244
255SUBROUTINE plbits(in,inar,inac,idimb)
256 USE mpbits
257 USE mpdalc
258 IMPLICIT NONE
259
260 INTEGER(mpi), INTENT(IN) :: in
261 INTEGER(mpi), INTENT(IN) :: inar
262 INTEGER(mpi), INTENT(IN) :: inac
263 INTEGER(mpl), INTENT(OUT) :: idimb
264
265 INTEGER(mpl) :: noffd
266 INTEGER(mpi) :: mb
267 !$ INTEGER(mpi) :: OMP_GET_MAX_THREADS
268 ! save input parameter
269 n=in
270 nar=inar
271 nac=inac
272 ireqpe=1
273 isngpe=0
274 iextnd=-1 ! upper triangle
275 ibfw=1
276 ! bit field array size
277 noffd=int(n,mpl)*int(n+1,mpl)/2
278 ndimb=noffd/bs+n
279 idimb=ndimb+int(nar,mpl)*int(nac/bs+1,mpl)
280 nspc=1
281 mb=int(4.0e-6*real(idimb,mps),mpi)
282 WRITE(*,*) ' '
283 WRITE(*,*) 'PLBITS: symmetric matrix of dimension',n, '(',nar, nac,')'
284 WRITE(*,*) 'PLBITS: off-diagonal elements',noffd, '(',int(nar,mpl)*int(nac,mpl),')'
285 IF (mb > 0) THEN
286 WRITE(*,*) 'PLBITS: dimension of bit-array',idimb , '(',mb,'MB)'
287 ELSE
288 WRITE(*,*) 'PLBITS: dimension of bit-array',idimb , '(< 1 MB)'
289 END IF
290 CALL mpalloc(bitfieldcounters,idimb,'INBITS: bit storage')
292 nthrd=1
293 !$ NTHRD=OMP_GET_MAX_THREADS()
294 RETURN
295END SUBROUTINE plbits
296
305SUBROUTINE ndbits(npgrp,ndims,nsparr,ihst)
306 USE mpbits
307 USE mpdalc
308 IMPLICIT NONE
309
310 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
311 INTEGER(mpl), DIMENSION(4), INTENT(OUT) :: ndims
312 INTEGER(mpl), DIMENSION(:,:), INTENT(OUT) :: nsparr
313 INTEGER(mpi), INTENT(IN) :: ihst
314
315 INTEGER(mpi) :: nwcp(0:1)
316 INTEGER(mpi) :: irgn(2)
317 INTEGER(mpi) :: inr(2)
318 INTEGER(mpi) :: ichunk
319 INTEGER(mpi) :: i
320 INTEGER(mpi) :: j
321 INTEGER(mpi) :: jcol
322 INTEGER(mpi) :: last
323 INTEGER(mpi) :: lrgn
324 INTEGER(mpi) :: next
325 INTEGER(mpi) :: icp
326 INTEGER(mpi) :: mm
327 INTEGER(mpi) :: jp
328 INTEGER(mpi) :: n1
329 INTEGER(mpi) :: nd
330 INTEGER(mpi) :: ib
331 INTEGER(mpi) :: ir
332 INTEGER(mpi) :: icount
333 INTEGER(mpi) :: iproc
334 INTEGER(mpi) :: iword
335 INTEGER(mpi) :: mb
336 INTEGER(mpl) :: ll
337 INTEGER(mpl) :: lb
338 INTEGER(mpl) :: nin
339 INTEGER(mpl) :: npar
340 INTEGER(mpl) :: ntot
341 INTEGER(mpl) :: nskyln
342 INTEGER(mpl) :: noffi
343 INTEGER(mpl) :: noffj
344 REAL(mps) :: cpr
345 REAL(mps) :: fracu
346 REAL(mps) :: fracs
347 REAL(mps) :: fracz
348 LOGICAL :: btest
349 !$ INTEGER(mpi) :: OMP_GET_THREAD_NUM
350 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: lastRowInCol
351
352 ll=int(n,mpl)*int(nthrd,mpl)
353 CALL mpalloc(lastrowincol,ll,'NDBITS: last (non zero) row in col.')
354 lastrowincol=0
355
356 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
357
358 ndims(1)=ndimb
359 ndims(2)=0
360 ndims(3)=0
361 ndims(4)=0
362 ntot=0
363 ll=0
364 lb=0
365 ichunk=min((n+nthrd-1)/nthrd/32+1,256)
366 ! reduce bit field counters to (precision type) bits, analyze precision type bit fields ('1st half' (j<=i))
367 ! parallelize row loop
368 ! private copy of NTOT for each thread, combined at end, init with 0.
369 !$OMP PARALLEL DO &
370 !$OMP PRIVATE(I,NOFFI,LL,MM,LB,MB,IWORD,IPROC,J,ICOUNT,IB,INR,IRGN,LAST,LRGN,NEXT,JP,IR,NPAR) &
371 !$OMP REDUCTION(+:NTOT) &
372 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
373 DO i=1,n
374 noffi=int(i-1,mpl)*int(i,mpl)*int(ibfw,mpl)/2
375 ll=noffi/bs+i
376 mm=0
377 lb=ll
378 mb=0
379 iword=0 ! reset temporary bit fields
380 iproc=0
381 !$ IPROC=OMP_GET_THREAD_NUM() ! thread number
382 inr(1)=0
383 inr(2)=0
384 irgn(1)=1 ! 'end marker' region
385 irgn(2)=1
386 last=0
387 lrgn=0
388 npar=0
389
390 DO j=1,i ! loop until diagonal element
391 ! get (pair) counter
392 icount=0
393 next=0
394 DO ib=0,ibfw-1
395 IF (btest(bitfieldcounters(ll),mm)) icount=ibset(icount,ib)
396 mm=mm+1
397 IF (mm >= bs) THEN
398 ll=ll+1
399 mm=mm-bs
400 END IF
401 END DO
402
403 IF (icount > 0) THEN
404 npar=npar+npgrp(j+1)-npgrp(j)
405 IF (iproc == 0.AND.ihst > 0) CALL hmpent(ihst,real(icount,mps))
406 END IF
407
408 ! keep pair ?
409 IF (icount >= ireqpe) THEN
410 next=1 ! double
411 IF (icount <= isngpe) next=2 ! single
412 iword=ibset(iword,mb+next-1)
413 inr(next)=inr(next)+npgrp(j+1)-npgrp(j) ! number of parameters
414 IF (next /= last) THEN
415 irgn(next)=irgn(next)+1
416 END IF
417 jcol=j+iproc*n
418 lastrowincol(jcol)=max(lastrowincol(jcol),i)
419 END IF
420 last=next
421 ! save condensed bitfield
422 mb=mb+nspc
423 IF (mb >= bs) THEN
424 bitfieldcounters(lb)=iword ! store
425 iword=0
426 lb=lb+1
427 mb=mb-bs
428 END IF
429 END DO
430 bitfieldcounters(lb)=iword ! store
431 ntot=ntot+npar*(npgrp(i+1)-npgrp(i))
432
433 ! save row statistics
434 ir=i+1
435 DO jp=1,nspc
436 nsparr(1,ir)=irgn(jp) ! number of regions per row and precision
437 nsparr(2,ir)=inr(jp) ! number of columns per row and precision (groups)
438 ir=ir+n+1
439 END DO
440
441 END DO
442 !$OMP END PARALLEL DO
443
444 ! analyze precision type bit fields for extended storage, check for row compression
445
446 ! parallelize row loop
447 ! private copy of NDIMS for each thread, combined at end, init with 0.
448 !$OMP PARALLEL DO &
449 !$OMP PRIVATE(I,NOFFI,NOFFJ,LL,MM,INR,IRGN,LAST,LRGN,J,NEXT,ICP,NWCP,JP,IR,IB) &
450 !$OMP REDUCTION(+:NDIMS) &
451 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
452 DO i=1,n
453 ! restore row statistics
454 ir=i+1
455 DO jp=1,nspc
456 irgn(jp)=int(nsparr(1,ir),mpi) ! number of regions per row and precision
457 inr(jp)=int(nsparr(2,ir),mpi) ! number of columns per row and precision (groups)
458 ir=ir+n+1
459 END DO
460
461 ! analyze precision type bit fields for extended storage ('2nd half' (j>i) too) ?
462 IF (iextnd > 0) THEN
463
464 noffj=(i-1)*nspc
465 mm=int(mod(noffj,int(bs,mpl)),mpi)
466
467 last=0
468 lrgn=0
469
470 ! remaining columns
471 DO j=i+1, n
472 ! index for pair (J,I)
473 noffi=int(j-1,mpl)*int(j,mpl)*int(ibfw,mpl)/2 ! for I=1
474 ll=noffi/bs+j+noffj/bs ! row offset + column offset
475
476 ! get precision type
477 next=0
478 DO ib=0,nspc-1
479 IF (btest(bitfieldcounters(ll),mm+ib)) next=ibset(next,ib)
480 END DO
481
482 ! keep pair ?
483 IF (next > 0) THEN
484 inr(next)=inr(next)+npgrp(j+1)-npgrp(j) ! number of parameters
485 IF (next /= last) THEN
486 irgn(next)=irgn(next)+1
487 END IF
488 END IF
489 last=next
490 END DO
491 END IF
492
493 ! row statistics, compression
494 ir=i+1
495 DO jp=1,nspc
496 icp=0
497 nwcp(0)=inr(jp) ! list of column indices (default)
498 IF (inr(jp) > 0) THEN
499 nwcp(1)=irgn(jp)*2 ! list of regions (group starts and offsets)
500 ! compress row ?
501 IF ((nwcp(1) < nwcp(0)).OR.iextnd > 0) THEN
502 icp=1
503 END IF
504 ! total space
505 ndims(2) =ndims(2) +nwcp(icp)
506 ndims(jp+2)=ndims(jp+2)+nwcp(0)*(npgrp(i+1)-npgrp(i))
507 END IF
508 ! per row and precision
509 nsparr(1,ir)=nwcp(icp)
510 nsparr(2,ir)=nwcp(0)*(npgrp(i+1)-npgrp(i))
511 ir=ir+n+1
512 END DO
513 END DO
514 !$OMP END PARALLEL DO
515
516 ! sum up, fill row offsets
517 lb=0
518 n1=0
519 ll=nd
520 DO jp=1,nspc
521 DO i=1,n
522 n1=n1+1
523 nsparr(1,n1)=lb
524 nsparr(2,n1)=ll
525 lb=lb+nsparr(1,n1+1)
526 ll=ll+nsparr(2,n1+1)
527 END DO
528 n1=n1+1
529 nsparr(1,n1)=lb
530 nsparr(2,n1)=ll
531 ll=0
532 END DO
533
534 ! look at skyline
535 ! first combine threads
536 ll=n
537 DO j=2,nthrd
538 DO i=1,n
539 ll=ll+1
540 lastrowincol(i)=max(lastrowincol(i),lastrowincol(ll))
541 END DO
542 END DO
543 ! sumup
544 nskyln=0
545 DO i=1,n
546 npar=npgrp(lastrowincol(i)+1)-npgrp(i)
547 nskyln=nskyln+npar*(npgrp(i+1)-npgrp(i))
548 END DO
549 ! cleanup
550 CALL mpdealloc(lastrowincol)
551
552 nin=ndims(3)+ndims(4)
553 fracz=200.0*real(ntot,mps)/real(nd,mps)/real(nd-1,mps)
554 fracu=200.0*real(nin,mps)/real(nd,mps)/real(nd-1,mps)
555 fracs=200.0*real(nskyln,mps)/real(nd,mps)/real(nd+1,mps)
556 WRITE(*,*) ' '
557 WRITE(*,*) 'NDBITS: number of diagonal elements',nd
558 WRITE(*,*) 'NDBITS: number of used off-diagonal elements',nin
559 WRITE(*,1000) 'fraction of non-zero off-diagonal elements', fracz
560 WRITE(*,1000) 'fraction of used off-diagonal elements', fracu
561 cpr=100.0*real(mpi*ndims(2)+mpd*ndims(3)+mps*ndims(4),mps)/real((mpd+mpi)*nin,mps)
562 WRITE(*,1000) 'compression ratio for off-diagonal elements', cpr
563 WRITE(*,1000) 'fraction inside skyline ', fracs
5641000 FORMAT(' NDBITS: ',a,f6.2,' %')
565 RETURN
566END SUBROUTINE ndbits
567
578SUBROUTINE pbsbits(npgrp,ibsize,nnzero,nblock,nbkrow)
579 USE mpbits
580 USE mpdalc
581 IMPLICIT NONE
582
583 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
584 INTEGER(mpi), INTENT(IN) :: ibsize
585 INTEGER(mpl), INTENT(OUT) :: nnzero
586 INTEGER(mpl), INTENT(OUT) :: nblock
587 INTEGER(mpi), DIMENSION(:),INTENT(OUT) :: nbkrow
588
589 INTEGER(mpi) :: ichunk
590 INTEGER(mpi) :: i
591 INTEGER(mpi) :: ib
592 INTEGER(mpi) :: igrpf
593 INTEGER(mpi) :: igrpl
594 INTEGER(mpi) :: iproc
595 INTEGER(mpi) :: ioffb
596 INTEGER(mpi) :: ioffg
597 INTEGER(mpi) :: j
598 INTEGER(mpi) :: mb
599 INTEGER(mpi) :: mbt
600 INTEGER(mpi) :: mm
601 INTEGER(mpi) :: nd
602 INTEGER(mpi) :: ngrp
603 INTEGER(mpi) :: ir
604 INTEGER(mpi) :: irfrst
605 INTEGER(mpi) :: irlast
606 INTEGER(mpi) :: jb
607 INTEGER(mpi) :: jc
608 INTEGER(mpl) :: length
609 INTEGER(mpl) :: ll
610 INTEGER(mpl) :: noffi
611 INTEGER(mpl), PARAMETER :: two=2
612 INTEGER(mpi), DIMENSION(:,:), ALLOCATABLE :: rowBlocksToGroups
613 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: blockCounter
614 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: groupList
615 !$ INTEGER(mpi) :: OMP_GET_THREAD_NUM
616
617 LOGICAL :: btest
618
619 nnzero=0
620 nblock=0
621 nbkrow=0
622
623 !$POMP INST BEGIN(pbsbits)
624#ifdef SCOREP_USER_ENABLE
625 scorep_user_region_by_name_begin("pbsbits", scorep_user_region_type_common)
626#endif
627 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
628 mb=(nd+nac-1)/ibsize+1 ! max. number of blocks per row/column
629 mbt=(nd-1)/ibsize+1 ! max. number of blocks in triangular part
630 length=int(mb,mpl)*int(nthrd,mpl)
631 CALL mpalloc(blockcounter,length,'PBBITS: block counter')
632 length=int(n,mpl)*int(nthrd,mpl)
633 CALL mpalloc(grouplist,length,'PBBITS: group list')
634
635 ! mapping row blocks to parameters groups
636 length=int(mbt,mpl)
637 CALL mpalloc(rowblockstogroups,two,length,'mapping row blocks to par. groups (I)')
638 rowblockstogroups(:,:)=0
639 igrpf=1 ! first group of block
640 igrpl=1 ! last group of block
641 ir=1 ! first row of block
642 DO i=1,mbt
643 DO WHILE (igrpf < n .AND. npgrp(igrpf+1) <= ir)
644 igrpf=igrpf+1
645 END DO
646 rowblockstogroups(1,i)=igrpf
647 ir=ir+ibsize
648 DO WHILE (igrpl < n .AND. npgrp(igrpl+1) < ir)
649 igrpl=igrpl+1
650 END DO
651 rowblockstogroups(2,i)=igrpl
652 END DO
653
654 ll=0
655 ichunk=min((mbt+nthrd-1)/nthrd/32+1,256)
656 ! analyze bit fields ('upper half' (j>=i))
657 ! parallelize row loop
658 !$OMP PARALLEL DO &
659 !$OMP PRIVATE(IPROC,IOFFB,IOFFG,I,NOFFI,LL,MM,NGRP,J,IR,IRFRST,IRLAST) &
660 !$OMP REDUCTION(+:NNZERO,NBLOCK) &
661 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
662 DO ib=1,mbt
663 irfrst=ibsize*(ib-1)+1 ! first row in block
664 irlast=ibsize*ib ! last row in block
665 iproc=0
666 !$ IPROC=OMP_GET_THREAD_NUM() ! thread number
667 ioffb=iproc*mb
668 ioffg=iproc*n
669 blockcounter(ioffb+1:ioffb+mb)=0
670 DO i=rowblockstogroups(1,ib),rowblockstogroups(2,ib)
671 noffi=int(i-1,mpl)*int(2*n+2-i,mpl)/2 ! row offset for I=J
672 ll=noffi/bs+i
673 mm=0
674 ngrp=0
675 DO j=i,n ! loop from diagonal element
676 IF (btest(bitfieldcounters(ll),mm)) THEN ! found group
677 ngrp=ngrp+1
678 grouplist(ioffg+ngrp)=j
679 END IF
680 mm=mm+1
681 IF (mm >= bs) THEN
682 ll=ll+1
683 mm=mm-bs
684 END IF
685 END DO
686 ! analyze rows (in overlap of group 'i 'and block 'ib')
687 DO ir=max(irfrst,npgrp(i)),min(irlast,npgrp(i+1)-1)
688 ! triangular part (parameter groups)
689 DO j=1,ngrp
690 ! column loop
691 DO jc=max(ir,npgrp(grouplist(ioffg+j))),npgrp(grouplist(ioffg+j)+1)-1
692 ! block number
693 jb=(jc-1)/ibsize+1
694 blockcounter(ioffb+jb)=blockcounter(ioffb+jb)+1
695 END DO
696 END DO
697 ! rectangular part (parameters, constraints)
698 noffi=ndimb+int(ir-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
699 ll=noffi ! row offset
700 mm=0
701 DO j=1,nac
702 IF (btest(bitfieldcounters(ll),mm)) THEN ! found group
703 ! block number
704 jb=(nd+j-1)/ibsize+1
705 blockcounter(ioffb+jb)=blockcounter(ioffb+jb)+1
706 END IF
707 mm=mm+1
708 IF (mm >= bs) THEN
709 ll=ll+1
710 mm=mm-bs
711 END IF
712 END DO
713 END DO
714 END DO
715 ! end of 'row' block
716 DO j=1,mb
717 IF (blockcounter(ioffb+j) > 0) THEN
718 nnzero=nnzero+blockcounter(ioffb+j)
719 nblock=nblock+1
720 nbkrow(ib)=nbkrow(ib)+1
721 ENDIF
722 END DO
723 END DO
724 !$OMP END PARALLEL DO
725
726 ! 'empty' diagonal elements needed too
727 DO ib=mbt+1,mb
728 nnzero=nnzero+ibsize
729 nblock=nblock+1
730 nbkrow(ib)=1
731 END DO
732
733 ! cleanup
734 CALL mpdealloc(grouplist)
735 CALL mpdealloc(blockcounter)
736 CALL mpdealloc(rowblockstogroups)
737
738#ifdef SCOREP_USER_ENABLE
739 scorep_user_region_by_name_end("pbsbits")
740#endif
741 !$POMP INST END(pbsbits)
742 WRITE(*,*) ' '
743 WRITE(*,*) 'PBSBITS: number of used elements', nnzero
744 WRITE(*,1000) 'fraction of used elements', 200.0*real(nnzero,mps)/real(nd+nac,mps)/real(nd+nac+1,mps)
745 WRITE(*,*) 'PBSBITS: block size', ibsize
746 WRITE(*,*) 'PBSBITS: number of (used) blocks', nblock
747 WRITE(*,1000) 'fraction of used storage ', 100.0*real(ibsize*ibsize+1,mps)*real(nblock,mps)/real(2*nnzero,mps)
7481000 FORMAT(' PBSBITS: ',a,f7.2,' %')
749 RETURN
750END SUBROUTINE pbsbits
751
761SUBROUTINE pblbits(npgrp,ibsize,nsparr,nsparc)
762 USE mpbits
763 USE mpdalc
764 IMPLICIT NONE
765
766 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
767 INTEGER(mpi), INTENT(IN) :: ibsize
768 INTEGER(mpl), DIMENSION(:), INTENT(IN) :: nsparr
769 INTEGER(mpl), DIMENSION(:), INTENT(OUT) :: nsparc
770
771 INTEGER(mpi) :: ichunk
772 INTEGER(mpi) :: i
773 INTEGER(mpi) :: ib
774 INTEGER(mpi) :: igrpf
775 INTEGER(mpi) :: igrpl
776 INTEGER(mpi) :: iproc
777 INTEGER(mpi) :: ioffb
778 INTEGER(mpi) :: ioffg
779 INTEGER(mpi) :: j
780 INTEGER(mpi) :: mb
781 INTEGER(mpi) :: mbt
782 INTEGER(mpi) :: mm
783 INTEGER(mpi) :: nd
784 INTEGER(mpi) :: ngrp
785 INTEGER(mpi) :: ir
786 INTEGER(mpi) :: irfrst
787 INTEGER(mpi) :: irlast
788 INTEGER(mpi) :: jb
789 INTEGER(mpi) :: jc
790 INTEGER(mpl) :: kk
791 INTEGER(mpl) :: length
792 INTEGER(mpl) :: ll
793 INTEGER(mpl) :: noffi
794 INTEGER(mpl), PARAMETER :: two=2
795 INTEGER(mpi), DIMENSION(:,:), ALLOCATABLE :: rowBlocksToGroups
796 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: blockCounter
797 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: groupList
798 !$ INTEGER(mpi) :: OMP_GET_THREAD_NUM
799
800 LOGICAL :: btest
801
802 !$POMP INST BEGIN(pblbits)
803#ifdef SCOREP_USER_ENABLE
804 scorep_user_region_by_name_begin("pblbits", scorep_user_region_type_common)
805#endif
806 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
807 mb=(nd+nac-1)/ibsize+1 ! max. number of blocks per row/column
808 mbt=(nd-1)/ibsize+1 ! max. number of blocks in triangular part
809 length=int(mb,mpl)*int(nthrd,mpl)
810 CALL mpalloc(blockcounter,length,'PBBITS: block counter')
811 length=int(n,mpl)*int(nthrd,mpl)
812 CALL mpalloc(grouplist,length,'PBBITS: group list')
813
814 ! mapping row blocks to parameters groups
815 length=int(mbt,mpl)
816 CALL mpalloc(rowblockstogroups,two,length,'mapping row blocks to par. groups (I)')
817 rowblockstogroups(:,:)=0
818 igrpf=1 ! first group of block
819 igrpl=1 ! last group of block
820 ir=1 ! first row of block
821 DO i=1,mbt
822 DO WHILE (igrpf < n .AND. npgrp(igrpf+1) <= ir)
823 igrpf=igrpf+1
824 END DO
825 rowblockstogroups(1,i)=igrpf
826 ir=ir+ibsize
827 DO WHILE (igrpl < n .AND. npgrp(igrpl+1) < ir)
828 igrpl=igrpl+1
829 END DO
830 rowblockstogroups(2,i)=igrpl
831 END DO
832
833 ll=0
834 ichunk=min((mbt+nthrd-1)/nthrd/32+1,256)
835 ! analyze bit fields ('upper half' (j>=i))
836 ! parallelize row loop
837 !$OMP PARALLEL DO &
838 !$OMP PRIVATE(IPROC,IOFFB,IOFFG,I,NOFFI,KK,LL,MM,NGRP,J,IR,IRFRST,IRLAST) &
839 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
840 DO ib=1,mbt
841 irfrst=ibsize*(ib-1)+1 ! first row in block
842 irlast=ibsize*ib ! last row in block
843 iproc=0
844 !$ IPROC=OMP_GET_THREAD_NUM() ! thread number
845 ioffb=iproc*mb
846 ioffg=iproc*n
847 blockcounter(ioffb+1:ioffb+mb)=0
848 DO i=rowblockstogroups(1,ib),rowblockstogroups(2,ib)
849 noffi=int(i-1,mpl)*int(2*n+2-i,mpl)/2 ! row offset for I=J
850 ll=noffi/bs+i
851 mm=0
852 ngrp=0
853 DO j=i,n ! loop from diagonal element
854 IF (btest(bitfieldcounters(ll),mm)) THEN ! found group
855 ngrp=ngrp+1
856 grouplist(ioffg+ngrp)=j
857 END IF
858 mm=mm+1
859 IF (mm >= bs) THEN
860 ll=ll+1
861 mm=mm-bs
862 END IF
863 END DO
864 ! analyze rows (in overlap of group 'i 'and block 'ib')
865 DO ir=max(irfrst,npgrp(i)),min(irlast,npgrp(i+1)-1)
866 ! triangular part (parameter groups)
867 DO j=1,ngrp
868 ! column loop
869 DO jc=max(ir,npgrp(grouplist(ioffg+j))),npgrp(grouplist(ioffg+j)+1)-1
870 ! block number
871 jb=(jc-1)/ibsize+1
872 blockcounter(ioffb+jb)=blockcounter(ioffb+jb)+1
873 END DO
874 END DO
875 ! rectangular part (parameters, constraints)
876 noffi=ndimb+int(ir-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
877 ll=noffi ! row offset
878 mm=0
879 DO j=1,nac
880 IF (btest(bitfieldcounters(ll),mm)) THEN ! found group
881 ! block number
882 jb=(nd+j-1)/ibsize+1
883 blockcounter(ioffb+jb)=blockcounter(ioffb+jb)+1
884 END IF
885 mm=mm+1
886 IF (mm >= bs) THEN
887 ll=ll+1
888 mm=mm-bs
889 END IF
890 END DO
891 END DO
892 END DO
893 ! end of 'row' block
894 kk=nsparr(ib) ! offset for row block
895 DO j=1,mb
896 IF (blockcounter(ioffb+j) > 0) THEN
897 ! store used column block indices
898 nsparc(kk)=j
899 kk=kk+1
900 ENDIF
901 END DO
902 END DO
903 !$OMP END PARALLEL DO
904
905 ! 'empty' diagonal elements needed too
906 DO ib=mbt+1,mb
907 kk=nsparr(ib)
908 nsparc(kk)=ib
909 END DO
910
911 ! cleanup
912 CALL mpdealloc(grouplist)
913 CALL mpdealloc(blockcounter)
914 CALL mpdealloc(rowblockstogroups)
915
916#ifdef SCOREP_USER_ENABLE
917 scorep_user_region_by_name_end("pblbits")
918#endif
919 !$POMP INST END(pblbits)
920 WRITE(*,*) ' '
921 WRITE(*,*) 'PBLBITS: column list constructed ',nsparr(mb+1)-nsparr(1), ' words'
923 RETURN
924END SUBROUTINE pblbits
925
926
934SUBROUTINE prbits(npgrp,nsparr)
935 USE mpbits
936 USE mpdalc
937 IMPLICIT NONE
938
939 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
940 INTEGER(mpl), DIMENSION(:), INTENT(OUT) :: nsparr
941
942 INTEGER(mpi) :: ichunk
943 INTEGER(mpi) :: i
944 INTEGER(mpi) :: j
945 INTEGER(mpi) :: mm
946 INTEGER(mpi) :: nd
947 INTEGER(mpi) :: ir
948 INTEGER(mpl) :: ll
949 INTEGER(mpl) :: npar
950 INTEGER(mpl) :: nparc
951 INTEGER(mpl) :: ntot
952 INTEGER(mpl) :: noffi
953
954 LOGICAL :: btest
955
956 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
957
958 ntot=0
959 ll=0
960 ichunk=min((n+nthrd-1)/nthrd/32+1,256)
961 ! analyze bit fields ('upper half' (j>=i))
962 ! parallelize row loop
963 ! private copy of NTOT for each thread, combined at end, init with 0.
964 !$OMP PARALLEL DO &
965 !$OMP PRIVATE(I,NOFFI,LL,MM,J,IR,NPAR,NPARC) &
966 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
967 DO i=1,n
968 noffi=int(i-1,mpl)*int(2*n+2-i,mpl)/2 ! row offset for I=J
969 ll=noffi/bs+i
970 mm=0
971 npar=0
972 ! triangular part (parameter groups)
973 DO j=i,n ! loop from diagonal element
974 IF (btest(bitfieldcounters(ll),mm)) npar=npar+npgrp(j+1)-npgrp(j) ! number of parameters
975 mm=mm+1
976 IF (mm >= bs) THEN
977 ll=ll+1
978 mm=mm-bs
979 END IF
980 END DO
981
982 ! save number of parameters for row(s)
983 DO ir=npgrp(i),npgrp(i+1)-1
984 ! rectangular part (parameters, constraints)
985 noffi=ndimb+int(ir-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
986 ll=noffi ! row offset
987 mm=0
988 nparc=0
989 DO j=1,nac
990 IF (btest(bitfieldcounters(ll),mm)) THEN ! found parameter/constraint combination
991 nparc=nparc+1
992 END IF
993 mm=mm+1
994 IF (mm >= bs) THEN
995 ll=ll+1
996 mm=mm-bs
997 END IF
998 END DO
999 nsparr(ir+1)=npar+nparc
1000 npar=npar-1
1001 END DO
1002
1003 END DO
1004 !$OMP END PARALLEL DO
1005
1006 ! sum up, fill row offsets
1007 nsparr(1)=1
1008 DO i=1,nd
1009 nsparr(i+1)=nsparr(i+1)+nsparr(i)
1010 END DO
1011 ! 'empty' diagonal elements needed too
1012 DO i=nd+1,nd+nac
1013 nsparr(i+1)=nsparr(i)+1
1014 END DO
1015 ntot=nsparr(nd+nac+1)-nsparr(1)
1016
1017 WRITE(*,*) ' '
1018 WRITE(*,*) 'PRBITS: number of diagonal elements',nd+nac
1019 WRITE(*,*) 'PRBITS: number of used elements',ntot
1020 WRITE(*,1000) 'fraction of used elements', 200.0*real(ntot,mps)/real(nd+nac,mps)/real(nd+nac+1,mps)
10211000 FORMAT(' PRBITS: ',a,f6.2,' %')
1022 RETURN
1023END SUBROUTINE prbits
1024
1033SUBROUTINE pcbits(npgrp,nsparr,nsparc)
1034 USE mpbits
1035 USE mpdalc
1036 IMPLICIT NONE
1037
1038 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
1039 INTEGER(mpl), DIMENSION(:), INTENT(IN) :: nsparr
1040 INTEGER(mpl), DIMENSION(:), INTENT(OUT) :: nsparc
1041
1042 INTEGER(mpi) :: ichunk
1043 INTEGER(mpi) :: i
1044 INTEGER(mpi) :: j
1045 INTEGER(mpi) :: mm
1046 INTEGER(mpi) :: nd
1047 INTEGER(mpi) :: ic
1048 INTEGER(mpi) :: ir
1049 INTEGER(mpl) :: kk
1050 INTEGER(mpl) :: ll
1051
1052 INTEGER(mpl) :: noffi
1053 INTEGER(mpl) :: noffr
1054
1055 LOGICAL :: btest
1056
1057 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
1058
1059 ll=0
1060 ichunk=min((n+nthrd-1)/nthrd/32+1,256)
1061 ! analyze bit fields ('upper half' (j>=i))
1062 ! parallelize row loop
1063 ! private copy of NTOT for each thread, combined at end, init with 0.
1064 !$OMP PARALLEL DO &
1065 !$OMP PRIVATE(I,NOFFI,LL,MM,J,IR,KK,IC) &
1066 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
1067 DO i=1,n
1068 noffi=int(i-1,mpl)*int(2*n+2-i,mpl)/2 ! row offset for I=J
1069 ! fill column list for row(s)
1070 DO ir=npgrp(i),npgrp(i+1)-1
1071 ! triangular part (parameter groups)
1072 ll=noffi/bs+i
1073 mm=0
1074 kk=nsparr(ir)
1075 DO j=i,n ! loop from diagonal element
1076 IF (btest(bitfieldcounters(ll),mm)) THEN
1077 ! 'all' columns in group
1078 DO ic=max(ir,npgrp(j)),npgrp(j+1)-1
1079 nsparc(kk)=ic
1080 kk=kk+1
1081 END DO
1082 ENDIF
1083 mm=mm+1
1084 IF (mm >= bs) THEN
1085 ll=ll+1
1086 mm=mm-bs
1087 END IF
1088 END DO
1089 ! rectangular part (parameters, constraints)
1090 noffr=ndimb+int(ir-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
1091 ll=noffr ! row offset
1092 mm=0
1093 DO j=1,nac
1094 IF (btest(bitfieldcounters(ll),mm)) THEN ! found parameter/constraint combination
1095 nsparc(kk)=nd+j
1096 kk=kk+1
1097 END IF
1098 mm=mm+1
1099 IF (mm >= bs) THEN
1100 ll=ll+1
1101 mm=mm-bs
1102 END IF
1103 END DO
1104 END DO
1105
1106 END DO
1107 !$OMP END PARALLEL DO
1108
1109 ! 'empty' diagonal elements needed too
1110 DO ir=nd+1,nd+nac
1111 kk=nsparr(ir)
1112 nsparc(kk)=ir
1113 END DO
1114
1115 WRITE(*,*) ' '
1116 WRITE(*,*) 'PCBITS: column list constructed ',nsparr(nd+nac+1)-nsparr(1), ' words'
1118 RETURN
1119END SUBROUTINE pcbits
1120
1127SUBROUTINE ckbits(npgrp,ndims)
1128 USE mpbits
1129 IMPLICIT NONE
1130
1131 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
1132 INTEGER(mpl), DIMENSION(4), INTENT(OUT) :: ndims
1133
1134 INTEGER(mpi) :: nwcp(0:1)
1135 INTEGER(mpi) :: irgn(2)
1136 INTEGER(mpi) :: inr(2)
1137 INTEGER(mpl) :: ll
1138 INTEGER(mpl) :: noffi
1139 INTEGER(mpi) :: i
1140 INTEGER(mpi) :: j
1141 INTEGER(mpi) :: last
1142 INTEGER(mpi) :: lrgn
1143 INTEGER(mpi) :: next
1144 INTEGER(mpi) :: icp
1145 INTEGER(mpi) :: ib
1146 INTEGER(mpi) :: icount
1147 INTEGER(mpi) :: kbfw
1148 INTEGER(mpi) :: jp
1149 INTEGER(mpi) :: mm
1150 LOGICAL :: btest
1151
1152 DO i=1,4
1153 ndims(i)=0
1154 END DO
1155 kbfw=1
1156 IF (ibfw > 1) kbfw=2
1157 ll=0
1158
1159 DO i=1,n
1160 noffi=int(i-1,mpl)*int(i,mpl)*int(ibfw,mpl)/2
1161 ll=noffi/bs+i
1162 mm=0
1163 inr(1)=0
1164 inr(2)=0
1165 irgn(1)=1
1166 irgn(2)=1
1167 last=0
1168 lrgn=0
1169 DO j=1,i
1170 icount=0
1171 next=0
1172 DO ib=0,ibfw-1
1173 IF (btest(bitfieldcounters(ll),mm)) icount=ibset(icount,ib)
1174 mm=mm+1
1175 IF (mm >= bs) THEN
1176 ll=ll+1
1177 mm=mm-bs
1178 END IF
1179 END DO
1180
1181 IF (icount > 0) ndims(1)=ndims(1)+1
1182 ! keep pair ?
1183 IF (icount >= ireqpe) THEN
1184 next=1 ! double
1185 IF (icount <= isngpe) next=2 ! single
1186 inr(next)=inr(next)+npgrp(j+1)-npgrp(j) ! number of parameters
1187 IF (next /= last) THEN
1188 irgn(next)=irgn(next)+1
1189 END IF
1190 END IF
1191 last=next
1192 END DO
1193
1194 DO jp=1,kbfw
1195 IF (inr(jp) > 0) THEN
1196 icp=0
1197 nwcp(0)=inr(jp) ! list of column indices (default)
1198 nwcp(1)=irgn(jp)*2 ! list of regions (group starts and offsets)
1199 ! compress row ?
1200 IF ((nwcp(1) < nwcp(0)).OR.iextnd > 0) THEN
1201 icp=1
1202 END IF
1203 ! total space
1204 ndims(2) =ndims(2) +nwcp(icp)
1205 ndims(jp+2)=ndims(jp+2)+nwcp(0)*(npgrp(i+1)-npgrp(i))
1206 END IF
1207 END DO
1208
1209 END DO
1210
1211 RETURN
1212END SUBROUTINE ckbits
1213
1220SUBROUTINE spbits(npgrp,nsparr,nsparc) ! collect elements
1221 USE mpbits
1222 USE mpdalc
1223 IMPLICIT NONE
1224
1225 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
1226 INTEGER(mpl), DIMENSION(:,:), INTENT(IN) :: nsparr
1227 INTEGER(mpi), DIMENSION(:), INTENT(OUT) :: nsparc
1228
1229 INTEGER(mpl) :: kl
1230 INTEGER(mpl) :: l
1231 INTEGER(mpl) :: ll
1232 INTEGER(mpl) :: l1
1233 INTEGER(mpl) :: n1
1234 INTEGER(mpl) :: ndiff
1235 INTEGER(mpl) :: noffi
1236 INTEGER(mpl) :: noffj
1237 INTEGER(mpi) :: i
1238 INTEGER(mpi) :: j
1239 INTEGER(mpi) :: jb
1240 INTEGER(mpi) :: k
1241 INTEGER(mpi) :: m
1242 INTEGER(mpi) :: ichunk
1243 INTEGER(mpi) :: next
1244 INTEGER(mpi) :: last
1245
1246 LOGICAL :: btest
1247
1248 ichunk=min((n+nthrd-1)/nthrd/32+1,256)
1249 DO jb=0,nspc-1
1250 ! parallelize row loop
1251 !$OMP PARALLEL DO &
1252 !$OMP PRIVATE(I,N1,NOFFI,NOFFJ,L,M,KL,L1,NDIFF,LAST,LL,J,NEXT) &
1253 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
1254 DO i=1,n
1255 n1=i+jb*(n+1)
1256 noffi=int(i-1,mpl)*int(i,mpl)*int(ibfw,mpl)/2
1257 l=noffi/bs+i
1258 m=jb
1259 kl=nsparr(1,n1) ! pointer to row in NSPARC
1260 l1=nsparr(2,n1) ! pointer to row in sparse matrix
1261 ndiff=(nsparr(1,n1+1)-kl)*(npgrp(i+1)-npgrp(i))-(nsparr(2,n1+1)-l1) ! 0 for no compression
1262 ll=l1
1263 last=0
1264
1265 DO j=1,i ! loop until diagonal element
1266 next=0
1267 IF(bitfieldcounters(l) /= 0) THEN
1268 IF(btest(bitfieldcounters(l),m)) THEN
1269 IF (ndiff == 0) THEN
1270 DO k=npgrp(j),npgrp(j+1)-1
1271 kl=kl+1
1272 nsparc(kl)=k ! column index
1273 END DO
1274 ELSE
1275 next=1
1276 IF (last == 0) THEN
1277 kl=kl+1
1278 nsparc(kl)=int(ll-l1,mpi)
1279 kl=kl+1
1280 nsparc(kl)=j
1281 END IF
1282 END IF
1283 ll=ll+(npgrp(j+1)-npgrp(j))
1284 END IF
1285 END IF
1286 last=next
1287 m=m+nspc
1288 IF (m >= bs) THEN
1289 m=m-bs
1290 l=l+1
1291 END IF
1292 END DO
1293
1294 ! extended storage ('2nd half' too) ?
1295 IF (iextnd > 0) THEN
1296 noffj=(i-1)*nspc
1297 m=int(mod(noffj,int(bs,mpl)),mpi)+jb
1298 last=0
1299 ! remaining columns
1300 DO j=i+1, n
1301 ! index for pair (J,I)
1302 noffi=int(j-1,mpl)*int(j,mpl)*int(ibfw,mpl)/2 ! for I=1
1303 l=noffi/bs+j+noffj/bs ! row offset + column offset
1304 next=0
1305 IF(btest(bitfieldcounters(l),m)) THEN
1306 IF (ndiff == 0) THEN
1307 DO k=npgrp(j),npgrp(j+1)-1
1308 kl=kl+1
1309 nsparc(kl)=k ! column index
1310 END DO
1311 ELSE
1312 next=1
1313 IF (last == 0) THEN
1314 kl=kl+1
1315 nsparc(kl)=int(ll-l1,mpi)
1316 kl=kl+1
1317 nsparc(kl)=j
1318 END IF
1319 END IF
1320 ll=ll+(npgrp(j+1)-npgrp(j))
1321 END IF
1322 last=next
1323
1324 END DO
1325 END IF
1326 ! next offset, end marker
1327 IF (ndiff /= 0) THEN
1328 kl=kl+1
1329 nsparc(kl)=int(ll-l1,mpi)
1330 kl=kl+1
1331 nsparc(kl)=n+1
1332 END IF
1333
1334 END DO
1335 !$OMP END PARALLEL DO
1336 END DO
1337
1338 n1=(n+1)*nspc
1339 WRITE(*,*) ' '
1340 WRITE(*,*) 'SPBITS: sparse structure constructed ',nsparr(1,n1), ' words'
1341 WRITE(*,*) 'SPBITS: dimension parameter of matrix',nsparr(2,1)
1342 IF (ibfw <= 1) THEN
1343 WRITE(*,*) 'SPBITS: index of last used location',nsparr(2,n1)
1344 ELSE
1345 WRITE(*,*) 'SPBITS: index of last used double',nsparr(2,n1/2)
1346 WRITE(*,*) 'SPBITS: index of last used single',nsparr(2,n1)
1347 END IF
1349 RETURN
1350END SUBROUTINE spbits
1351
1352
1356!
1357SUBROUTINE clbmap(in)
1358 USE mpbits
1359 USE mpdalc
1360 IMPLICIT NONE
1361
1362 INTEGER(mpi), INTENT(IN) :: in
1363
1364 INTEGER(mpl) :: noffd
1365 INTEGER(mpi) :: mb
1366
1367 ! save input parameter
1368 n2=in
1369 ! bit field array size
1370 noffd=int(n2,mpl)*int(n2-1,mpl)/2
1371 ndimb2=noffd/bs+n2
1372 mb=int(4.0e-6*real(ndimb2,mps),mpi)
1373 WRITE(*,*) ' '
1374 IF (mb > 0) THEN
1375 WRITE(*,*) 'CLBMAP: dimension of bit-map',ndimb2 , '(',mb,'MB)'
1376 ELSE
1377 WRITE(*,*) 'CLBMAP: dimension of bit-map',ndimb2 , '(< 1 MB)'
1378 END IF
1379 CALL mpalloc(bitmap,ndimb2,'INBMAP: bit storage')
1380 bitmap=0
1381 RETURN
1382END SUBROUTINE clbmap
1383
1389SUBROUTINE inbmap(im,jm) ! include element (I,J)
1390 USE mpbits
1391 IMPLICIT NONE
1392
1393 INTEGER(mpi), INTENT(IN) :: im
1394 INTEGER(mpi), INTENT(IN) :: jm
1395
1396 INTEGER(mpl) :: l
1397 INTEGER(mpi) :: i
1398 INTEGER(mpi) :: j
1399 INTEGER(mpi) :: noffj
1400 INTEGER(mpl) :: noffi
1401 INTEGER(mpi) :: m
1402
1403 IF(im == jm) RETURN ! diagonal
1404 j=min(im,jm)
1405 i=max(im,jm)
1406 IF(j <= 0) RETURN ! out low
1407 IF(i > n2) RETURN ! out high
1408 noffi=int(i-1,mpl)*int(i-2,mpl)/2 ! for J=1
1409 noffj=(j-1)
1410 l=noffi/bs+i+noffj/bs ! row offset + column offset
1411 ! add I instead of 1 to keep bit maps of different rows in different words (openMP !)
1412 m=mod(noffj,bs)
1413 bitmap(l)=ibset(bitmap(l),m)
1414 RETURN
1415END SUBROUTINE inbmap
1416
1423SUBROUTINE gpbmap(ngroup,npgrp,npair)
1424 USE mpbits
1425 IMPLICIT NONE
1426
1427 INTEGER(mpi), INTENT(IN) :: ngroup
1428 INTEGER(mpi), DIMENSION(:,:), INTENT(IN) :: npgrp
1429 INTEGER(mpi), DIMENSION(:), INTENT(OUT) :: npair
1430
1431 INTEGER(mpl) :: l
1432 INTEGER(mpl) :: noffi
1433 INTEGER(mpi) :: i
1434 INTEGER(mpi) :: j
1435 INTEGER(mpi) :: m
1436 LOGICAL :: btest
1437
1438 npair(1:ngroup)=0
1439 l=0
1440
1441 DO i=1,ngroup
1442 npair(i)=npair(i)+npgrp(2,i)-1 ! from own group
1443 noffi=int(i-1,mpl)*int(i-2,mpl)/2
1444 l=noffi/bs+i
1445 m=0
1446 DO j=1,i-1
1447 IF (btest(bitmap(l),m)) THEN
1448 ! from other group
1449 npair(i)=npair(i)+npgrp(2,j)
1450 npair(j)=npair(j)+npgrp(2,i)
1451 END IF
1452 m=m+1
1453 IF (m >= bs) THEN
1454 l=l+1
1455 m=m-bs
1456 END IF
1457 END DO
1458 END DO
1459
1460 RETURN
1461END SUBROUTINE gpbmap
1462
1469SUBROUTINE ggbmap(ipgrp,npair,npgrp)
1470 USE mpbits
1471 IMPLICIT NONE
1472
1473 INTEGER(mpi), INTENT(IN) :: ipgrp
1474 INTEGER(mpi), INTENT(OUT) :: npair
1475 INTEGER(mpi), DIMENSION(:), INTENT(OUT) :: npgrp
1476
1477 INTEGER(mpl) :: l
1478 INTEGER(mpl) :: noffi
1479 INTEGER(mpi) :: noffj
1480 INTEGER(mpi) :: i
1481 INTEGER(mpi) :: j
1482 LOGICAL :: btest
1483
1484 npair=0
1485
1486 i=ipgrp
1487 noffi=int(i-1,mpl)*int(i-2,mpl)/2 ! for J=1
1488 l=noffi/bs+i! row offset
1489 ! add I instead of 1 to keep bit maps of different rows in different words (openMP !)
1490 DO j=1,ipgrp-1
1491 noffj=j-1
1492 IF (btest(bitmap(l+noffj/bs),mod(noffj,bs))) THEN
1493 npair=npair+1
1494 npgrp(npair)=j
1495 END IF
1496 END DO
1497
1498 noffj=ipgrp-1
1499 DO i=ipgrp+1,n2
1500 noffi=int(i-1,mpl)*int(i-2,mpl)/2 ! for J=1
1501 l=noffi/bs+i ! row offset
1502 IF (btest(bitmap(l+noffj/bs),mod(noffj,bs))) THEN
1503 npair=npair+1
1504 npgrp(npair)=i
1505 END IF
1506 END DO
1507
1508 RETURN
1509END SUBROUTINE ggbmap
allocate array
Definition: mpdalc.f90:36
deallocate array
Definition: mpdalc.f90:42
subroutine pcbits(npgrp, nsparr, nsparc)
Analyze bit fields.
Definition: mpbits.f90:1034
subroutine ndbits(npgrp, ndims, nsparr, ihst)
Analyze bit fields.
Definition: mpbits.f90:306
subroutine clbits(in, jreqpe, jhispe, jsngpe, jextnd, idimb, ispc)
Calculate bit (field) array size, encoding.
Definition: mpbits.f90:183
subroutine plbits(in, inar, inac, idimb)
Calculate bit field array size (PARDISO).
Definition: mpbits.f90:256
subroutine spbits(npgrp, nsparr, nsparc)
Create sparsity information.
Definition: mpbits.f90:1221
subroutine irbits(i, j)
Fill bit fields (counters, rectangular part).
Definition: mpbits.f90:150
subroutine clbmap(in)
Clear (additional) bit map.
Definition: mpbits.f90:1358
subroutine inbmap(im, jm)
Fill bit map.
Definition: mpbits.f90:1390
subroutine ckbits(npgrp, ndims)
Check sparsity of matrix.
Definition: mpbits.f90:1128
subroutine ggbmap(ipgrp, npair, npgrp)
Get paired (parameter) groups from map.
Definition: mpbits.f90:1470
subroutine prbits(npgrp, nsparr)
Analyze bit fields.
Definition: mpbits.f90:935
subroutine gpbmap(ngroup, npgrp, npair)
Get pairs (statistic) from map.
Definition: mpbits.f90:1424
subroutine pblbits(npgrp, ibsize, nsparr, nsparc)
Analyze bit fields.
Definition: mpbits.f90:762
subroutine pbsbits(npgrp, ibsize, nnzero, nblock, nbkrow)
Analyze bit fields.
Definition: mpbits.f90:579
subroutine inbits(im, jm, inc)
Fill bit fields (counters, triangular part).
Definition: mpbits.f90:74
subroutine hmpent(ih, x)
entry flt.pt.
Definition: mphistab.f90:183
Bit field data.
Definition: mpbits.f90:44
integer(mpi) n2
matrix size (map)
Definition: mpbits.f90:53
integer(mpi) ireqpe
min number of pair entries
Definition: mpbits.f90:55
integer(mpl) ndimb2
dimension for bit map
Definition: mpbits.f90:49
integer(mpi) ibfw
bit field width
Definition: mpbits.f90:54
integer(mpi) iextnd
flag for extended storage (both 'halves' of sym.
Definition: mpbits.f90:57
integer(mpi) n
matrix size (counters, sparse, triangular part)
Definition: mpbits.f90:50
integer(mpi), parameter bs
number of bits in INTEGER(mpi)
Definition: mpbits.f90:63
integer(mpi) nspc
number of precision for sparse global matrix (1=D, 2=D+f)
Definition: mpbits.f90:58
integer(mpi) nar
additional rows (counters, sparse, rectangular part)
Definition: mpbits.f90:51
integer(mpi) isngpe
upper bound for pair entry single precision storage
Definition: mpbits.f90:56
integer(mpi), dimension(:), allocatable bitmap
fit field map for global parameters pairs (measurements)
Definition: mpbits.f90:62
integer(mpi) mxcnt
max value for bit field counters
Definition: mpbits.f90:59
integer(mpi) nac
additional columns (counters, sparse, rectangular part)
Definition: mpbits.f90:52
integer(mpi) nthrd
number of threads
Definition: mpbits.f90:60
integer(mpl) ndimb
dimension for bit (field) array
Definition: mpbits.f90:48
integer(mpi), dimension(:), allocatable bitfieldcounters
fit field counters for global parameters pairs (tracks)
Definition: mpbits.f90:61
(De)Allocate vectors and arrays.
Definition: mpdalc.f90:24
Definition of constants.
Definition: mpdef.f90:24