c ================================================== c = 並列 Householder三重対角化 ルーチン PTred = c = --- 全固有値、固有ベクトル求解版 --- = c ================================================== c ver. 2.00 1997 7/30 c c === 入出力仕様 c c [入力変数] c A(0:nx-1, 0:ny-1) : 対称密行列 c u_x(0:ny-1), u_y(0,ny-1) : 枢軸ベクトルワークエリア c x_k(0:nx-1), y_k(0:ny-1) : 計算用ワークエリア c rbuf(0:nx-1) : 通信用ワークエリア c n : 行列 A の大域的な次元数 c nx, ny : 行列 A の局所的な次元数 c ncelx, ncely : 二次元ラベル付時の各次元 x, y c におけるプロセッサ台数の総数 c xlogp, ylogp : 二次元ラベル付時の各次元 x, y c におけるプロセッサ台数の総数の2を底とする log の値 c cid : 一次元ラベル付における自プロセッサ番号 c ncidx, ncidy : 二次元ラベル付における自プロセッサ番号 c U(1:ny, 1:n) : Householderベクトル保存用 c ALP(1:n) : Householderベクトル定数保存用 c MPP_COMM_WORLD : MPIのコミュニケータ c MPP_DOUBLE_PRECISION, : MPIのデータタイプ c MPP_SUM c c [出力変数] c A(0:nx-1, 0:ny-1) : 三重対角行列 c c c All Rights Reserved, Copyright (C) 1998, T.Katagiri c/*- c * Copyright (c) 1998 Information Promotion Agency of Japan and T.Katagiri c * All rights reserved. c * c * Redistribution and non-commercial use in source and binary forms, c * with or without modification, are permitted provided that the following c * conditions are met: c * c * 1. Redistributions of source code must retain the above copyright c * notice, this list of conditions and the following disclaimer. c * c * 2. Redistributions in binary form must reproduce the above copyright c * notice, this list of conditions and the following disclaimer in the c * documentation and/or other materials provided with the distribution. c * c * 3. All advertising materials mentioning features or use of this c * software must display the following acknowledgement: c * c * This product includes software developed by Information Promotion c * Agency of Japan and T.Katagiri c * c * 4. The name "Information Promotion Agency" or "T.Katagiri" should not be c * used to endorse or promote products derived from this software c * without specific prior written permission. c * c * 5. Any use of the source code or the binary in a commercial product, c * whether may it be the origial representation or in some modified form, c * is not permitted without specific prior written permission. c * c * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND c * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE c * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE c * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE c * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL c * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS c * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) c * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT c * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY c * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF c * SUCH DAMAGE. c */ subroutine PTred(A, u_x, u_y, x_k, y_k, rbuf, n, nx, ny, + ncelx, ncely, xlogp, cid, ncidx, ncidy, + U, ALP) include "mppf.h" common /MPI2d/MPP_COMM_X, MPP_COMM_Y, ISTATUS c === 引数宣言 real*8, dimension (0:nx-1, 0:ny-1) :: A real*8, dimension (0:ny-1) :: u_x, u_y real*8, dimension (0:nx-1) :: x_k real*8, dimension (0:ny-1) :: y_k real*8, dimension (0:nx-1) :: rbuf integer n, nx, ny integer ncelx, ncely integer xlogp integer cid, ncidx, ncidy real*8, dimension (1:nx, 1:n) :: U real*8, dimension (1:n) :: ALP c == general val. integer i, j, k, l, m, iter c == 1d control val. integer end_iter c == 2d control val. integer r_iter, d_iter integer original_length_x, original_length_y integer local_length_x, local_length_y integer block_length_x, block_length_y integer init_x, init_y integer pivot_PE_x, pivot_PE_y c == calculation val. real*8 sigma, sigma2, sigma2_t, al, mu, mu_t real*8 tmpr1, tmpr2, tmpr3, tmpr4, tmpr5, tmpr6, tmpr7, tmpr8 real*8 tmpu1, tmpu2, tmpu3, tmpu4, tmpu5, tmpu6 c == MPI val integer ierr c == parallel control function integer CYCLIC_OWNER, CYCLIC_LOC ierr = cid c ===== 制御変数設定 c r_iter : 列方向の大域所有要素 (1〜n) c d_iter : 行方向 r_iter = ncely*idfloor(dfloat(n)/dfloat(ncely)) + ncidy + 1 if (r_iter .gt. n) r_iter = r_iter - ncely d_iter = ncelx*idfloor(dfloat(n)/dfloat(ncelx)) + ncidx + 1 if (d_iter .gt. n) d_iter = d_iter - ncelx c === 最大のデータサイズ original_length_x = idceiling(dfloat(n)/dfloat(ncelx)) c === 自分のデータサイズ local_length_x = n / ncelx k = n - local_length_x * ncelx do i=0, ncelx-1 if (k .gt. 0) then if (ncidx .eq. i) then local_length_x = local_length_x + 1 endif endif k = k - 1 enddo c === 最大のデータサイズ original_length_y = idceiling(dfloat(n)/dfloat(ncely)) c === 自分のデータサイズ local_length_y = n / ncely k = n - local_length_y * ncely do i=0, ncely-1 if (k .gt. 0) then if (ncidy .eq. i) then local_length_y = local_length_y + 1 endif endif k = k - 1 enddo init_x = 0 init_y = 0 c ===== ループ最終値設定 if (r_iter .gt. d_iter) then end_iter = d_iter else end_iter = r_iter endif if (end_iter .gt. n-2) end_iter = n-2 do iter=0, end_iter-1 c === 制御変数初期化 pivot_PE_x = CYCLIC_OWNER(iter, ncelx) pivot_PE_y = CYCLIC_OWNER(iter, ncely) block_length_x = original_length_x - CYCLIC_LOC(iter, ncelx) block_length_y = original_length_y - CYCLIC_LOC(iter, ncely) c === Y方向 枢軸ベクトル列転送 if (pivot_PE_y .eq. ncidy) then call MPP_BCAST(A(init_x,init_y), block_length_y, & MPP_DOUBLE_PRECISION, ncidy, MPP_COMM_Y, ierr) *voption vec, indep do i=0,block_length_x-1 u_y(i) = A(init_x+i,init_y) enddo else call MPP_BCAST(u_y, block_length_y, MPP_DOUBLE_PRECISION, & pivot_PE_y, MPP_COMM_Y, ierr) endif c === 枢軸ベクトル計算 c == 二乗和計算 if (pivot_PE_x .eq. ncidx) u_y(0) = 0.0d0 sigma2_t = 0.0d0 *voption vec, indep do i=0,local_length_x-1 sigma2_t = sigma2_t + u_y(i)*u_y(i) enddo call MPP_ALLREDUCE(sigma2_t, sigma2, 1, & MPP_DOUBLE_PRECISION, MPP_SUM, MPP_COMM_X, ierr) sigma = dsqrt(sigma2) c == ノルム加算 k = CYCLIC_OWNER(iter+1, ncelx) if (k .eq. ncidx) then if (u_y(0) .lt. 0.0) sigma = -sigma al = 1.0d0 / (sigma2 + dabs(u_y(0)*sigma)) u_y(0) = u_y(0) + sigma call MPP_BCAST(al, 1, MPP_DOUBLE_PRECISION, & ncidx, MPP_COMM_X, ierr) else call MPP_BCAST(al, 1, MPP_DOUBLE_PRECISION, & k, MPP_COMM_X, ierr) endif c === X方向 枢軸ベクトル列転送 if (ncidx .eq. ncidy) then call MPP_BCAST(u_y, block_length_y, MPP_DOUBLE_PRECISION, & ncidy, MPP_COMM_X, ierr) *voption vec, indep do j=0,block_length_x-1 u_x(j) = u_y(j) enddo else call MPP_BCAST(u_x, block_length_y, MPP_DOUBLE_PRECISION, & ncidy, MPP_COMM_X, ierr) endif c === y = u_x A_k m = local_length_y/8 j = 0 do k=0, m-1 tmpr1 = 0.0d0 tmpr2 = 0.0d0 tmpr3 = 0.0d0 tmpr4 = 0.0d0 tmpr5 = 0.0d0 tmpr6 = 0.0d0 tmpr7 = 0.0d0 tmpr8 = 0.0d0 do i=0,local_length_x-1 tmpr1 = tmpr1 + A(init_x+i,init_y+j ) * u_y(i) tmpr2 = tmpr2 + A(init_x+i,init_y+j+1) * u_y(i) tmpr3 = tmpr3 + A(init_x+i,init_y+j+2) * u_y(i) tmpr4 = tmpr4 + A(init_x+i,init_y+j+3) * u_y(i) tmpr5 = tmpr5 + A(init_x+i,init_y+j+4) * u_y(i) tmpr6 = tmpr6 + A(init_x+i,init_y+j+5) * u_y(i) tmpr7 = tmpr7 + A(init_x+i,init_y+j+6) * u_y(i) tmpr8 = tmpr8 + A(init_x+i,init_y+j+7) * u_y(i) enddo y_k(j) = tmpr1 * al y_k(j+1) = tmpr2 * al y_k(j+2) = tmpr3 * al y_k(j+3) = tmpr4 * al y_k(j+4) = tmpr5 * al y_k(j+5) = tmpr6 * al y_k(j+6) = tmpr7 * al y_k(j+7) = tmpr8 * al j = j + 8 enddo l = modulo(local_length_y, 8) if (l .eq. 0) goto 100 do j=m*8, local_length_y-1 tmpr1 = 0.0d0 do i=0,local_length_x-1 tmpr1 = tmpr1 + A(init_x+i,init_y+j)*u_y(i) enddo y_k(j) = tmpr1*al enddo 100 continue call reduction_vector_tree_x(y_k, ny, rbuf, nx, 0, & ncelx, ncidx, block_length_y, xlogp) c === x = A_k u_y (対称のため対角成分の u_yをブロードキャスト) c === Y方向 y_k ベクトル列転送 if (ncidx .eq. ncidy) then call MPP_BCAST(y_k, block_length_y, MPP_DOUBLE_PRECISION, & ncidx, MPP_COMM_Y, ierr) *voption vec, indep do j=0,block_length_y-1 x_k(j) = y_k(j) enddo else call MPP_BCAST(x_k, block_length_x, MPP_DOUBLE_PRECISION, & ncidx, MPP_COMM_Y, ierr) endif c === mu = u_k * u_y mu_t = 0.0d0 *voption vec, indep do i=0, local_length_x-1 mu_t = mu_t + x_k(i)*u_y(i) enddo mu_t = mu_t * al call MPP_ALLREDUCE(mu_t, mu, 1, MPP_DOUBLE_PRECISION, & MPP_SUM, MPP_COMM_X, ierr) c *** 行列更新部 c === A_k = A_k + u(mu u_k^T - y_k^T) - x u_k^T m = local_length_y/6 do k=0,m-1 j = k*6 tmpu1 = u_x(j) tmpr1 = mu * tmpu1 - y_k(j) tmpu2 = u_x(j+1) tmpr2 = mu * tmpu2 - y_k(j+1) tmpu3 = u_x(j+2) tmpr3 = mu * tmpu3 - y_k(j+2) tmpu4 = u_x(j+3) tmpr4 = mu * tmpu4 - y_k(j+3) tmpu5 = u_x(j+4) tmpr5 = mu * tmpu5 - y_k(j+4) tmpu6 = u_x(j+5) tmpr6 = mu * tmpu6 - y_k(j+5) *voption vec, indep do i=0,local_length_x-1 A(init_x+i,init_y+j) = A(init_x+i,init_y+j) & + u_y(i)*tmpr1 - x_k(i)*tmpu1 A(init_x+i,init_y+j+1) = A(init_x+i,init_y+j+1) & + u_y(i)*tmpr2 - x_k(i)*tmpu2 A(init_x+i,init_y+j+2) = A(init_x+i,init_y+j+2) & + u_y(i)*tmpr3 - x_k(i)*tmpu3 A(init_x+i,init_y+j+3) = A(init_x+i,init_y+j+3) & + u_y(i)*tmpr4 - x_k(i)*tmpu4 A(init_x+i,init_y+j+4) = A(init_x+i,init_y+j+4) & + u_y(i)*tmpr5 - x_k(i)*tmpu5 A(init_x+i,init_y+j+5) = A(init_x+i,init_y+j+5) & + u_y(i)*tmpr6 - x_k(i)*tmpu6 enddo enddo l = modulo(local_length_y,6) if (l .eq. 0) goto 300 do j=m*6,local_length_y-1 tmpu1 = u_x(j) tmpr1 = mu * tmpu1 - y_k(j) *voption vec, indep do i=0,local_length_x-1 A(init_x+i,init_y+j) = A(init_x+i,init_y+j) & + u_y(i)*tmpr1 - x_k(i)*tmpu1 enddo enddo 300 continue c === 制御変数 if (pivot_PE_x .eq. ncidx) then init_x = init_x + 1 local_length_x = local_length_x - 1 endif if (pivot_PE_y .eq. ncidy) then init_y = init_y + 1 local_length_y = local_length_y - 1 endif c === Householder逆変換用 do i=0,block_length_x-1 U(i+1,iter+1) = u_y(i) enddo ALP(iter+1) = al enddo c ===== end of iter c *** ダミーループ do iter=end_iter,n-3 c === 制御変数初期化 pivot_PE_x = CYCLIC_OWNER(iter, ncelx) pivot_PE_y = CYCLIC_OWNER(iter, ncely) block_length_x = original_length_x - CYCLIC_LOC(iter, ncelx) block_length_y = original_length_y - CYCLIC_LOC(iter, ncely) c === Y方向 枢軸ベクトル列転送 call MPP_BCAST(u_y, block_length_y, MPP_DOUBLE_PRECISION, & pivot_PE_y, MPP_COMM_Y, ierr) c === 枢軸ベクトル計算 c == 二乗和計算 if (pivot_PE_x .eq. ncidx) u_y(0) = 0.0d0 sigma2_t = 0.0d0 *voption vec, indep do i=0,local_length_x-1 sigma2_t = sigma2_t + u_y(i)*u_y(i) enddo call MPP_ALLREDUCE(sigma2_t, sigma2, 1, & MPP_DOUBLE_PRECISION, MPP_SUM, MPP_COMM_X, ierr) sigma = dsqrt(sigma2) c == ノルム加算 k = CYCLIC_OWNER(iter+1, ncelx) if (k .eq. ncidx) then if (u_y(0) .lt. 0.0) sigma = -sigma al = 1.0d0 / (sigma2 + dabs(u_y(0)*sigma)) u_y(0) = u_y(0) + sigma call MPP_BCAST(al, 1, MPP_DOUBLE_PRECISION, & ncidx, MPP_COMM_X, ierr) else call MPP_BCAST(al, 1, MPP_DOUBLE_PRECISION, & k, MPP_COMM_X, ierr) endif c === X方向 枢軸ベクトル列転送 if (ncidx .eq. ncidy) then call MPP_BCAST(u_y, block_length_y, MPP_DOUBLE_PRECISION, & ncidy, MPP_COMM_X, ierr) *voption vec, indep do j=0,block_length_x-1 u_x(j) = u_y(j) enddo else call MPP_BCAST(u_x, block_length_y, MPP_DOUBLE_PRECISION, & ncidy, MPP_COMM_X, ierr) endif c === y = u_x A_k do j=0,block_length_y-1 y_k(j) = 0.0d0 enddo call reduction_vector_tree_x(y_k, ny, rbuf, nx, 0, & ncelx, ncidx, block_length_y, xlogp) c === x = A_k u_y (対称のため対角成分の u_yをブロードキャスト) c === Y方向 y_k ベクトル列転送 if (ncidx .eq. ncidy) then call MPP_BCAST(y_k, block_length_x, MPP_DOUBLE_PRECISION, & ncidx, MPP_COMM_Y, ierr) *voption vec, indep do j=0,block_length_y-1 x_k(j) = y_k(j) enddo else call MPP_BCAST(x_k, block_length_x, MPP_DOUBLE_PRECISION, & ncidx, MPP_COMM_Y, ierr) endif mu_t = 0.0d0 call MPP_ALLREDUCE(mu_t, mu, 1, MPP_DOUBLE_PRECISION, & MPP_SUM, MPP_COMM_X, ierr) c === 制御変数 if (pivot_PE_x .eq. ncidx) then init_x = init_x + 1 local_length_x = local_length_x - 1 endif if (pivot_PE_y .eq. ncidy) then init_y = init_y + 1 local_length_y = local_length_y - 1 endif c === Householder逆変換用 do i=0,block_length_x-1 U(i+1,iter+1) = u_y(i) enddo ALP(iter+1) = al enddo c ==== end of dummy loop return end c ============================================== c = 並列Householder 三重対角化ルーチンの終り = c ==============================================