milupHPC documentation
  • src
  • cuda_utils
linalg.cu
Go to the documentation of this file.
1#include "../../include/cuda_utils/linalg.cuh"
2
3namespace CudaUtils {
4
5 __device__ int sign(real x) {
6 if (x < 0) { return -1; }
7 else if (x > 0) { return 1; }
8 else { return 0; }
9 }
10
11 __device__ int stressIndex(int particleIndex, int row, int col)
12 {
13 return particleIndex*DIM*DIM+row*DIM+col;
14 }
15
16 __device__ void copyMatrix(real src[DIM][DIM], real dst[DIM][DIM]) {
17 int i, j;
18
19 for (i = 0; i < DIM; i++) {
20 for (j = 0; j < DIM; j++) {
21 dst[i][j] = src[i][j];
22 }
23 }
24
25 }
26
27 __device__ void transposeMatrix(real m[DIM][DIM]) {
28 int i, j;
29 real mt[DIM][DIM];
30 for (i = 0; i < DIM; i++) {
31 for (j = 0; j < DIM; j++) {
32 mt[j][i] = m[i][j];
33 }
34 }
35 for (i = 0; i < DIM; i++) {
36 for (j = 0; j < DIM; j++) {
37 m[i][j] = mt[i][j];
38 }
39 }
40 }
41
42 // calculates C = A B and stores in C
43 __device__ void multiplyMatrix(real A[DIM][DIM], real B[DIM][DIM], real C[DIM][DIM]) {
44 int i, j, k;
45
46 real vprime[DIM][DIM];
47
48 for (i = 0; i < DIM; i++) {
49 for (j = 0; j < DIM; j++) {
50 vprime[i][j] = 0.0;
51 }
52 }
53
54 for (i = 0; i < DIM; i++) {
55 for (j = 0; j < DIM; j++) {
56 for (k = 0; k < DIM; k++) {
57 vprime[i][j] += A[i][k] * B[k][j];
58 }
59 }
60 }
61 for (i = 0; i < DIM; i++) {
62 for (j = 0; j < DIM; j++) {
63 C[i][j] = vprime[i][j];
64 }
65 }
66
67 }
68
69 __device__ void multiply(real A[][DIM], real B[][DIM], real C[][DIM]) {
70 int i, j, k;
71 for (i = 0; i < DIM; i++) {
72 for (j = 0; j < DIM; j++) {
73 for (k = 0; k < DIM; k++) {
74 C[i][j] += A[i][k]*B[k][j];
75 }
76 }
77 }
78 }
79
80 __device__ void identityMatrix(real A[DIM][DIM]) {
81 int i, j;
82 for (i = 0; i < DIM; i++) {
83 for (j = 0; j < DIM; j++) {
84 A[i][j] = 0.0;
85 }
86 A[i][i] = 1.0;
87 }
88 }
89
90
91 // returns the indices of the greatest non-diagonal element of M
92 __device__ int maxMatrix(real M[DIM][DIM], int *e, int *f, real *elmax) {
93 int i, j;
94 real max = 0.0;
95 int ierror = 1;
96
97 for (i = 0; i < DIM; i++) {
98 for (j = 0; j < DIM; j++) {
99 if (i == j)
100 continue;
101 if (cuda::math::abs(M[i][j]) >= max) {
102 max = cuda::math::abs(M[i][j]);
103 *e = i;
104 *f = j;
105 ierror = 0;
106 }
107 }
108 }
109 *elmax = max;
110 return ierror;
111 }
112
113
114 //
115 // help function for the jacobi method
116 // returns: M' = A^T M A, and A_ef = s = -A_ef, A_ee = A_ff = c
117 //
118 __device__ void rotateMatrix(volatile real m[DIM][DIM], volatile real c, volatile real s, volatile int e,
119 volatile int f) {
120 int i, j;
121 volatile real mprime[DIM][DIM];
122
123 // first copy the matrix
124 for (i = 0; i < DIM; i++)
125 for (j = 0; j < DIM; j++)
126 mprime[i][j] = m[i][j];
127
128 // now the elements that change
129 mprime[e][e] = c * c * m[e][e] + s * s * m[f][f] - 2 * s * c * m[e][f];
130 mprime[f][f] = c * c * m[f][f] + s * s * m[e][e] + 2 * s * c * m[e][f];
131 mprime[e][f] = (c * c - s * s) * m[e][f] + s * c * (m[e][e] - m[f][f]);
132 mprime[f][e] = mprime[e][f];
133
134 // the other elements in columns and rows e, f
135 // actually, this is only one in 3D and 0 in 2D
136 for (i = 0; i < DIM; i++) {
137 if (i == f || i == e)
138 continue;
139 mprime[e][i] = c * m[i][e] - s * m[i][f];
140 mprime[i][e] = mprime[e][i];
141 mprime[f][i] = c * m[i][f] + s * m[i][e];
142 mprime[i][f] = mprime[f][i];
143 }
144
145 // set the matrix to the rotated one
146 for (i = 0; i < DIM; i++)
147 for (j = 0; j < DIM; j++)
148 m[i][j] = mprime[i][j];
149 }
150
151
152 //
153 // computes all eigenvalues and eigenvectors of the _symmetric_ matrix M
154 // using the jacobi method and stores them in eigenvals and the eigenvecs as columns
155 // in the transformation matrix v
156 //
157 // returns the number of iterations
158 //
159 __device__ int calculateAllEigenvalues(real M[DIM][DIM], real eigenvalues[DIM], real v[DIM][DIM]) {
160 int i, j;
161 real diagM[DIM][DIM] = {0.0,};
162 real c, s, t, thta;
163 real A[DIM][DIM];
164 real vtmp[DIM][DIM];
165 int e, f;
166 int error;
167 real max = -1e300;
168 int nit = 0;
169 i = j = e = f = 0;
170 c = s = t = thta = 0.0;
171 error = 0;
172
173#define EPS_JACOBI 1e-10
174
175 for (i = 0; i < DIM; i++) {
176 for (j = 0; j < DIM; j++) {
177 diagM[i][j] = M[i][j];
178 v[i][j] = 0.0;
179 }
180 v[i][i] = 1.0;
181 }
182
183 do {
184 nit++;
185 error = maxMatrix(diagM, &e, &f, &max);
186 if (error) {
187 printf("No maximum element found.\n");
188 }
189 if (max > 0) {
190 // rotate matrix
191 thta = (diagM[f][f] - diagM[e][e]) / (2 * diagM[e][f]);
192 if (thta < 0)
193 t = -1. / (cuda::math::abs(thta) + cuda::math::sqrt(thta * thta + 1));
194 else
195 t = 1. / (cuda::math::abs(thta) + cuda::math::sqrt(thta * thta + 1));
196 // the elements of the rotation matrix
197 c = 1. / (cuda::math::sqrt(t * t + 1));
198 s = t * c;
199 // do diagM' = A^T diagM A
200 rotateMatrix(diagM, c, s, e, f);
201 identityMatrix(A);
202 A[e][e] = c;
203 A[f][f] = c;
204 A[e][f] = -s;
205 A[f][e] = s;
206 // calculate the eigenvectors
207 multiplyMatrix(v, A, vtmp);
208 copyMatrix(vtmp, v);
209 }
210 } while (max > EPS_JACOBI);
211
212 for (i = 0; i < DIM; i++) {
213 eigenvalues[i] = diagM[i][i];
214 }
215 return nit;
216 }
217
218
219 //
220 // computes the eigenvalues of the _symmetric_ matrix M
221 // using the jacobi method
222 // returns the greatest eigenvalue
223 //
224 __device__ real calculateMaxEigenvalue(real M[DIM][DIM]) {
225 int i, j;
226 real diagM[DIM][DIM] = {0.0,};
227 real c, s, t, thta;
228 int e, f;
229 int error;
230 real max;
231 real max_ev;
232 int nit = 0;
233 i = j = e = f = 0;
234 c = s = t = thta = 0.0;
235 max = max_ev = 0;
236 error = 0;
237
238
239#define EPS_JACOBI 1e-10
240
241 for (i = 0; i < DIM; i++)
242 for (j = 0; j < DIM; j++)
243 diagM[i][j] = M[i][j];
244
245 do {
246 nit++;
247 error = maxMatrix(diagM, &e, &f, &max);
248 if (error) {
249 printf("No maximum element found.\n");
250 }
251 if (max > 0) {
252 // rotate matrix
253 thta = (diagM[f][f] - diagM[e][e]) / (2 * diagM[e][f]);
254 if (thta < 0)
255 t = -1. / (cuda::math::abs(thta) + cuda::math::sqrt(thta * thta + 1));
256 else
257 t = 1. / (cuda::math::abs(thta) + cuda::math::sqrt(thta * thta + 1));
258 // the elements of the rotation matrix
259 c = 1. / (cuda::math::sqrt(t * t + 1));
260 s = t * c;
261 // do diagM' = A^T diagM A
262 rotateMatrix(diagM, c, s, e, f);
263 }
264 } while (max > EPS_JACOBI || nit < 5);
265
266 max_ev = diagM[0][0];
267 for (i = 1; i < DIM; i++) {
268 if (diagM[i][i] > max_ev) {
269 max_ev = diagM[i][i];
270 }
271 }
272 return max_ev;
273 }
274
275 __device__ real det2x2(real a, real b, real c, real d) {
276 return a * d - c * b;
277 }
278
279 __device__ int invertMatrix(real *m, real *inverted) {
280 real det;
281#if (DIM == 2)
282 real a, b, c, d;
283 a = m[0*DIM+0];
284 b = m[0*DIM+1];
285 c = m[1*DIM+0];
286 d = m[1*DIM+1];
287
288 det = det2x2(a,b,c,d);
289 // if (det < 1e-8) return -1;
290 // if (det < 1e-10) det = 1e-10;
291 det = 1./det;
292
293 inverted[0*DIM+0] = det*d;
294 inverted[0*DIM+1] = -det*b;
295 inverted[1*DIM+0] = -det*c;
296 inverted[1*DIM+1] = det*a;
297#elif (DIM == 3)
298 det = m[0 * DIM + 0] * (m[1 * DIM + 1] * m[2 * DIM + 2] - m[2 * DIM + 1] * m[1 * DIM + 2])
299 - m[0 * DIM + 1] * (m[1 * DIM + 0] * m[2 * DIM + 2] - m[1 * DIM + 2] * m[2 * DIM + 0])
300 + m[0 * DIM + 2] * (m[1 * DIM + 0] * m[2 * DIM + 1] - m[1 * DIM + 1] * m[2 * DIM + 0]);
301
302 // inverse determinante
303
304 if (det < 1e-8) return -1;
305 det = 1.0 / det;
306
307 inverted[0*DIM+0] = (m[1*DIM+ 1] * m[2*DIM+ 2] - m[2*DIM+ 1] * m[1*DIM+ 2]) * det;
308 inverted[0*DIM+1] = (m[0*DIM+ 2] * m[2*DIM+ 1] - m[0*DIM+ 1] * m[2*DIM+ 2]) * det;
309 inverted[0*DIM+2] = (m[0*DIM+ 1] * m[1*DIM+ 2] - m[0*DIM+ 2] * m[1*DIM+ 1]) * det;
310 inverted[1*DIM+0] = (m[1*DIM+ 2] * m[2*DIM+ 0] - m[1*DIM+ 0] * m[2*DIM+ 2]) * det;
311 inverted[1*DIM+1] = (m[0*DIM+ 0] * m[2*DIM+ 2] - m[0*DIM+ 2] * m[2*DIM+ 0]) * det;
312 inverted[1*DIM+2] = (m[1*DIM+ 0] * m[0*DIM+ 2] - m[0*DIM+ 0] * m[1*DIM+ 2]) * det;
313 inverted[2*DIM+0] = (m[1*DIM+ 0] * m[2*DIM+ 1] - m[2*DIM+ 0] * m[1*DIM+ 1]) * det;
314 inverted[2*DIM+1] = (m[2*DIM+ 0] * m[0*DIM+ 1] - m[0*DIM+ 0] * m[2*DIM+ 1]) * det;
315 inverted[2*DIM+2] = (m[0*DIM+ 0] * m[1*DIM+ 1] - m[1*DIM+ 0] * m[0*DIM+ 1]) * det;
316#endif
317
318 return 1;
319 }
320}
EPS_JACOBI
#define EPS_JACOBI
CudaUtils
Definition: cuda_utilities.cuh:100
CudaUtils::calculateAllEigenvalues
__device__ int calculateAllEigenvalues(real M[DIM][DIM], real eigenvalues[DIM], real v[DIM][DIM])
Computes all eigenvalues and eigenvectors of the symmetric matrix M.
Definition: linalg.cu:159
CudaUtils::maxMatrix
__device__ int maxMatrix(real M[DIM][DIM], int *e, int *f, real *elmax)
Returns the indices of the greatest non-diagonal element of the matrix M.
Definition: linalg.cu:92
CudaUtils::stressIndex
__device__ int stressIndex(int particleIndex, int row, int col)
map [i][j] to [i*DIM*DIM+j] for the tensors
Definition: linalg.cu:11
CudaUtils::calculateMaxEigenvalue
__device__ real calculateMaxEigenvalue(real M[DIM][DIM])
Computes the eigenvalues of the symmetric matrix M.
Definition: linalg.cu:224
CudaUtils::transposeMatrix
__device__ void transposeMatrix(real m[DIM][DIM])
Transpose matrix.
Definition: linalg.cu:27
CudaUtils::copyMatrix
__device__ void copyMatrix(real src[DIM][DIM], real dst[DIM][DIM])
Deep copy of matrix.
Definition: linalg.cu:16
CudaUtils::sign
__device__ int sign(real x)
Get sign of floating point variable.
Definition: linalg.cu:5
CudaUtils::det2x2
__device__ real det2x2(real a, real b, real c, real d)
Determinant of a 2x2 matrix.
Definition: linalg.cu:275
CudaUtils::multiplyMatrix
__device__ void multiplyMatrix(real A[DIM][DIM], real B[DIM][DIM], real C[DIM][DIM])
Definition: linalg.cu:43
CudaUtils::invertMatrix
__device__ int invertMatrix(real *m, real *inverted)
Invert matrix.
Definition: linalg.cu:279
CudaUtils::rotateMatrix
__device__ void rotateMatrix(volatile real m[DIM][DIM], volatile real c, volatile real s, volatile int e, volatile int f)
Rotate matrix.
Definition: linalg.cu:118
CudaUtils::multiply
__device__ void multiply(real A[][DIM], real B[][DIM], real C[][DIM])
Definition: linalg.cu:69
CudaUtils::identityMatrix
__device__ void identityMatrix(real A[DIM][DIM])
Return identity matrix.
Definition: linalg.cu:80
cuda::math::sqrt
__device__ real sqrt(real a)
Square root of a floating point value.
Definition: cuda_utilities.cu:456
cuda::math::abs
__device__ real abs(real a)
Absolute value of a floating point value.
Definition: cuda_utilities.cu:448
cuda::math::max
__device__ real max(real a, real b)
Maximum value out of two floating point values.
Definition: cuda_utilities.cu:431
real
double real
Definition: parameter.h:15
DIM
#define DIM
Dimension of the problem.
Definition: parameter.h:38

milupHPC - src/cuda_utils/linalg.cu Source File
Generated on Wed Aug 31 2022 12:16:52 by Doxygen 1.9.3