summaryrefslogtreecommitdiff
path: root/source/math.h
diff options
context:
space:
mode:
authortalha <sarcxd@gmail.com>2025-01-14 22:59:22 +0500
committertalha <sarcxd@gmail.com>2025-01-14 22:59:22 +0500
commitce28ed66a5cb155285a5971013f54919757cd65a (patch)
tree70505fcbbce543255a0ef24f2caa2a3d932ce6ba /source/math.h
parent05f82730e6d74ab604b02504cc928e764542a8c2 (diff)
BRANCH: Working on benchmarking new renderer, adding simdbranch/simd-math
Diffstat (limited to 'source/math.h')
-rwxr-xr-xsource/math.h153
1 files changed, 95 insertions, 58 deletions
diff --git a/source/math.h b/source/math.h
index 3f20e43..1ac3605 100755
--- a/source/math.h
+++ b/source/math.h
@@ -8,26 +8,21 @@
// Absolute value of x. The argument is evaluated more than once, so avoid
// passing expressions with side effects.
#define ABS(x) ((x) < 0 ? (-(x)) : (x))
// Smaller of x and y. The previous expansion returned (y) when x < y, which is
// the maximum. Arguments may be evaluated twice; avoid side effects.
#define MIN(x,y) ((x) < (y) ? (x) : (y))
-// @todo:
+// @todo:
// - make everything simd
-
-// @note: Regarding functions written for completeness sake
-// These operations are just defined and not expressed.
-// They are kept here for completeness sake BUT
-// since I have not had to do anything related to these, I have not created them.
-
+#define USE_SSE 1
// Clamps x to [bottom, top]: returns bottom when x < bottom, top when x > top,
// and x unchanged otherwise.
// The lower bound is tested first, so if bottom > top any x below bottom yields bottom.
r32 clampf(r32 x, r32 bottom, r32 top)
{
if (x < bottom)
{
x = bottom;
- }
+ }
else if (x > top)
{
x = top;
}
-
+
return x;
}
@@ -99,12 +94,12 @@ union Vec2 {
};
union Vec3 {
- struct {
- r32 x;
- r32 y;
- r32 z;
- };
- r32 data[3];
+ struct {
+ r32 x;
+ r32 y;
+ r32 z;
+ };
+ r32 data[3];
Vec2 v2() {
return Vec2{x, y};
@@ -114,22 +109,21 @@ union Vec3 {
typedef Vec3 RGB;
// Four-component vector. The named fields, data[] and (added in this change) the
// SSE register are union members and therefore alias the same storage.
// Assumes r32 is a 32-bit float so that x,y,z,w line up with the four __m128 lanes — TODO confirm.
// NOTE(review): `sse` is declared unconditionally, so __m128 must be in scope even when
// USE_SSE is 0 — confirm that is intended.
union Vec4 {
- struct {
- r32 x;
- r32 y;
- r32 z;
- r32 w;
- };
- r32 data[4];
+ struct {
+ r32 x;
+ r32 y;
+ r32 z;
+ r32 w;
+ };
+ r32 data[4];
+ __m128 sse;
};
// @note: matrix and all matrix operations will be done in column major
-// @todo: be able to specify and configure this in the future
-// possibly through separate functions
union Mat4 {
// Three views of the same storage: four Vec4s, a 4x4 grid, and a flat array of 16.
// NOTE(review): despite the name, multiply4mv scales row[i] by the i-th component of
// the vector and sums the results, so row[i] is used as the i-th column of the matrix.
- Vec4 row[4];
- r32 data[4][4];
- r32 buffer[16];
+ Vec4 row[4];
+ r32 data[4][4];
+ r32 buffer[16];
};
// ==== Vec2 ====
@@ -176,8 +170,8 @@ Vec2 divide2v(Vec2 a, Vec2 b) {
}
// Length of v: sqrtf of SQUARE(v.x) + SQUARE(v.y).
// Assumes SQUARE(a) expands to a*a (macro defined outside this view) — TODO confirm.
r32 magnitude2v(Vec2 v) {
- r32 res = sqrtf(SQUARE(v.x) + SQUARE(v.y));
- return res;
+ r32 res = sqrtf(SQUARE(v.x) + SQUARE(v.y));
+ return res;
}
Vec2 normalize2v(Vec2 v) {
@@ -192,7 +186,7 @@ Vec2 normalize2v(Vec2 v) {
Vec3 vec3(r32 s);
Vec3 subtract3vf(Vec3 v, r32 scaler);
Vec3 multiply3v(Vec3 a, Vec3 b);
-Vec3 divide3v(Vec3 a, Vec3 b);
+Vec3 divide3v(Vec3 a, Vec3 b);
Vec3 add3vf(Vec3 vec, r32 scaler)
{
@@ -200,7 +194,7 @@ Vec3 add3vf(Vec3 vec, r32 scaler)
res.x = vec.x + scaler;
res.y = vec.y + scaler;
res.z = vec.z + scaler;
-
+
return res;
}
@@ -220,7 +214,7 @@ Vec3 subtract3v(Vec3 a, Vec3 b)
res.x = a.x - b.x;
res.y = a.y - b.y;
res.z = a.z - b.z;
-
+
return res;
}
@@ -230,7 +224,7 @@ Vec3 multiply3vf(Vec3 vec, r32 scaler)
res.x = vec.x * scaler;
res.y = vec.y * scaler;
res.z = vec.z * scaler;
-
+
return res;
}
@@ -241,7 +235,7 @@ Vec3 divide3vf(Vec3 vec, r32 scaler)
res.x = vec.x / scaler;
res.y = vec.y / scaler;
res.z = vec.z / scaler;
-
+
return res;
}
@@ -250,9 +244,9 @@ r32 dot3v(Vec3 a, Vec3 b)
r32 x = a.x * b.x;
r32 y = a.y * b.y;
r32 z = a.z * b.z;
-
+
r32 res = x + y + z;
-
+
return res;
}
@@ -275,25 +269,46 @@ Vec3 cross3v(Vec3 a, Vec3 b)
res.x = (a.y * b.z) - (a.z * b.y);
res.y = (a.z * b.x) - (a.x * b.z);
res.z = (a.x * b.y) - (a.y * b.x);
-
+
return res;
}
// ============================================== Vec4, Mat4 ==============================================
-
// Profiling state for the math routines.
// tick_freq is read from SDL once during static initialization; cum_math_ticks is
// accumulated by multiply4mv. cum_math_time is not written anywhere in the visible code.
// NOTE(review): these are `static` in a header, so every translation unit that includes
// it gets its own copy — confirm that is acceptable for the benchmark.
+static u64 tick_freq = SDL_GetPerformanceFrequency();
+static u64 cum_math_ticks = 0;
+static r64 cum_math_time = 0.0f;
// ==================== Vec4 ====================
// Builds a Vec4 with all four components set to s.
// With USE_SSE the value is broadcast to every lane with one intrinsic; otherwise
// the four fields are assigned individually.
Vec4 vec4(r32 s)
{
Vec4 res;
+#if USE_SSE
+ res.sse = _mm_set_ps1(s);
+#else
res.x = s;
res.y = s;
res.z = s;
res.w = s;
-
+#endif
+
return res;
}
-// @note: Written for completeness sake.
// Builds a Vec4 from four separate components.
// _mm_setr_ps takes its arguments lowest lane first, so x goes to lane 0, matching
// the x,y,z,w field order of the union.
+Vec4 vec4(r32 x, r32 y, r32 z, r32 w)
+{
+ Vec4 res;
+#if USE_SSE
+ res.sse = _mm_setr_ps(x, y, z, w);
+#else
+ res.x = x;
+ res.y = y;
+ res.z = z;
+ res.w = w;
+#endif
+
+ return res;
+}
+
+// @note: Written for completeness sake.
Vec4 add4vf(Vec4 vec, r32 scaler);
Vec4 add4v(Vec4 a, Vec4 b);
Vec4 subtract4vf(Vec4 vec, r32 scaler);
@@ -318,7 +333,7 @@ Mat4 diag4m(r32 value) {
res.data[1][1] = value;
res.data[2][2] = value;
res.data[3][3] = value;
-
+
return res;
}
@@ -345,7 +360,7 @@ Mat4 add4m(Mat4 a, Mat4 b)
res.data[3][1] = a.data[3][1] + b.data[3][1];
res.data[3][2] = a.data[3][2] + b.data[3][2];
res.data[3][3] = a.data[3][3] + b.data[3][3];
-
+
return res;
}
@@ -372,13 +387,31 @@ Mat4 subtract4m(Mat4 a, Mat4 b)
res.data[3][1] = a.data[3][1] - b.data[3][1];
res.data[3][2] = a.data[3][2] - b.data[3][2];
res.data[3][3] = a.data[3][3] - b.data[3][3];
-
+
return res;
}
// Multiplies matrix m by vector v:
//   res = v.x*m.row[0] + v.y*m.row[1] + v.z*m.row[2] + v.w*m.row[3]
// Side effect: adds the performance-counter ticks spent in each call to cum_math_ticks.
// NOTE(review): the two counter reads may cost more than the multiply itself, in which
// case the accumulated total mostly measures timer overhead — consider timing at the call site.
Vec4 multiply4mv(Mat4 m, Vec4 v)
{
- Vec4 res = vec4(0);
+ // SDL_GetPerformanceCounter returns an unsigned 64-bit tick count; keep it integral
+ // instead of converting through r64 so the delta added to cum_math_ticks stays exact.
+ u64 prev_tick = SDL_GetPerformanceCounter();
+
+ Vec4 res = vec4(0);
+#if USE_SSE
+ // Shuffle masks 0x00 / 0x55 / 0xaa / 0xff broadcast lane 0 / 1 / 2 / 3 of v to all lanes.
+ __m128 scalar = _mm_shuffle_ps(v.sse, v.sse, 0x0);
+ res.sse = _mm_mul_ps(scalar, m.row[0].sse);
+
+ scalar = _mm_shuffle_ps(v.sse, v.sse, 0x55);
+ __m128 mult = _mm_mul_ps(scalar, m.row[1].sse);
+ res.sse = _mm_add_ps(res.sse, mult);
+
+ scalar = _mm_shuffle_ps(v.sse, v.sse, 0xaa);
+ mult = _mm_mul_ps(scalar, m.row[2].sse);
+ res.sse = _mm_add_ps(res.sse, mult);
+
+ scalar = _mm_shuffle_ps(v.sse, v.sse, 0xff);
+ mult = _mm_mul_ps(scalar, m.row[3].sse);
+ res.sse = _mm_add_ps(res.sse, mult);
+#else
res.x += v.x*m.data[0][0];
res.y += v.x*m.data[0][1];
@@ -399,11 +432,15 @@ Vec4 multiply4mv(Mat4 m, Vec4 v)
res.y += v.w*m.data[3][1];
res.z += v.w*m.data[3][2];
res.w += v.w*m.data[3][3];
+#endif
+
+ u64 curr_tick = SDL_GetPerformanceCounter();
+ cum_math_ticks += curr_tick - prev_tick;
return res;
}
-Mat4 multiply4m(Mat4 a, Mat4 b)
+Mat4 multiply4m(Mat4 a, Mat4 b)
{
Mat4 res = { 0 };
@@ -416,15 +453,15 @@ Mat4 multiply4m(Mat4 a, Mat4 b)
}
// ==== Matrix Transformation ====
// Builds a scaling matrix: starts from diag4m(1.0f) and overwrites the first three
// diagonal entries with x, y, z. data[3][3] keeps the 1.0f set by diag4m.
-Mat4 scaling_matrix4m(r32 x, r32 y, r32 z)
+Mat4 scaling_matrix4m(r32 x, r32 y, r32 z)
{
// generates a 4x4 scaling matrix for scaling each of the x,y,z axis
- Mat4 res = diag4m(1.0f);
- res.data[0][0] = x;
- res.data[1][1] = y;
- res.data[2][2] = z;
-
- return res;
+ Mat4 res = diag4m(1.0f);
+ res.data[0][0] = x;
+ res.data[1][1] = y;
+ res.data[2][2] = z;
+
+ return res;
}
Mat4 translation_matrix4m(r32 x, r32 y, r32 z)
@@ -479,7 +516,7 @@ Mat4 lookat4m(Vec3 up, Vec3 forward, Vec3 right, Vec3 position)
/*
* @note: The construction of the lookat matrix is not obvious. For that reason, here is the supplemental material I have used to understand
* things while I build up my elementary understanding of linear algebra.
- * 1. This youtube video (https://www.youtube.com/watch?v=3ZmqJb7J5wE) helped me understand why we invert matrices.
+ * 1. This youtube video (https://www.youtube.com/watch?v=3ZmqJb7J5wE) helped me understand why we invert matrices.
* It is because, we are moving from the position matrix which is a global to the view matrix which
* is a local. It won't be very clear from this illustration alone, so you would be best served watching the video and recollecting and understanding from there.
* 2. This article (https://twodee.org/blog/17560) derives (or rather shows), in a very shallow way how we get to the look at matrix.
@@ -493,7 +530,7 @@ Mat4 lookat4m(Vec3 up, Vec3 forward, Vec3 right, Vec3 position)
res.data[3][1] = -dot3v(up, position);
res.data[3][2] = -dot3v(forward, position);
res.data[3][3] = 1.0f;
-
+
return res;
}
@@ -504,7 +541,7 @@ Vec3 camera_look_around(r32 angle_pitch, r32 angle_yaw)
camera_look.y = sinf(angle_pitch);
camera_look.z = sinf(angle_yaw) * cosf(angle_pitch);
camera_look = normalize3v(camera_look);
-
+
return camera_look;
}
@@ -512,14 +549,14 @@ Mat4 camera_create4m(Vec3 camera_pos, Vec3 camera_look, Vec3 camera_up)
{
// @note: We do this because this allows the camera to have the axis it looks at
// inwards be the +z axis.
- // If we did not do this, then the inward axis the camera looks at would be negative.
+ // If we did not do this, then the inward axis the camera looks at would be negative.
// I am still learning from learnopengl.com but I imagine that this was done for convenience's sake.
Vec3 camera_forward_dir = normalize3v(subtract3v(camera_pos, camera_look));
Vec3 camera_right_dir = normalize3v(cross3v(camera_up, camera_forward_dir));
Vec3 camera_up_dir = normalize3v(cross3v(camera_forward_dir, camera_right_dir));
-
+
Mat4 res = lookat4m(camera_up_dir, camera_forward_dir, camera_right_dir, camera_pos);
-
+
return res;
}