summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author talha <sarcxd@gmail.com> 2025-01-14 22:59:22 +0500
committer talha <sarcxd@gmail.com> 2025-01-14 22:59:22 +0500
commit ce28ed66a5cb155285a5971013f54919757cd65a (patch)
tree 70505fcbbce543255a0ef24f2caa2a3d932ce6ba
parent 05f82730e6d74ab604b02504cc928e764542a8c2 (diff)
BRANCH: Working on benchmarking new renderer, adding simd [branch/simd-math]
-rwxr-xr-x source/main.cpp 395
-rwxr-xr-x source/math.h 153
2 files changed, 279 insertions, 269 deletions
diff --git a/source/main.cpp b/source/main.cpp
index 49f16fa..a4d7b2c 100755
--- a/source/main.cpp
+++ b/source/main.cpp
@@ -7,37 +7,6 @@
#include <ft2build.h>
#include FT_FREETYPE_H
-/*
-* Project Estimation/Target ~ 1 - 2 months (was so off on this)
-* well, to be fair, if I account for actual time worked, then I was not
-* so off on my estimate, I think I am still in the estimation range
-* Tasks:
-* DONE:
-* - gravity - very barebones version done
-* - horizontal motion on ground
-* - accelerate
-* - constant speed
-* - decelerate to give impression of sliding
-* - horizontal motion when falling
-* - inertia-like movement when accelerating prior to falling
-* - loss of inertia when player was at high speed before
-* sliding off and other small movement when in free fall
-* - movement and deceleration when not moving in free fall
-* - Jumping
-* - Fixed framerate to prevent weird quirks with movement
-* - Basic camera follower
-* - move from row-major to column major setup for math library
-* TODO:
-* - Efficient Quad Renderer
-* - Some way to make and define levels
-* - Level Creation
-* - Update camera follower for centering player in view (with limits) after
-* a few seconds (maybe like 2 seconds)
-* - Level completion Object
-* - Implement Broad Phase Collision for efficient collision handling
-* - Audio
-*/
-
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
@@ -144,9 +113,9 @@ struct GLRenderer {
r32_array cq_mvp_batch;
r32_array cq_color_batch;
- // ui text
+ // ui text
TextState ui_text;
-};
+};
struct Controller {
b8 move_up;
@@ -218,14 +187,14 @@ u32 gl_shader_program(char* vs, char* fs)
{
int status;
char info_log[512];
-
-
+
+
// =============
// vertex shader
u32 vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex_shader, 1, &vs, NULL);
glCompileShader(vertex_shader);
-
+
glGetShaderiv(vertex_shader, GL_COMPILE_STATUS, &status);
if (status == 0)
{
@@ -233,14 +202,14 @@ u32 gl_shader_program(char* vs, char* fs)
printf("== ERROR: Vertex Shader Compilation Failed ==\n");
printf("%s\n", info_log);
}
-
-
+
+
// ===============
// fragment shader
u32 fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment_shader, 1, &fs, NULL);
glCompileShader(fragment_shader);
-
+
glGetShaderiv(fragment_shader, GL_COMPILE_STATUS, &status);
if (status == 0)
{
@@ -248,16 +217,16 @@ u32 gl_shader_program(char* vs, char* fs)
printf("== ERROR: Fragment Shader Compilation Failed ==\n");
printf("%s\n", info_log);
}
-
-
+
+
// ==============
// shader program
u32 shader_program = glCreateProgram();
-
+
glAttachShader(shader_program, vertex_shader);
glAttachShader(shader_program, fragment_shader);
glLinkProgram(shader_program);
-
+
glGetProgramiv(shader_program, GL_LINK_STATUS, &status);
if(status == 0)
{
@@ -265,10 +234,10 @@ u32 gl_shader_program(char* vs, char* fs)
printf("== ERROR: Shader Program Linking Failed\n");
printf("%s\n", info_log);
}
-
+
glDeleteShader(vertex_shader);
glDeleteShader(fragment_shader);
-
+
return shader_program;
}
@@ -281,14 +250,14 @@ u32 gl_shader_program_from_path(const char* vspath, const char* fspath)
printf("Error! Failed to read vertex shader file at path %s\n", vspath);
return 0;
}
-
+
char* fs = (char*)SDL_LoadFile(fspath, &read_count);
if (read_count == 0)
{
printf("Error! Failed to read fragment shader file at path %s\n", vspath);
return 0;
}
-
+
u32 shader_program = gl_shader_program(vs, fs);
return shader_program;
}
@@ -298,28 +267,68 @@ u32 gl_setup_colored_quad(u32 sp)
// @todo: make this use index buffer maybe?
r32 vertices[] = {
-1.0f, -1.0f, 0.0f, // bottom-left
- 1.0f, -1.0f, 0.0f, // bottom-right
- 1.0f, 1.0f, 0.0f, // top-right
- 1.0f, 1.0f, 0.0f, // top-right
+ 1.0f, -1.0f, 0.0f, // bottom-right
+ 1.0f, 1.0f, 0.0f, // top-right
+ 1.0f, 1.0f, 0.0f, // top-right
-1.0f, 1.0f, 0.0f, // top-left
-1.0f, -1.0f, 0.0f, // bottom-left
};
u32 vao, vbo;
glGenVertexArrays(1, &vao);
glGenBuffers(1, &vbo);
-
+
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), &vertices, GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(r32), (void*)0);
-
+
glBindVertexArray(0);
-
+
// now return or store the vao, vbo state somewhere
return vao;
}
+void gl_draw_colored_quad( // draws one colored quad immediately with its own glDrawArrays call (no batching)
+ GLRenderer* renderer, // supplies cq_sp, cq_vao, cq_init and the camera matrices
+ Vec3 position, // passed to translation_matrix4m
+ Vec2 size, // x/y factors passed to scaling_matrix4m
+ Vec3 color // uploaded to the "Color" uniform
+) {
+ glEnable(GL_DEPTH_TEST);
+ glUseProgram(renderer->cq_sp);
+ if (renderer->cq_init == 0) // "Projection" is uploaded only while cq_init is 0; later changes to cam_proj are not re-sent here
+ {
+ glUniformMatrix4fv(
+ glGetUniformLocation(renderer->cq_sp, "Projection"),
+ 1, GL_FALSE, (renderer->cam_proj).buffer
+ );
+ renderer->cq_init = 1;
+ }
+ // setting quad size
+ Mat4 model = diag4m(1.0); // diagonal of 1.0 as the starting model matrix
+ Mat4 scale = scaling_matrix4m(size.x, size.y, 0.0f); // z scale factor is 0.0f
+ model = multiply4m(scale, model);
+ // setting quad position (translation is multiplied on after the scale)
+ Mat4 translation = translation_matrix4m(position.x, position.y, position.z);
+ model = multiply4m(translation, model);
+ // setting color
+ glUniform3fv(glGetUniformLocation(renderer->cq_sp, "Color"), 1, color.data);
+
+ glUniformMatrix4fv( // "Model" is looked up and uploaded on every call
+ glGetUniformLocation(renderer->cq_sp, "Model"),
+ 1, GL_FALSE, model.buffer
+ );
+
+ glUniformMatrix4fv( // "View" is looked up and uploaded on every call
+ glGetUniformLocation(renderer->cq_sp, "View"),
+ 1, GL_FALSE, (renderer->cam_view).buffer
+ );
+
+ glBindVertexArray(renderer->cq_vao);
+ glDrawArrays(GL_TRIANGLES, 0, 6); // 6 vertices = two triangles
+}
+
void gl_setup_colored_quad_optimized(
GLRenderer* renderer,
u32 sp
@@ -327,7 +336,7 @@ void gl_setup_colored_quad_optimized(
// @todo: make this use index buffer maybe?
glGenVertexArrays(1, &renderer->cq_batch_vao);
glGenBuffers(1, &renderer->cq_batch_vbo);
-
+
glBindVertexArray(renderer->cq_batch_vao);
glBindBuffer(GL_ARRAY_BUFFER, renderer->cq_batch_vbo);
glBufferData(
@@ -341,10 +350,10 @@ void gl_setup_colored_quad_optimized(
glEnableVertexAttribArray(1);
glVertexAttribPointer(
- 1, 3, GL_FLOAT, GL_FALSE,
+ 1, 3, GL_FLOAT, GL_FALSE,
3 * sizeof(r32), (void*)(renderer->cq_pos_batch.capacity*sizeof(r32))
);
-
+
glBindVertexArray(0);
}
@@ -358,7 +367,7 @@ void gl_cq_flush(GLRenderer* renderer) {
);
glUniformMatrix4fv(
- glGetUniformLocation(renderer->cq_batch_sp, "Projection"),
+ glGetUniformLocation(renderer->cq_batch_sp, "Projection"),
1, GL_FALSE, (renderer->cam_proj).buffer
);
@@ -367,17 +376,17 @@ void gl_cq_flush(GLRenderer* renderer) {
// fill batch data
// position batch
glBufferSubData(
- GL_ARRAY_BUFFER,
- 0,
- renderer->cq_pos_batch.capacity*sizeof(r32),
+ GL_ARRAY_BUFFER,
+ 0,
+ renderer->cq_pos_batch.capacity*sizeof(r32),
renderer->cq_pos_batch.buffer
);
// color batch
glBufferSubData(
- GL_ARRAY_BUFFER,
- renderer->cq_pos_batch.capacity*sizeof(r32),
- renderer->cq_color_batch.capacity*sizeof(r32),
+ GL_ARRAY_BUFFER,
+ renderer->cq_pos_batch.capacity*sizeof(r32),
+ renderer->cq_color_batch.capacity*sizeof(r32),
(void*)renderer->cq_color_batch.buffer
);
@@ -396,12 +405,12 @@ void gl_draw_colored_quad_optimized(
Vec3 color
) {
Vec4 vertices[6] = {
- Vec4{-1.0f, -1.0f, 0.0f, 1.0f},// bottom-left
- Vec4{ 1.0f, -1.0f, 0.0f, 1.0f},// bottom-right
- Vec4{ 1.0f, 1.0f, 0.0f, 1.0f},// top-right
- Vec4{ 1.0f, 1.0f, 0.0f, 1.0f},// top-right
- Vec4{-1.0f, 1.0f, 0.0f, 1.0f},// top-left
- Vec4{-1.0f, -1.0f, 0.0f, 1.0f} // bottom-left
+ vec4(-1.0f, -1.0f, 0.0f, 1.0f),// bottom-left
+ vec4( 1.0f, -1.0f, 0.0f, 1.0f),// bottom-right
+ vec4( 1.0f, 1.0f, 0.0f, 1.0f),// top-right
+ vec4( 1.0f, 1.0f, 0.0f, 1.0f),// top-right
+ vec4(-1.0f, 1.0f, 0.0f, 1.0f),// top-left
+ vec4(-1.0f, -1.0f, 0.0f, 1.0f) // bottom-left
};
// setting quad size
@@ -425,7 +434,7 @@ void gl_draw_colored_quad_optimized(
vertices[4] = model_pos;
model_pos = multiply4mv(model, vertices[5]);
vertices[5] = model_pos;
-
+
array_insert(&renderer->cq_pos_batch, vertices[0].data, 4);
array_insert(&renderer->cq_pos_batch, vertices[1].data, 4);
array_insert(&renderer->cq_pos_batch, vertices[2].data, 4);
@@ -448,54 +457,14 @@ void gl_draw_colored_quad_optimized(
}
}
-void gl_draw_colored_quad(
- GLRenderer* renderer,
- Vec3 position,
- Vec2 size,
- Vec3 color
-) {
- glEnable(GL_DEPTH_TEST);
- glUseProgram(renderer->cq_sp);
- if (renderer->cq_init == 0)
- {
- glUniformMatrix4fv(
- glGetUniformLocation(renderer->cq_sp, "Projection"),
- 1, GL_FALSE, (renderer->cam_proj).buffer
- );
- renderer->cq_init = 1;
- }
- // setting quad size
- Mat4 model = diag4m(1.0);
- Mat4 scale = scaling_matrix4m(size.x, size.y, 0.0f);
- model = multiply4m(scale, model);
- // setting quad position
- Mat4 translation = translation_matrix4m(position.x, position.y, position.z);
- model = multiply4m(translation, model);
- // setting color
- glUniform3fv(glGetUniformLocation(renderer->cq_sp, "Color"), 1, color.data);
-
- glUniformMatrix4fv(
- glGetUniformLocation(renderer->cq_sp, "Model"),
- 1, GL_FALSE, model.buffer
- );
-
- glUniformMatrix4fv(
- glGetUniformLocation(renderer->cq_sp, "View"),
- 1, GL_FALSE, (renderer->cam_view).buffer
- );
-
- glBindVertexArray(renderer->cq_vao);
- glDrawArrays(GL_TRIANGLES, 0, 6);
-}
-
void gl_setup_text(TextState* state, FT_Face font_face)
{
FT_Set_Pixel_Sizes(font_face, state->pixel_size, state->pixel_size);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
-
+
glGenTextures(1, &(state->texture_atlas_id));
glBindTexture(GL_TEXTURE_2D_ARRAY, state->texture_atlas_id);
-
+
// generate texture
glTexImage3D(
GL_TEXTURE_2D_ARRAY,
@@ -509,15 +478,15 @@ void gl_setup_text(TextState* state, FT_Face font_face)
GL_UNSIGNED_BYTE,
0
);
-
+
// generate characters
for (u32 c = 0; c < 128; c++)
{
if (FT_Load_Char(font_face, c, FT_LOAD_RENDER))
{
printf("ERROR :: Freetype failed to load glyph: %c", c);
- }
- else
+ }
+ else
{
glTexSubImage3D(
GL_TEXTURE_2D_ARRAY,
@@ -531,30 +500,30 @@ void gl_setup_text(TextState* state, FT_Face font_face)
GL_UNSIGNED_BYTE,
font_face->glyph->bitmap.buffer
);
-
+
// set texture options
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-
+
TextChar tc;
tc.size = Vec2{
- (r32)font_face->glyph->bitmap.width,
+ (r32)font_face->glyph->bitmap.width,
(r32)font_face->glyph->bitmap.rows
};
tc.bearing = Vec2{
- (r32)font_face->glyph->bitmap_left,
+ (r32)font_face->glyph->bitmap_left,
(r32)font_face->glyph->bitmap_top
};
tc.advance = font_face->glyph->advance.x;
-
+
state->char_map[c] = tc;
}
}
-
+
glBindTexture(GL_TEXTURE_2D_ARRAY, 0);
-
+
// @note: this data is used for GL_TRIANGLE_STRIP
// as such the order for vertices for this is AntiCW -> CW -> AntiCW
// that can be seen in this array as it goes from ACW -> CW
@@ -564,16 +533,16 @@ void gl_setup_text(TextState* state, FT_Face font_face)
1.0f, 1.0f,
1.0f, 0.0f
};
-
+
glGenVertexArrays(1, &(state->vao));
glGenBuffers(1, &(state->vbo));
-
+
glBindVertexArray(state->vao);
glBindBuffer(GL_ARRAY_BUFFER, state->vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, 0);
-
+
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
}
@@ -583,18 +552,18 @@ void gl_render_text(GLRenderer *renderer, char* text, Vec2 position, r32 size, V
glDisable(GL_DEPTH_TEST);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
-
+
glUseProgram(renderer->ui_text.sp);
- glUniformMatrix4fv(glGetUniformLocation(renderer->ui_text.sp, "View"),
+ glUniformMatrix4fv(glGetUniformLocation(renderer->ui_text.sp, "View"),
1, GL_FALSE, renderer->cam_view.buffer);
- glUniformMatrix4fv(glGetUniformLocation(renderer->ui_text.sp, "Projection"),
+ glUniformMatrix4fv(glGetUniformLocation(renderer->ui_text.sp, "Projection"),
1, GL_FALSE, renderer->cam_proj.buffer);
glUniform3fv(glGetUniformLocation(renderer->ui_text.sp, "TextColor"), 1, color.data);
glBindVertexArray(renderer->ui_text.vao);
glBindTexture(GL_TEXTURE_2D_ARRAY, renderer->ui_text.texture_atlas_id);
glBindBuffer(GL_ARRAY_BUFFER, renderer->ui_text.vbo);
glActiveTexture(GL_TEXTURE0);
-
+
u32 running_index = 0;
r32 startx = position.x;
r32 starty = position.y;
@@ -602,7 +571,7 @@ void gl_render_text(GLRenderer *renderer, char* text, Vec2 position, r32 size, V
r32 scale = size/renderer->ui_text.pixel_size;
memset(renderer->ui_text.transforms, 0, renderer->ui_text.chunk_size);
memset(renderer->ui_text.char_indexes, 0, renderer->ui_text.chunk_size);
-
+
char *char_iter = text;
while (*char_iter != '\0')
{
@@ -620,22 +589,22 @@ void gl_render_text(GLRenderer *renderer, char* text, Vec2 position, r32 size, V
{
r32 xpos = linex + (scale * render_char.bearing.x);
r32 ypos = starty - (renderer->ui_text.pixel_size - render_char.bearing.y) * scale;
-
+
r32 w = scale * renderer->ui_text.pixel_size;
r32 h = scale * renderer->ui_text.pixel_size;
-
+
Mat4 sm = scaling_matrix4m(w, h, 0);
Mat4 tm = translation_matrix4m(xpos, ypos, 0);
Mat4 model = multiply4m(tm, sm);
renderer->ui_text.transforms[running_index] = model;
renderer->ui_text.char_indexes[running_index] = int(*char_iter);
-
+
linex += (render_char.advance >> 6) * scale;
running_index++;
if (running_index > renderer->ui_text.chunk_size - 1)
{
r32 transform_loc = glGetUniformLocation(renderer->ui_text.sp, "LetterTransforms");
- glUniformMatrix4fv(transform_loc, renderer->ui_text.chunk_size,
+ glUniformMatrix4fv(transform_loc, renderer->ui_text.chunk_size,
GL_FALSE, &(renderer->ui_text.transforms[0].buffer[0]));
r32 texture_map_loc = glGetUniformLocation(renderer->ui_text.sp, "TextureMap");
glUniform1iv(texture_map_loc, renderer->ui_text.chunk_size, renderer->ui_text.char_indexes);
@@ -651,7 +620,7 @@ void gl_render_text(GLRenderer *renderer, char* text, Vec2 position, r32 size, V
{
u32 render_count = running_index < renderer->ui_text.chunk_size ? running_index : renderer->ui_text.chunk_size;
r32 transform_loc = glGetUniformLocation(renderer->ui_text.sp, "LetterTransforms");
- glUniformMatrix4fv(transform_loc, render_count,
+ glUniformMatrix4fv(transform_loc, render_count,
GL_FALSE, &(renderer->ui_text.transforms[0].buffer[0]));
r32 texture_map_loc = glGetUniformLocation(renderer->ui_text.sp, "TextureMap");
glUniform1iv(texture_map_loc, render_count, renderer->ui_text.char_indexes);
@@ -695,17 +664,6 @@ Vec2 get_move_dir(Controller c) {
return dir;
}
-void update_camera(GLRenderer *renderer) {
- if (renderer->cam_update == true) {
- renderer->cam_view = camera_create4m(
- renderer->cam_pos,
- add3v(renderer->cam_pos, renderer->cam_look),
- renderer->preset_up_dir
- );
- renderer->cam_update = false;
- }
-}
-
Vec3 get_world_position_from_percent(GameState state, Vec3 v) {
Vec3 world_pos = v;
world_pos.x = state.render_scale.x*state.world_size.x*v.x/100.0f;
@@ -734,19 +692,19 @@ int main(int argc, char* argv[])
{
u32 scr_width = 1024;
u32 scr_height = 768;
-
+
if (SDL_Init(SDL_INIT_VIDEO) != 0)
{
printf("Error initialising SDL2: %s\n", SDL_GetError());
return -1;
}
-
+
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
-
+
SDL_Window* window = SDL_CreateWindow("simple platformer",
- SDL_WINDOWPOS_UNDEFINED,
+ SDL_WINDOWPOS_UNDEFINED,
SDL_WINDOWPOS_UNDEFINED,
scr_width, scr_height,
SDL_WINDOW_OPENGL);
@@ -756,20 +714,20 @@ int main(int argc, char* argv[])
printf("ERROR :: OpenGL context creation failed: %s\n", SDL_GetError());
return -1;
}
-
+
// load glad
if (!gladLoadGLLoader((GLADloadproc)SDL_GL_GetProcAddress)) {
- printf("ERROR :: Failed to initialize Glad\n");
- return -1;
+ printf("ERROR :: Failed to initialize Glad\n");
+ return -1;
}
-
+
// vsync controls: 0 = OFF | 1 = ON (Default)
SDL_GL_SetSwapInterval(0);
-
+
GLRenderer *renderer = new GLRenderer();
// @resume: I am working on creating an efficient quad renderer via
- // instancing, in order to achieve that I have absolutely butchered the
+ // instancing, in order to achieve that I have absolutely butchered the
// performance of my program by making it render 200,000 quads
// the goal is to use this to see when performance plummets
//
@@ -791,7 +749,7 @@ int main(int argc, char* argv[])
u32 quad_sp = gl_shader_program_from_path(
- "./source/shaders/colored_quad.vs.glsl",
+ "./source/shaders/colored_quad.vs.glsl",
"./source/shaders/colored_quad.fs.glsl"
);
u32 ui_text_sp = gl_shader_program_from_path(
@@ -808,9 +766,9 @@ int main(int argc, char* argv[])
renderer->cq_batch_sp = cq_batch_sp;
gl_setup_colored_quad_optimized(renderer, cq_batch_sp);
-
-
- r32 render_scale = 8.0f;
+
+
+ r32 render_scale = 2.0f;
// ==========
// setup text
// 1. setup free type library stuff
@@ -821,10 +779,10 @@ int main(int argc, char* argv[])
printf("ERROR :: Could not init freetype library\n");
return -1;
}
-
+
FT_Error error = FT_New_Face(
- ft_lib,
- "assets/fonts/Roboto.ttf",
+ ft_lib,
+ "assets/fonts/Roboto.ttf",
0, &roboto_font_face
);
if (error == FT_Err_Unknown_File_Format)
@@ -852,8 +810,8 @@ int main(int argc, char* argv[])
128*sizeof(TextChar)
);
gl_setup_text(&(renderer->ui_text), roboto_font_face);
-
-
+
+
// ============
// setup camera
Vec3 preset_up_dir = Vec3{0.0f, 1.0f, 0.0f};
@@ -862,12 +820,12 @@ int main(int argc, char* argv[])
renderer->cam_pos = Vec3{0.0f, 0.0f, 1.0f};
renderer->cam_look = camera_look_around(TO_RAD(0.0f), -TO_RAD(90.0f));
renderer->cam_view = camera_create4m(
- renderer->cam_pos,
+ renderer->cam_pos,
add3v(renderer->cam_pos, renderer->cam_look), renderer->preset_up_dir
);
renderer->cam_proj = orthographic4m(
- 0.0f, (r32)scr_width*render_scale,
- 0.0f, (r32)scr_height*render_scale,
+ 0.0f, (r32)scr_width*render_scale,
+ 0.0f, (r32)scr_height*render_scale,
0.1f, 10.0f
);
@@ -894,7 +852,7 @@ int main(int argc, char* argv[])
// @thinking: level object handling
// there should be a smallest supported unit
// smallest_size: 16x16
- // object placement should be in pixels
+ // object placement should be in pixels
// in order to scale to different resolutions it should be multiplied by
// scaling factor
Vec2 atom_size = {16.0f, 16.0f};
@@ -922,20 +880,24 @@ int main(int argc, char* argv[])
Controller controller = {0};
r32 key_down_time[5] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
b8 is_key_down_x = false;
-
+
// gravity calculations
b8 collidex = 0;
b8 collidey = 0;
b8 is_gravity = 0;
-
+
b8 game_running = 1;
FrameTimer timer = frametimer();
+ r64 hp_tick_freq = SDL_GetPerformanceFrequency();
+ u64 hp_ticks = 0.0f;
- while (game_running)
+ while (game_running)
{
update_frame_timer(&timer);
//enforce_frame_rate(&timer, 60);
+ hp_ticks = SDL_GetPerformanceCounter();
+ cum_math_ticks = 0;
controller.jump = 0;
controller.toggle_gravity = 0;
@@ -1005,7 +967,7 @@ int main(int argc, char* argv[])
}
}
}
-
+
// @section: input processing
if (controller.toggle_gravity)
{
@@ -1027,7 +989,7 @@ int main(int argc, char* argv[])
PlatformKey horizontal_move = PK_NIL;
is_key_down_x = false;
if (
- key_down_time[PK_A] != 0.0f ||
+ key_down_time[PK_A] != 0.0f ||
key_down_time[PK_D] != 0.0f
) {
horizontal_move = (
@@ -1035,12 +997,12 @@ int main(int argc, char* argv[])
);
}
- if (horizontal_move == PK_A && controller.move_left)
+ if (horizontal_move == PK_A && controller.move_left)
{
p_move_dir.x = -1.0f;
is_key_down_x = true;
- }
- if (horizontal_move == PK_D && controller.move_right)
+ }
+ if (horizontal_move == PK_D && controller.move_right)
{
p_move_dir.x = 1.0f;
is_key_down_x = true;
@@ -1066,10 +1028,10 @@ int main(int argc, char* argv[])
// without separate checks
if (is_key_down_x) {
r32 updated_force = (
- effective_force + p_move_dir.x*move_accelx*timer.tDelta
+ effective_force + p_move_dir.x*move_accelx*timer.tDelta
);
updated_force = clampf(
- updated_force, -move_accelx, move_accelx
+ updated_force, -move_accelx, move_accelx
);
effective_force = updated_force;
} else {
@@ -1081,7 +1043,7 @@ int main(int argc, char* argv[])
}
r32 updated_force = effective_force + friction;
effective_force = (
- ABS(updated_force) < 0.5f ?
+ ABS(updated_force) < 0.5f ?
0.0f : updated_force
);
}
@@ -1089,15 +1051,15 @@ int main(int argc, char* argv[])
r32 smoothing_force = effective_force;
r32 net_force = 0.0f;
r32 active_force = 0.0f;
- if (!collidex) {
+ if (!collidex) {
net_force += effective_force;
{
- // @note: air resistance
+ // @note: air resistance
// (arbitrary force in opposite direction to reduce speed)
// reason: seems that it would work well for the case where
// player moves from platform move to free_fall
// since the max speed in respective stages is different this can
- // function as a speed smoother, without too many checks and
+ // function as a speed smoother, without too many checks and
// explicit checking
b8 is_force_pos = effective_force > 0.0f;
b8 is_force_neg = effective_force < 0.0f;
@@ -1108,7 +1070,7 @@ int main(int argc, char* argv[])
friction = fall_accelx*timer.tDelta;
}
net_force += friction;
- }
+ }
{
// @note: player movement force
@@ -1123,7 +1085,7 @@ int main(int argc, char* argv[])
}
effective_force = net_force;
}
-
+
{
// horizontal motion setting
r32 dx1 = effective_force;
@@ -1157,7 +1119,7 @@ int main(int argc, char* argv[])
pd_1.y = dy1;
}
}
- else
+ else
{
Vec2 dir = get_move_dir(controller);
pd_1 = dir * render_scale;
@@ -1173,21 +1135,21 @@ int main(int argc, char* argv[])
}
}
-
+
// @section: collision
Vec3 next_player_position;
next_player_position.x = state.player.position.x + pd_1.x;
next_player_position.y = state.player.position.y + pd_1.y;
Rect player_next = rect(next_player_position, state.player.size);
-
+
Rect collision_targets[2] = {state.wall, state.floor};
b8 is_collide_x = 0;
b8 is_collide_y = 0;
for (u32 i = 0; i < 2; i++) {
Rect target = collision_targets[i];
-
+
// @func: check_if_player_colliding_with_target
b8 t_collide_x = 0;
@@ -1279,20 +1241,21 @@ int main(int argc, char* argv[])
}
if (renderer->cam_update == true) {
renderer->cam_view = camera_create4m(
- renderer->cam_pos,
- add3v(renderer->cam_pos, renderer->cam_look),
+ renderer->cam_pos,
+ add3v(renderer->cam_pos, renderer->cam_look),
renderer->preset_up_dir
);
renderer->cam_update = false;
}
}
-
+
// output
glClearColor(0.8f, 0.5f, 0.7f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
-
+
// player
- gl_draw_colored_quad(renderer,
+#if DRAW_ELEMENTS
+ gl_draw_colored_quad(renderer,
state.player.position, // position
state.player.size, // size
Vec3{0.45f, 0.8f, 0.2f});
@@ -1302,24 +1265,26 @@ int main(int argc, char* argv[])
state.floor.size,
Vec3{1.0f, 1.0f, 1.0f});
// wall
- gl_draw_colored_quad(renderer,
+ gl_draw_colored_quad(renderer,
state.wall.position,
state.wall.size,
Vec3{1.0f, 0.0f, 0.0f});
+#endif
// benchmark code
- u32 max_cq_count = 1000;
- u32 max_row_ele = 50;
+ u32 max_cq_count = 10000;
+ u32 max_row_ele = 500;
u32 max_col_ele = max_cq_count/max_row_ele;
Vec2 screen_render_size = (state.render_scale * state.screen_size)/2.0f;
Vec2 based_size = Vec2{
- screen_render_size.x / max_row_ele,
+ screen_render_size.x / max_row_ele,
screen_render_size.y / max_col_ele
};
+#if 1
for (int i=0;i<max_cq_count;i++) {
Vec3 pos_i = Vec3{
- (2.0f*based_size.x + atom_size.x)*(r32)(i%max_row_ele),
- (2.0f*based_size.y + atom_size.y)*(r32)(i/max_row_ele),
+ (2.0f*based_size.x + atom_size.x)*(r32)(i%max_row_ele),
+ (2.0f*based_size.y + atom_size.y)*(r32)(i/max_row_ele),
-5.0f
};
r32 cf_r = ((r32)(i%max_row_ele))/(r32)max_row_ele;
@@ -1343,12 +1308,20 @@ int main(int argc, char* argv[])
#endif
}
gl_cq_flush(renderer);
+#endif
array_clear(&renderer->cq_pos_batch);
array_clear(&renderer->cq_color_batch);
renderer->cq_batch_count = 0;
// render ui text
+ u64 new_ticks = SDL_GetPerformanceCounter();
+ r64 hp_time = (r64)(new_ticks - hp_ticks) / (r64) hp_tick_freq;
+ SDL_Log("ticks time: %fs", hp_time);
+
+ cum_math_time = (r64)cum_math_ticks/(r64)tick_freq;
+ SDL_Log("multiply4mv time: %fs", cum_math_time);
+
if (is_collide_x || is_collide_y)
{
gl_render_text(renderer,
@@ -1356,7 +1329,7 @@ int main(int argc, char* argv[])
Vec2{500.0f, 700.0f}, // position
28.0f, // size
Vec3{0.0f, 0.0f, 0.0f}); // color
-
+
char movedir_output[50];
sprintf(movedir_output, "move_dir = %f", p_move_dir.x);
gl_render_text(renderer,
@@ -1372,7 +1345,7 @@ int main(int argc, char* argv[])
Vec2{500.0f, 100.0f}, // position
28.0f, // size
Vec3{0.0f, 0.0f, 0.0f}); // color
-
+
}
//char accel_output[50];
//sprintf(accel_output, "effective_force %f", effective_force);
@@ -1381,7 +1354,7 @@ int main(int argc, char* argv[])
// Vec2{500.0f, 150.0f}, // position
// 28.0f*render_scale, // size
// Vec3{0.0f, 0.0f, 0.0f}); // color
-
+
char fmt_buffer[50];
//sprintf(fmt_buffer, "player moving? %d", is_key_down_x);
//gl_render_text(renderer,
@@ -1390,13 +1363,13 @@ int main(int argc, char* argv[])
// 28.0f*render_scale, // size
// Vec3{0.0f, 0.0f, 0.0f}); // color
- sprintf(fmt_buffer, "frametime: %f", timer.tDelta);
- gl_render_text(renderer,
- fmt_buffer,
- Vec2{900.0f, 90.0f}, // position
- 28.0f*render_scale, // size
- Vec3{0.0f, 0.0f, 0.0f}); // color
-
+ //sprintf(fmt_buffer, "frametime: %f", timer.tDelta);
+ //gl_render_text(renderer,
+ // fmt_buffer,
+ // Vec2{900.0f, 90.0f}, // position
+ // 28.0f*render_scale, // size
+ // Vec3{0.0f, 0.0f, 0.0f}); // color
+
//sprintf(fmt_buffer, "%f pixels", pd_1.x);
//gl_render_text(renderer,
@@ -1414,7 +1387,7 @@ int main(int argc, char* argv[])
SDL_GL_SwapWindow(window);
}
-
+
arena_clear(&batch_arena);
free(renderer->ui_text.transforms);
free(renderer->ui_text.char_indexes);
diff --git a/source/math.h b/source/math.h
index 3f20e43..1ac3605 100755
--- a/source/math.h
+++ b/source/math.h
@@ -8,26 +8,21 @@
#define ABS(x) ((x) < 0 ? (-(x)) : (x))
// Evaluates to the smaller of x and y. The previous form had the ternary
// branches swapped and therefore produced the larger value.
// Note: like ABS, this is a plain macro, so an argument may be evaluated twice.
#define MIN(x,y) ((x) < (y) ? (x) : (y))
-// @todo:
+// @todo:
// - make everything simd
-
-// @note: Regarding functions written for completeness sake
-// These operations are just defined and not expressed.
-// They are kept here for completeness sake BUT
-// since I have not had to do anything related to these, I have not created them.
-
+#define USE_SSE 1
r32 clampf(r32 x, r32 bottom, r32 top) // returns x limited to the range [bottom, top]
{
if (x < bottom) // below the range: snap up to bottom
{
x = bottom;
- }
+ }
else if (x > top) // above the range: snap down to top
{
x = top;
}
-
+
return x; // unchanged when bottom <= x <= top
}
@@ -99,12 +94,12 @@ union Vec2 {
};
union Vec3 {
- struct {
- r32 x;
- r32 y;
- r32 z;
- };
- r32 data[3];
+ struct {
+ r32 x;
+ r32 y;
+ r32 z;
+ };
+ r32 data[3];
Vec2 v2() {
return Vec2{x, y};
@@ -114,22 +109,21 @@ union Vec3 {
typedef Vec3 RGB;
union Vec4 {
- struct {
- r32 x;
- r32 y;
- r32 z;
- r32 w;
- };
- r32 data[4];
+ struct { // named access to the four components
+ r32 x;
+ r32 y;
+ r32 z;
+ r32 w;
+ };
+ r32 data[4]; // the same four floats viewed as an array
+ __m128 sse; // the same four floats viewed as one SSE register; read/written by the USE_SSE code paths
};
// @note: matrix and all matrix operations will be done in column major
-// @todo: be able to specify and configure this in the future
-// possibly through separate functions
union Mat4 {
- Vec4 row[4];
- r32 data[4][4];
- r32 buffer[16];
+ Vec4 row[4]; // four Vec4 slices; multiply4mv scales row[i] by the i-th vector component, i.e. treats it as column i despite the name
+ r32 data[4][4]; // the same storage indexed as data[i][j]
+ r32 buffer[16]; // the same storage as a flat array; this view is what gets passed to glUniformMatrix4fv
};
// ==== Vec2 ====
@@ -176,8 +170,8 @@ Vec2 divide2v(Vec2 a, Vec2 b) {
}
r32 magnitude2v(Vec2 v) { // length of v: sqrtf of SQUARE(v.x) + SQUARE(v.y)
- r32 res = sqrtf(SQUARE(v.x) + SQUARE(v.y));
- return res;
+ r32 res = sqrtf(SQUARE(v.x) + SQUARE(v.y)); // presumably SQUARE(a) expands to a*a -- macro is defined outside this view
+ return res;
}
Vec2 normalize2v(Vec2 v) {
@@ -192,7 +186,7 @@ Vec2 normalize2v(Vec2 v) {
Vec3 vec3(r32 s);
Vec3 subtract3vf(Vec3 v, r32 scaler);
Vec3 multiply3v(Vec3 a, Vec3 b);
-Vec3 divide3v(Vec3 a, Vec3 b);
+Vec3 divide3v(Vec3 a, Vec3 b);
Vec3 add3vf(Vec3 vec, r32 scaler)
{
@@ -200,7 +194,7 @@ Vec3 add3vf(Vec3 vec, r32 scaler)
res.x = vec.x + scaler;
res.y = vec.y + scaler;
res.z = vec.z + scaler;
-
+
return res;
}
@@ -220,7 +214,7 @@ Vec3 subtract3v(Vec3 a, Vec3 b)
res.x = a.x - b.x;
res.y = a.y - b.y;
res.z = a.z - b.z;
-
+
return res;
}
@@ -230,7 +224,7 @@ Vec3 multiply3vf(Vec3 vec, r32 scaler)
res.x = vec.x * scaler;
res.y = vec.y * scaler;
res.z = vec.z * scaler;
-
+
return res;
}
@@ -241,7 +235,7 @@ Vec3 divide3vf(Vec3 vec, r32 scaler)
res.x = vec.x / scaler;
res.y = vec.y / scaler;
res.z = vec.z / scaler;
-
+
return res;
}
@@ -250,9 +244,9 @@ r32 dot3v(Vec3 a, Vec3 b)
r32 x = a.x * b.x;
r32 y = a.y * b.y;
r32 z = a.z * b.z;
-
+
r32 res = x + y + z;
-
+
return res;
}
@@ -275,25 +269,46 @@ Vec3 cross3v(Vec3 a, Vec3 b)
res.x = (a.y * b.z) - (a.z * b.y);
res.y = (a.z * b.x) - (a.x * b.z);
res.z = (a.x * b.y) - (a.y * b.x);
-
+
return res;
}
// ============================================== Vec4, Mat4 ==============================================
-
+static u64 tick_freq = SDL_GetPerformanceFrequency(); // divisor used in main.cpp to turn cum_math_ticks into seconds
+static u64 cum_math_ticks = 0; // ticks accumulated inside multiply4mv; main.cpp resets it to 0 at the top of each frame
+static r64 cum_math_time = 0.0f; // cum_math_ticks / tick_freq, computed and logged in main.cpp
// ==================== Vec4 ====================
Vec4 vec4(r32 s) // returns a Vec4 with all four components set to s
{
Vec4 res;
+#if USE_SSE
+ res.sse = _mm_set_ps1(s); // broadcasts s into all four lanes of the register
+#else
res.x = s; // scalar fallback: assign each component in turn
res.y = s;
res.z = s;
res.w = s;
-
+#endif
+
return res;
}
-// @note: Written for completeness sake.
+Vec4 vec4(r32 x, r32 y, r32 z, r32 w) // overload that builds a Vec4 from four separate components
+{
+ Vec4 res;
+#if USE_SSE
+ res.sse = _mm_setr_ps(x, y, z, w); // "setr" takes arguments in memory order, so lane 0 holds x and lines up with data[0]
+#else
+ res.x = x; // scalar fallback: assign each component in turn
+ res.y = y;
+ res.z = z;
+ res.w = w;
+#endif
+
+ return res;
+}
+
+// @note: Written for completeness sake.
Vec4 add4vf(Vec4 vec, r32 scaler);
Vec4 add4v(Vec4 a, Vec4 b);
Vec4 subtract4vf(Vec4 vec, r32 scaler);
@@ -318,7 +333,7 @@ Mat4 diag4m(r32 value) {
res.data[1][1] = value;
res.data[2][2] = value;
res.data[3][3] = value;
-
+
return res;
}
@@ -345,7 +360,7 @@ Mat4 add4m(Mat4 a, Mat4 b)
res.data[3][1] = a.data[3][1] + b.data[3][1];
res.data[3][2] = a.data[3][2] + b.data[3][2];
res.data[3][3] = a.data[3][3] + b.data[3][3];
-
+
return res;
}
@@ -372,13 +387,31 @@ Mat4 subtract4m(Mat4 a, Mat4 b)
res.data[3][1] = a.data[3][1] - b.data[3][1];
res.data[3][2] = a.data[3][2] - b.data[3][2];
res.data[3][3] = a.data[3][3] - b.data[3][3];
-
+
return res;
}
Vec4 multiply4mv(Mat4 m, Vec4 v)
{
- Vec4 res = vec4(0);
+ r64 prev_tick = SDL_GetPerformanceCounter();
+
+ Vec4 res = vec4(0);
+#if USE_SSE
+ __m128 scalar = _mm_shuffle_ps(v.sse, v.sse, 0x0);
+ res.sse = _mm_mul_ps(scalar, m.row[0].sse);
+
+ scalar = _mm_shuffle_ps(v.sse, v.sse, 0x55);
+ __m128 mult = _mm_mul_ps(scalar, m.row[1].sse);
+ res.sse = _mm_add_ps(res.sse, mult);
+
+ scalar = _mm_shuffle_ps(v.sse, v.sse, 0xaa);
+ mult = _mm_mul_ps(scalar, m.row[2].sse);
+ res.sse = _mm_add_ps(res.sse, mult);
+
+ scalar = _mm_shuffle_ps(v.sse, v.sse, 0xff);
+ mult = _mm_mul_ps(scalar, m.row[3].sse);
+ res.sse = _mm_add_ps(res.sse, mult);
+#else
res.x += v.x*m.data[0][0];
res.y += v.x*m.data[0][1];
@@ -399,11 +432,15 @@ Vec4 multiply4mv(Mat4 m, Vec4 v)
res.y += v.w*m.data[3][1];
res.z += v.w*m.data[3][2];
res.w += v.w*m.data[3][3];
+#endif
+
+ r64 curr_tick = SDL_GetPerformanceCounter();
+ cum_math_ticks += curr_tick - prev_tick;
return res;
}
-Mat4 multiply4m(Mat4 a, Mat4 b)
+Mat4 multiply4m(Mat4 a, Mat4 b)
{
Mat4 res = { 0 };
@@ -416,15 +453,15 @@ Mat4 multiply4m(Mat4 a, Mat4 b)
}
// ==== Matrix Transformation ====
-Mat4 scaling_matrix4m(r32 x, r32 y, r32 z)
+Mat4 scaling_matrix4m(r32 x, r32 y, r32 z) // returns a matrix whose first three diagonal entries are x, y, z
{
// generates a 4x4 scaling matrix that scales each of the x, y and z axes
- Mat4 res = diag4m(1.0f);
- res.data[0][0] = x;
- res.data[1][1] = y;
- res.data[2][2] = z;
-
- return res;
+ Mat4 res = diag4m(1.0f); // start from a diagonal of 1.0f so data[3][3] stays 1
+ res.data[0][0] = x;
+ res.data[1][1] = y;
+ res.data[2][2] = z;
+
+ return res;
}
Mat4 translation_matrix4m(r32 x, r32 y, r32 z)
@@ -479,7 +516,7 @@ Mat4 lookat4m(Vec3 up, Vec3 forward, Vec3 right, Vec3 position)
/*
* @note: The construction of the lookat matrix is not obvious. For that reason here is the supplemental material I have used to understand
* things while I maintain my elementary understanding of linear algebra.
- * 1. This youtube video (https://www.youtube.com/watch?v=3ZmqJb7J5wE) helped me understand why we invert matrices.
+ * 1. This youtube video (https://www.youtube.com/watch?v=3ZmqJb7J5wE) helped me understand why we invert matrices.
* It is because, we are moving from the position matrix which is a global to the view matrix which
* is a local. It won't be very clear from this illustration alone, so you would be best served watching the video and recollecting and understanding from there.
* 2. This article (https://twodee.org/blog/17560) derives (or rather shows), in a very shallow way how we get to the look at matrix.
@@ -493,7 +530,7 @@ Mat4 lookat4m(Vec3 up, Vec3 forward, Vec3 right, Vec3 position)
res.data[3][1] = -dot3v(up, position);
res.data[3][2] = -dot3v(forward, position);
res.data[3][3] = 1.0f;
-
+
return res;
}
@@ -504,7 +541,7 @@ Vec3 camera_look_around(r32 angle_pitch, r32 angle_yaw)
camera_look.y = sinf(angle_pitch);
camera_look.z = sinf(angle_yaw) * cosf(angle_pitch);
camera_look = normalize3v(camera_look);
-
+
return camera_look;
}
@@ -512,14 +549,14 @@ Mat4 camera_create4m(Vec3 camera_pos, Vec3 camera_look, Vec3 camera_up)
{
// @note: We do this because this allows the camera to have the axis it looks at
// inwards be the +z axis.
- // If we did not do this, then the inward axis the camera looks at would be negative.
+ // If we did not do this, then the inward axis the camera looks at would be negative.
// I am still learning from learnopengl.com but I imagine that this was done for convenience's sake.
Vec3 camera_forward_dir = normalize3v(subtract3v(camera_pos, camera_look));
Vec3 camera_right_dir = normalize3v(cross3v(camera_up, camera_forward_dir));
Vec3 camera_up_dir = normalize3v(cross3v(camera_forward_dir, camera_right_dir));
-
+
Mat4 res = lookat4m(camera_up_dir, camera_forward_dir, camera_right_dir, camera_pos);
-
+
return res;
}