commit 56986bc1d6f9bb29f08ff7d27f0a3e93d994ff18
parent c76c3ad493685bc3f6593ce23f00da5f1ea1cae3
Author: noone <vazkats@gmail.com>
Date: Wed, 7 May 2025 17:55:09 +0300
Some small changes again.
Full speed ahead to the CD-ROM drive implementation.
Diffstat:
4 files changed, 75 insertions(+), 46 deletions(-)
diff --git a/src/defs.h b/src/defs.h
@@ -2,6 +2,8 @@
static const int debug = 0; // Set to 0 to disable debug prints
+#include <assert.h>
+
#define LOG_ERR(x) fprintf(stderr, (x))
/* #define PANIC(...) do { fprintf(stderr, __VA_ARGS__); exit(EXIT_FAILURE); } while(0) */
@@ -20,3 +22,5 @@ static const int debug = 0; // Set to 0 to disable debug prints
#define LOG(level, fmt, ...) \
do { if (level >= CURRENT_LOG_LEVEL) fprintf(stderr, fmt, __VA_ARGS__); } while (0)
+#define GPU_LOG(fmt, ...) \
+ do { fprintf(stderr, fmt, __VA_ARGS__); } while (0)
diff --git a/src/gpu.c b/src/gpu.c
@@ -323,6 +323,7 @@ GPU_gp0_drawing_offset(GPU* gpu)
But gp0_drawing_offset is called every frame apparently.
*/
REN_display(gpu->ren);
+
}
void
@@ -350,17 +351,20 @@ GPU_gp0_mask_bit_setting(GPU* gpu)
void
GPU_gp0_quad_mono_opaque(GPU* gpu)
{
- ivec2 positions[4];
- C colors[4];
+ ivec2 positions[4];
+ C colors[4];
- positions[0] = POSITION_from_gp0(gpu->gp0_command.buffer[1]);
- positions[1] = POSITION_from_gp0(gpu->gp0_command.buffer[2]);
- positions[2] = POSITION_from_gp0(gpu->gp0_command.buffer[3]);
- positions[3] = POSITION_from_gp0(gpu->gp0_command.buffer[4]);
+ positions[0] = POSITION_from_gp0(gpu->gp0_command.buffer[1]);
+ positions[1] = POSITION_from_gp0(gpu->gp0_command.buffer[2]);
+ positions[2] = POSITION_from_gp0(gpu->gp0_command.buffer[3]);
+ positions[3] = POSITION_from_gp0(gpu->gp0_command.buffer[4]);
- colors[0] = colors[1] = colors[2] = colors[3] = COLOR_from_gp0(gpu->gp0_command.buffer[0]);
+ colors[0] = colors[1] = colors[2] = colors[3] = COLOR_from_gp0(gpu->gp0_command.buffer[0]);
- REN_push_quad(gpu->ren, positions, colors);
+ //GPU_LOG("Draw quad mono opaque at x: %d, y: %d\n", positions[0].x, positions[0].y, NULL);
+ //GPU_LOG("Draw quad mono opaque with R: %d, G: %d, B: %d\n", colors[0].r, colors[0].g, colors[0].b, NULL);
+
+ REN_push_quad(gpu->ren, positions, colors);
}
void
@@ -379,7 +383,7 @@ GPU_gp0_triangle_shaded_opaque(GPU* gpu)
REN_push_triangle(gpu->ren, positions, colors);
- LOG(LOG_DEBUG, "Draw triangle shaded\n", NULL);
+ //GPU_LOG("Draw triangle shaded at x: %d, y: %d\n", positions[0].x, positions[0].y, NULL);
}
void
@@ -393,7 +397,7 @@ GPU_gp0_quad_texture_blend_opaque(GPU* gpu)
positions[2] = POSITION_from_gp0(gpu->gp0_command.buffer[5]);
positions[3] = POSITION_from_gp0(gpu->gp0_command.buffer[7]);
- colors[0] = colors[1] = colors[2] = colors[3] = (C){0x00, 0x00, 0x80};
+ colors[0] = colors[1] = colors[2] = colors[3] = (C){0x80, 0x00, 0x00};
REN_push_quad(gpu->ren, positions, colors);
}
@@ -414,6 +418,7 @@ GPU_gp0_quad_shaded_opaque(GPU* gpu)
colors[2] = COLOR_from_gp0(gpu->gp0_command.buffer[4]);
colors[3] = COLOR_from_gp0(gpu->gp0_command.buffer[6]);
+ //GPU_LOG("I GOT CALLED\n", NULL);
REN_push_quad(gpu->ren, positions, colors);
}
diff --git a/src/sr.c b/src/sr.c
@@ -106,7 +106,21 @@ C_new(u32 b)
return c;
};
-void REN_FB_set(REN* ren, i32 x, i32 y, C c) { (!ren->fb || x<0 || y<0 || x>=W || y>=H) ? 0 : memcpy(ren->fb + ((x + y * W)), &c, 3); }
+void REN_FB_set(REN* ren, i32 x, i32 y, C c) {
+
+ u32 *fb;
+
+ if (!ren->fb || x < 0 || y < 0 || x >= W || y >= H) return;
+
+ // Clamp color values
+ c.r = c.r > 255 ? 255 : c.r;
+ c.g = c.g > 255 ? 255 : c.g;
+ c.b = c.b > 255 ? 255 : c.b;
+
+ // Direct write instead of memcpy
+ fb = ren->fb + (x + y * W);
+ *fb = (c.r) | (c.g << 8) | (c.b << 16);
+}
//C*
//FB_get(i32 x, i32 y)
@@ -123,10 +137,8 @@ REN_new()
ren->window = SDL_CreateWindow("Ultimecia", 400 , 300, WIN_W, WIN_H, SDL_WINDOW_SHOWN);
ren->renderer = SDL_CreateRenderer(ren->window, -1, 0);
ren->tex = SDL_CreateTexture(ren->renderer, SDL_PIXELFORMAT_RGB888, SDL_TEXTUREACCESS_STREAMING, W, H);
- ren->verts = (ivec2*)malloc(sizeof(ivec2) * VERTEX_BUFFER_LEN);
- ren->colors = (C*)malloc(sizeof(C) * VERTEX_BUFFER_LEN);
- ren->verts = (ivec2*)malloc(sizeof(ivec2) * 10000);
- ren->colors = (C*)malloc(sizeof(C) * 10000);
+ ren->verts = (ivec2*)malloc(sizeof(ivec2) * 10000); // Single allocation with larger size
+ ren->colors = (C*)malloc(sizeof(C) * 10000); // Single allocation with larger size
ren->fb = (u32*)malloc(W*H*sizeof(u32));
memset(ren->fb, 0, (u32)(W*H)*sizeof(u32));
ren->nvertices = 0;
@@ -160,44 +172,50 @@ draw_scanline(REN* ren, int y, int x1, C c1, int x2, C c2)
void
REN_triangle(REN* ren, ivec2 verts[3], C colors[3])
{
- float dx01, dx02, dx12, dr01, dg01, db01, dr02, dg02, db02, dr12, dg12, db12, xL, xR, rL, rR, gL, gR, bL, bR;
+ // Check for degenerate triangles
+ if (verts[0].x == verts[1].x && verts[0].y == verts[1].y) return;
+ if (verts[1].x == verts[2].x && verts[1].y == verts[2].y) return;
+ if (verts[2].x == verts[0].x && verts[2].y == verts[0].y) return;
/* Sort vertices by y-coordinate to ensure we process from top to bottom */
if (verts[0].y > verts[1].y) { swap_vec2(&verts[0], &verts[1]); swap_color(&colors[0], &colors[1]); }
if (verts[0].y > verts[2].y) { swap_vec2(&verts[0], &verts[2]); swap_color(&colors[0], &colors[2]); }
if (verts[1].y > verts[2].y) { swap_vec2(&verts[1], &verts[2]); swap_color(&colors[1], &colors[2]); }
- // Calculate edge slopes
- dx01 = (verts[1].x - verts[0].x) / (float)(verts[1].y - verts[0].y + 1);
- dx02 = (verts[2].x - verts[0].x) / (float)(verts[2].y - verts[0].y + 1);
- dx12 = (verts[2].x - verts[1].x) / (float)(verts[2].y - verts[1].y + 1);
+ // Calculate edge slopes using fixed-point arithmetic
+ i32 dx01 = ((verts[1].x - verts[0].x) << 16) / (verts[1].y - verts[0].y + 1);
+ i32 dx02 = ((verts[2].x - verts[0].x) << 16) / (verts[2].y - verts[0].y + 1);
+ i32 dx12 = ((verts[2].x - verts[1].x) << 16) / (verts[2].y - verts[1].y + 1);
- dr01 = (colors[1].r - colors[0].r) / (float)(verts[1].y - verts[0].y + 1);
- dg01 = (colors[1].g - colors[0].g) / (float)(verts[1].y - verts[0].y + 1);
- db01 = (colors[1].b - colors[0].b) / (float)(verts[1].y - verts[0].y + 1);
+ // Calculate color slopes using fixed-point arithmetic
+ i32 dr01 = ((colors[1].r - colors[0].r) << 16) / (verts[1].y - verts[0].y + 1);
+ i32 dg01 = ((colors[1].g - colors[0].g) << 16) / (verts[1].y - verts[0].y + 1);
+ i32 db01 = ((colors[1].b - colors[0].b) << 16) / (verts[1].y - verts[0].y + 1);
- dr02 = (colors[2].r - colors[0].r) / (float)(verts[2].y - verts[0].y + 1);
- dg02 = (colors[2].g - colors[0].g) / (float)(verts[2].y - verts[0].y + 1);
- db02 = (colors[2].b - colors[0].b) / (float)(verts[2].y - verts[0].y + 1);
+ i32 dr02 = ((colors[2].r - colors[0].r) << 16) / (verts[2].y - verts[0].y + 1);
+ i32 dg02 = ((colors[2].g - colors[0].g) << 16) / (verts[2].y - verts[0].y + 1);
+ i32 db02 = ((colors[2].b - colors[0].b) << 16) / (verts[2].y - verts[0].y + 1);
- dr12 = (colors[2].r - colors[1].r) / (float)(verts[2].y - verts[1].y + 1);
- dg12 = (colors[2].g - colors[1].g) / (float)(verts[2].y - verts[1].y + 1);
- db12 = (colors[2].b - colors[1].b) / (float)(verts[2].y - verts[1].y + 1);
+ i32 dr12 = ((colors[2].r - colors[1].r) << 16) / (verts[2].y - verts[1].y + 1);
+ i32 dg12 = ((colors[2].g - colors[1].g) << 16) / (verts[2].y - verts[1].y + 1);
+ i32 db12 = ((colors[2].b - colors[1].b) << 16) / (verts[2].y - verts[1].y + 1);
// Rasterize top part
- xL = verts[0].x, rL = colors[0].r, gL = colors[0].g, bL = colors[0].b;
- xR = verts[0].x, rR = colors[0].r, gR = colors[0].g, bR = colors[0].b;
+ i32 xL = verts[0].x << 16, rL = colors[0].r << 16, gL = colors[0].g << 16, bL = colors[0].b << 16;
+ i32 xR = verts[0].x << 16, rR = colors[0].r << 16, gR = colors[0].g << 16, bR = colors[0].b << 16;
- for (int y = verts[0].y; y < verts[1].y; y++) {
- draw_scanline(ren, y, (int)xL, (C){rL, gL, bL}, (int)xR, (C){rR, gR, bR});
+ for (i32 y = verts[0].y; y < verts[1].y; y++) {
+ draw_scanline(ren, y, xL >> 16, (C){rL >> 16, gL >> 16, bL >> 16},
+ xR >> 16, (C){rR >> 16, gR >> 16, bR >> 16});
xL += dx01; rL += dr01; gL += dg01; bL += db01;
xR += dx02; rR += dr02; gR += dg02; bR += db02;
}
// Rasterize bottom part
- xL = verts[1].x, rL = colors[1].r, gL = colors[1].g, bL = colors[1].b;
- for (int y = verts[1].y; y < verts[2].y; y++) {
- draw_scanline(ren, y, (int)xL, (C){rL, gL, bL}, (int)xR, (C){rR, gR, bR});
+ xL = verts[1].x << 16, rL = colors[1].r << 16, gL = colors[1].g << 16, bL = colors[1].b << 16;
+ for (i32 y = verts[1].y; y < verts[2].y; y++) {
+ draw_scanline(ren, y, xL >> 16, (C){rL >> 16, gL >> 16, bL >> 16},
+ xR >> 16, (C){rR >> 16, gR >> 16, bR >> 16});
xL += dx12; rL += dr12; gL += dg12; bL += db12;
xR += dx02; rR += dr02; gR += dg02; bR += db02;
}
@@ -224,28 +242,25 @@ REN_push_quad(REN* ren, ivec2 verts[4], C colors[4])
REN_flush(ren);
+ // First triangle: vertices 0,1,2
for (i = 0; i < 3; i++) {
ren->verts[ren->nvertices] = verts[i];
ren->colors[ren->nvertices] = colors[i];
ren->nvertices++;
}
-
for (i = 1; i < 4; i++) {
ren->verts[ren->nvertices] = verts[i];
ren->colors[ren->nvertices] = colors[i];
ren->nvertices++;
}
- int a;
}
void
REN_flush(REN* ren) {
- u32 i;
- if (ren->nvertices > 0) { // Always draw whatever is in the buffer
- for (i = 0; i < ren->nvertices; i += 3)
- REN_triangle(ren, ren->verts + i, ren->colors + i);
- ren->nvertices = 0; // Reset buffer for next frame
- }
+ u32 i;
+ for (i = 0; i < ren->nvertices; i += 3)
+ REN_triangle(ren, ren->verts + i, ren->colors + i);
+ ren->nvertices = 0; // Reset buffer for next frame
}
void
@@ -258,6 +273,11 @@ REN_draw(REN* ren)
void
REN_display(REN* ren)
{
+ // Flush any remaining vertices before displaying
+ if (ren->nvertices > 0) {
+ REN_flush(ren);
+ }
+
REN_draw(ren);
SDL_RenderPresent(ren->renderer);
}
diff --git a/src/sr.h b/src/sr.h
@@ -6,8 +6,8 @@
#define W 640
#define H 480
-#define WIN_W 640
-#define WIN_H 480
+#define WIN_W 1280
+#define WIN_H 960
#define VERTEX_BUFFER_LEN 64*1024
#define VEC2I_SWAP(x, y) { ivec2 temp = x; x = y; y = temp; }