summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: kkard2 <[email protected]> 2025-09-07 16:21:10 +0200
committer: kkard2 <[email protected]> 2025-09-07 16:21:10 +0200
commit: 704445e1c469d7856f0e19d30d68cb1b5cecfb83 (patch)
tree: 3699fc836ca77824e24be26ad794e88c837fe865
parent: 2e3a8b6b9b7c69a05b36286ef11bf4945e79c5b3 (diff)
make triangle code more performant
-rw-r--r-- run.bat 2
-rw-r--r-- sponge.h 90
2 files changed, 47 insertions, 45 deletions
diff --git a/run.bat b/run.bat
index 0ec23a4..f58c538 100644
--- a/run.bat
+++ b/run.bat
@@ -1,4 +1,4 @@
del out.exe
del *.obj
-cl platform_win32.c "examples\%1" user32.lib gdi32.lib /W3 /Fe:out.exe /Z7
+cl platform_win32.c "examples\%1" user32.lib gdi32.lib /W3 /Fe:out.exe /Z7 /O2
out.exe
diff --git a/sponge.h b/sponge.h
index 1c13a9c..07d8533 100644
--- a/sponge.h
+++ b/sponge.h
@@ -39,54 +39,55 @@ void sponge_draw_triangle_col3(
#define SPONGE__MIN(x, y) ((x) < (y) ? (x) : (y))
#define SPONGE__MAX(x, y) ((x) > (y) ? (x) : (y))
-// TODO(kard): these color functions will probably be public in the future
+typedef struct {
+ float a;
+ float r;
+ float g;
+ float b;
+} sponge__ColorF;
-void sponge__col_unpack(uint32_t col, uint32_t *a, uint32_t *r, uint32_t *g, uint32_t *b) {
- *b = col & 0xFF;
+sponge__ColorF sponge__colf_unpack(uint32_t col) {
+ uint32_t b = col & 0xFF;
col = col >> 8;
- *g = col & 0xFF;
+ uint32_t g = col & 0xFF;
col = col >> 8;
- *r = col & 0xFF;
+ uint32_t r = col & 0xFF;
col = col >> 8;
- *a = col & 0xFF;
+ uint32_t a = col & 0xFF;
+ sponge__ColorF result = { .a = (float)a, .r = (float)r, .g = (float)g, .b = (float)b };
+ return result;
}
-uint32_t sponge__col_pack(uint32_t a, uint32_t r, uint32_t g, uint32_t b) {
+uint32_t sponge__colf_pack(sponge__ColorF color) {
uint32_t result = 0;
- result |= SPONGE__CLAMP(a, 0x00, 0xFF);
+ result |= SPONGE__CLAMP((uint32_t)color.a, 0x00, 0xFF);
result = result << 8;
- result |= SPONGE__CLAMP(r, 0x00, 0xFF);
+ result |= SPONGE__CLAMP((uint32_t)color.r, 0x00, 0xFF);
result = result << 8;
- result |= SPONGE__CLAMP(g, 0x00, 0xFF);
+ result |= SPONGE__CLAMP((uint32_t)color.g, 0x00, 0xFF);
result = result << 8;
- result |= SPONGE__CLAMP(b, 0x00, 0xFF);
+ result |= SPONGE__CLAMP((uint32_t)color.b, 0x00, 0xFF);
return result;
}
-uint32_t sponge__col_add(uint32_t col0, uint32_t col1) {
- uint32_t a0, r0, g0, b0;
- uint32_t a1, r1, g1, b1;
- sponge__col_unpack(col0, &a0, &r0, &g0, &b0);
- sponge__col_unpack(col1, &a1, &r1, &g1, &b1);
- uint32_t a = a0 + a1;
- uint32_t r = r0 + r1;
- uint32_t g = g0 + g1;
- uint32_t b = b0 + b1;
- return sponge__col_pack(a, r, g, b);
+sponge__ColorF sponge__colf_mul(sponge__ColorF color, float f) {
+ color.a *= f;
+ color.r *= f;
+ color.g *= f;
+ color.b *= f;
+ return color;
}
-uint32_t sponge__col_mul(uint32_t col, float t) {
- float b = (float)(col & 0xFF) * t;
- col = col >> 8;
- float g = (float)(col & 0xFF) * t;
- col = col >> 8;
- float r = (float)(col & 0xFF) * t;
- col = col >> 8;
- float a = (float)(col & 0xFF) * t;
-
- return sponge__col_pack((uint32_t)a, (uint32_t)r, (uint32_t)g, (uint32_t)b);
+sponge__ColorF sponge__colf_add(sponge__ColorF color0, sponge__ColorF color1) {
+ sponge__ColorF result;
+ result.a = color0.a + color1.a;
+ result.r = color0.r + color1.r;
+ result.g = color0.g + color1.g;
+ result.b = color0.b + color1.b;
+ return result;
}
+
typedef struct {
sponge_Vec2 v0;
sponge_Vec2 v1;
@@ -94,11 +95,11 @@ typedef struct {
float d01;
float d11;
float denom;
-} sponge_BarycentricContext;
+} sponge__BarycentricContext;
// TODO(kard): measure if caching this makes sense
-sponge_BarycentricContext sponge__barycentric_init(sponge_Vec2 t0, sponge_Vec2 t1, sponge_Vec2 t2) {
- sponge_BarycentricContext result;
+sponge__BarycentricContext sponge__barycentric_init(sponge_Vec2 t0, sponge_Vec2 t1, sponge_Vec2 t2) {
+ sponge__BarycentricContext result;
result.v0 = sponge_sub2(t1, t0);
result.v1 = sponge_sub2(t2, t0);
result.d00 = sponge_dot2(result.v0, result.v0);
@@ -109,7 +110,7 @@ sponge_BarycentricContext sponge__barycentric_init(sponge_Vec2 t0, sponge_Vec2 t
}
void sponge__barycentric(
- sponge_BarycentricContext ctx, sponge_Vec2 p,
+ sponge__BarycentricContext ctx, sponge_Vec2 p,
sponge_Vec2 t0,
sponge_Vec2 t1,
sponge_Vec2 t2,
@@ -220,10 +221,14 @@ void sponge_draw_triangle_col3(
uint32_t min_y = (uint32_t)SPONGE__CLAMP(smin_y, 0, (int32_t)(c.height - 1));
uint32_t max_y = (uint32_t)SPONGE__CLAMP(smax_y, 0, (int32_t)(c.height - 1));
- sponge_BarycentricContext ctx = sponge__barycentric_init(t0, t1, t2);
+ sponge__BarycentricContext ctx = sponge__barycentric_init(t0, t1, t2);
uint32_t *row = c.pixels + (min_y * c.stride);
+ sponge__ColorF color0f = sponge__colf_unpack(color0);
+ sponge__ColorF color1f = sponge__colf_unpack(color1);
+ sponge__ColorF color2f = sponge__colf_unpack(color2);
+
for (uint32_t y = min_y; y <= max_y; y++, row += c.stride) {
for (uint32_t x = x0; x <= max_x; x++) {
float u, v, w;
@@ -232,14 +237,11 @@ void sponge_draw_triangle_col3(
if (u > 0.0f && v > 0.0f && w > 0.0f)
{
// TODO(kard): make more robust, this probably has a lot of off by 1 errors
- uint32_t c0 = sponge__col_mul(color0, u);
- uint32_t c1 = sponge__col_mul(color1, v);
- uint32_t c2 = sponge__col_mul(color2, w);
- uint32_t result = 0;
- result = sponge__col_add(result, c0);
- result = sponge__col_add(result, c1);
- result = sponge__col_add(result, c2);
- row[x] = result;
+ sponge__ColorF c0 = sponge__colf_mul(color0f, u);
+ sponge__ColorF c1 = sponge__colf_mul(color1f, v);
+ sponge__ColorF c2 = sponge__colf_mul(color2f, w);
+ sponge__ColorF result = sponge__colf_add(c0, sponge__colf_add(c1, c2));
+ row[x] = sponge__colf_pack(result);
}
}
}