OPTIMIZATION: Bytecode engine

2026-05-15 18:58:40 +00:00 · 2026-04-02 20:21:58 +02:00 · 2026-04-02 20:21:58 +02:00 · e60f232812
commit e60f232812
parent 3169ec51cb
1 changed files with 184 additions and 45 deletions
--- a/src/userland/gui/grapher.c
+++ b/src/userland/gui/grapher.c
@ -143,6 +143,23 @@ enum {
 typedef struct { int type; double value; } Token;
 typedef struct { int type; double value; int var_idx; int left, right; } ASTNode;

+// Opcodes of the stack-machine bytecode executed by run_bc().
+// The enumerators keep their implicit values 0..13; only the layout changed.
+enum {
+    OP_PUSH_NUM,   // push the instruction's literal value
+    OP_PUSH_VAR,   // push x, y or z, selected by var_idx
+    OP_ADD,        // binary: a + b
+    OP_SUB,        // binary: a - b
+    OP_MUL,        // binary: a * b
+    OP_DIV,        // binary: a / b (guarded against near-zero b in run_bc)
+    OP_POW,        // binary: a raised to b
+    OP_NEG,        // unary: negate top of stack
+    OP_SIN,        // unary: my_sin
+    OP_COS,        // unary: my_cos
+    OP_TAN,        // unary: my_tan
+    OP_SQRT,       // unary: my_sqrt
+    OP_ABS,        // unary: my_fabs
+    OP_LOG         // unary: my_ln
+};
+
+// One bytecode instruction produced by compile_ast() and consumed by run_bc().
+typedef struct {
+    int op;       // one of the OP_* opcodes
+    double val;   // literal operand; written for OP_PUSH_NUM only
+    int var_idx;  // variable selector for OP_PUSH_VAR: 0 -> x, 1 -> y, otherwise z
+} Instruction;
+
+// Capacity, in instructions, of each compiled program buffer.
+#define MAX_BC_SIZE 256
+// Compiled programs for the left and right side of the equation, and the
+// number of valid instructions in each (0 means "nothing compiled").
+static Instruction lhs_bc[MAX_BC_SIZE], rhs_bc[MAX_BC_SIZE];
+static int lhs_bc_len = 0, rhs_bc_len = 0;
+


 static bool is_alpha(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
@ -329,6 +346,73 @@ static double eval_ast(ASTNode *n, int idx, double x, double y, double z) {
    return 0;
 }

+// =================
+// Bytecode Compiler
+// =================
+// Emit post-order bytecode for the subtree rooted at nodes[idx] into bc,
+// advancing *len past every instruction written.
+// Returns true on success; idx < 0 denotes an absent child and emits nothing.
+// Returns false when bc has no room left or the node type has no opcode; in
+// that case *len may already have advanced past partially emitted children.
+static bool compile_ast_node(ASTNode *nodes, int idx, Instruction *bc, int *len) {
+    if (idx < 0) return true;
+    ASTNode *n = &nodes[idx];
+
+    // Operands first, operator last: the stack machine pops what the children pushed.
+    if (!compile_ast_node(nodes, n->left, bc, len)) return false;
+    if (!compile_ast_node(nodes, n->right, bc, len)) return false;
+
+    // Capacity must be checked here, not only on entry: the children may have
+    // consumed the last free slots, and writing bc[MAX_BC_SIZE] is out of bounds.
+    if (*len >= MAX_BC_SIZE) return false;
+
+    Instruction inst = { .op = OP_PUSH_NUM, .val = 0.0, .var_idx = 0 };
+    switch (n->type) {
+        case NODE_NUM:  inst.op = OP_PUSH_NUM; inst.val = n->value; break;
+        case NODE_VAR:  inst.op = OP_PUSH_VAR; inst.var_idx = n->var_idx; break;
+        case NODE_ADD:  inst.op = OP_ADD;  break;
+        case NODE_SUB:  inst.op = OP_SUB;  break;
+        case NODE_MUL:  inst.op = OP_MUL;  break;
+        case NODE_DIV:  inst.op = OP_DIV;  break;
+        case NODE_POW:  inst.op = OP_POW;  break;
+        case NODE_NEG:  inst.op = OP_NEG;  break;
+        case NODE_SIN:  inst.op = OP_SIN;  break;
+        case NODE_COS:  inst.op = OP_COS;  break;
+        case NODE_TAN:  inst.op = OP_TAN;  break;
+        case NODE_SQRT: inst.op = OP_SQRT; break;
+        case NODE_ABS:  inst.op = OP_ABS;  break;
+        case NODE_LOG:  inst.op = OP_LOG;  break;
+        // NOTE(review): assumes the cases above cover every node type the parser
+        // produces; anything else is rejected rather than emitted with a stale opcode.
+        default: return false;
+    }
+    bc[(*len)++] = inst;
+    return true;
+}
+
+// Compile the expression tree rooted at nodes[idx] into the program bc.
+//   nodes - AST node pool; left/right children are indices into it (< 0 = none)
+//   idx   - index of the root node to compile
+//   bc    - output buffer with room for MAX_BC_SIZE instructions
+//   len   - in/out: number of instructions already in bc; advanced on success
+// Returns 1 when the whole tree was compiled. Returns 0 when idx < 0, when the
+// tree does not fit into MAX_BC_SIZE instructions, or when it contains a node
+// type without an opcode; on failure *len is restored to its value on entry so
+// a truncated program is never left behind for the interpreter.
+static int compile_ast(ASTNode *nodes, int idx, Instruction *bc, int *len) {
+    if (idx < 0) return 0;
+    const int start = *len;
+    if (!compile_ast_node(nodes, idx, bc, len)) {
+        *len = start; // discard the partially emitted program
+        return 0;
+    }
+    return 1;
+}
+
+// Execute a compiled program on a fixed-size operand stack.
+//   bc, len - instruction array and the number of instructions to run
+//   x, y, z - values pushed by OP_PUSH_VAR (var_idx 0 -> x, 1 -> y, otherwise z)
+// Returns the value left on top of the stack, or 0 for an empty program.
+// A program that would overflow or underflow the operand stack, or that
+// contains an unknown opcode, also returns 0 instead of indexing outside the stack.
+static double run_bc(Instruction *bc, int len, double x, double y, double z) {
+    enum { BC_STACK_DEPTH = 32 };
+    if (len <= 0) return 0;
+    double stack[BC_STACK_DEPTH];
+    int sp = 0;
+
+    for (int i = 0; i < len; i++) {
+        const Instruction *inst = &bc[i];
+        switch (inst->op) {
+            case OP_PUSH_NUM:
+            case OP_PUSH_VAR:
+                // Deeply right-nested expressions can need more than BC_STACK_DEPTH slots.
+                if (sp >= BC_STACK_DEPTH) return 0;
+                if (inst->op == OP_PUSH_NUM) stack[sp++] = inst->val;
+                else stack[sp++] = (inst->var_idx == 0 ? x : inst->var_idx == 1 ? y : z);
+                break;
+
+            case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_POW: {
+                if (sp < 2) return 0; // malformed program: operand missing
+                double b = stack[--sp];
+                double a = stack[sp - 1];
+                double r;
+                switch (inst->op) {
+                    case OP_ADD: r = a + b; break;
+                    case OP_SUB: r = a - b; break;
+                    case OP_MUL: r = a * b; break;
+                    case OP_DIV:
+                        // Near-zero divisor: substitute a large finite value instead of dividing.
+                        r = (my_fabs(b) < 1e-15) ? 1e15 : a / b;
+                        break;
+                    default: // OP_POW
+                        // Exponents 2 and 3 are multiplied out instead of calling my_pow.
+                        if (b == 2.0) r = a * a;
+                        else if (b == 3.0) r = a * a * a;
+                        else r = my_pow(a, b);
+                        break;
+                }
+                stack[sp - 1] = r; // result replaces the left operand's slot
+                break;
+            }
+
+            case OP_NEG: case OP_SIN: case OP_COS: case OP_TAN:
+            case OP_SQRT: case OP_ABS: case OP_LOG: {
+                if (sp < 1) return 0; // malformed program: operand missing
+                double *top = &stack[sp - 1];
+                switch (inst->op) {
+                    case OP_NEG:  *top = -*top; break;
+                    case OP_SIN:  *top = my_sin(*top); break;
+                    case OP_COS:  *top = my_cos(*top); break;
+                    case OP_TAN:  *top = my_tan(*top); break;
+                    case OP_SQRT: *top = my_sqrt(*top); break;
+                    case OP_ABS:  *top = my_fabs(*top); break;
+                    default:      *top = my_ln(*top); break; // OP_LOG
+                }
+                break;
+            }
+
+            default:
+                return 0; // unknown opcode: the program is corrupt
+        }
+    }
+    return sp > 0 ? stack[sp-1] : 0;
+}
+
 // Check which variables an AST subtree uses
 static void ast_find_vars(ASTNode *n, int idx, bool *has_x, bool *has_y, bool *has_z) {
    if (idx < 0) return;
@ -490,11 +574,11 @@ static void project_3d(double px, double py, double pz, int *sx, int *sy) {
 // Evaluate the implicit function: f(x,y,z) = LHS - RHS
 // ====================================================
 static double eval_implicit(double x, double y, double z) {
-    return eval_ast(lhs_nodes, lhs_root, x, y, z) - eval_ast(rhs_nodes, rhs_root, x, y, z);
+    // Both sides are evaluated through the bytecode compiled in parse_equation();
+    // an empty program (length 0) contributes 0.
+    return run_bc(lhs_bc, lhs_bc_len, x, y, z) - run_bc(rhs_bc, rhs_bc_len, x, y, z);
 }

 static double eval_rhs_only(double x, double y, double z) {
-    return eval_ast(rhs_nodes, rhs_root, x, y, z);
+    // Evaluate only the right-hand side, using its compiled bytecode program.
+    return run_bc(rhs_bc, rhs_bc_len, x, y, z);
 }

 // ===========================
@ -575,6 +659,11 @@ static void parse_equation(void) {
    }

    eq_valid = true;
+
+    // Compile to bytecode
+    lhs_bc_len = 0; rhs_bc_len = 0;
+    if (lhs_root >= 0) compile_ast(lhs_nodes, lhs_root, lhs_bc, &lhs_bc_len);
+    if (rhs_root >= 0) compile_ast(rhs_nodes, rhs_root, rhs_bc, &rhs_bc_len);
 }

 // =========
@ -769,15 +858,21 @@ static void render_3d_axes(void) {
    project_3d(0, 0, range_3d, &ax, &ay); gfb_line(ox, oy, ax, ay, 0xFF4444FF);
 }

-static void render_3d_explicit(void) {
-    double step = range_3d * 2.0 / (GRID_3D - 1);
-    double zmin = 1e30, zmax = -1e30;
-// why are you reading this lol
-    if (surface_needs_eval) {
-        for (int j = 0; j < GRID_3D; j++) {
+// =======================
+// Parallel Evaluation Job
+// =======================
+// Argument block for one evaluation job: the band of grid rows it covers and
+// the grid geometry it needs to turn indices into world coordinates.
+typedef struct {
+    int start_j, end_j; // rows to process: start_j inclusive, end_j exclusive
+    double range;       // world coordinates are computed as -range + index * step
+    double step;        // spacing between neighbouring grid samples in x and y
+} eval_job_t;
+
+static void eval_3d_explicit_job(void *arg) {
+    eval_job_t *job = (eval_job_t *)arg;
+    for (int j = job->start_j; j < job->end_j; j++) {
        for (int i = 0; i < GRID_3D; i++) {
-                double wx = -range_3d + i * step;
-                double wy = -range_3d + j * step;
+            double wx = -job->range + i * job->step;
+            double wy = -job->range + j * job->step;
            double wz = eval_rhs_only(wx, wy, 0);
            surf_x[j][i] = wx;
            surf_y_3d[j][i] = wy;
@ -790,6 +885,66 @@ static void render_3d_explicit(void) {
    }
 }

+// Job body for the implicit 3D surface: for every grid cell in rows
+// [start_j, end_j) scan z from -range to +range looking for sign changes of
+// eval_implicit(), and record up to two roots per cell.
+//   arg - pointer to an eval_job_t describing the row band and grid geometry
+// Writes surf_x, surf_y_3d, surf_z1/surf_v1 and surf_z2/surf_v2 for its own
+// rows only.
+static void eval_3d_implicit_job(void *arg) {
+    eval_job_t *job = (eval_job_t *)arg;
+    int z_steps = 100; // number of coarse samples along z
+    double z_step = job->range * 2.0 / z_steps;
+
+    for (int j = job->start_j; j < job->end_j; j++) {
+        for (int i = 0; i < GRID_3D; i++) {
+            // Start with "no root found" for both surface sheets of this cell.
+            surf_v1[j][i] = surf_v2[j][i] = false;
+            double wx = -job->range + i * job->step;
+            double wy = -job->range + j * job->step;
+            surf_x[j][i] = wx;
+            surf_y_3d[j][i] = wy;
+
+            double prev_f = eval_implicit(wx, wy, -job->range);
+            int roots_found = 0;
+            for (int k = 1; k <= z_steps && roots_found < 2; k++) {
+                double zz = -job->range + k * z_step;
+                double cur_f = eval_implicit(wx, wy, zz);
+                // A sign change between consecutive samples brackets a root.
+                // NOTE(review): the 1e10 magnitude limit presumably rejects jumps
+                // across poles (division yields 1e15 near a zero divisor) - confirm.
+                if ((prev_f > 0) != (cur_f > 0) && my_fabs(prev_f) < 1e10 && my_fabs(cur_f) < 1e10) {
+                    double za = zz - z_step, zb = zz;
+                    // Refine the bracket [za, zb] with 15 bisection steps; prev_f
+                    // always holds the function value at the lower end za.
+                    for (int b = 0; b < 15; b++) {
+                        double zm = (za + zb) * 0.5;
+                        double fm = eval_implicit(wx, wy, zm);
+                        if ((prev_f > 0) != (fm > 0)) zb = zm; else { za = zm; prev_f = fm; }
+                    }
+                    // First root goes to sheet 1, second root to sheet 2.
+                    if (roots_found == 0) {
+                        surf_z1[j][i] = (za + zb) * 0.5; surf_v1[j][i] = true;
+                    } else {
+                        surf_z2[j][i] = (za + zb) * 0.5; surf_v2[j][i] = true;
+                    }
+                    roots_found++;
+                }
+                // Continue the coarse scan from the current sample.
+                prev_f = cur_f;
+            }
+        }
+    }
+}
+
+static void render_3d_explicit(void) {
+    double step = range_3d * 2.0 / (GRID_3D - 1);
+    double zmin = 1e30, zmax = -1e30;
+// why are you reading this lol
+    if (surface_needs_eval) {
+        int num_chunks = 4; // Parallelize into 4 chunks (matching typical core count)
+        eval_job_t jobs[4];
+        void *job_args[4];
+        int rows_per_chunk = GRID_3D / num_chunks;
+        
+        for (int c = 0; c < num_chunks; c++) {
+            jobs[c].start_j = c * rows_per_chunk;
+            jobs[c].end_j = (c == num_chunks - 1) ? GRID_3D : (c + 1) * rows_per_chunk;
+            jobs[c].range = range_3d;
+            jobs[c].step = step;
+            job_args[c] = &jobs[c];
+        }
+
+        extern void sys_parallel_run(void (*fn)(void*), void **args, int count);
+        sys_parallel_run(eval_3d_explicit_job, job_args, num_chunks);
+    }
+    
    // Compute min/max for coloring based on what's visible
    for (int j = 0; j < GRID_3D; j++) {
        for (int i = 0; i < GRID_3D; i++) {
@ -835,37 +990,21 @@ static void render_3d_implicit(void) {
    double zmin = 1e30, zmax = -1e30;

    if (surface_needs_eval) {
-        for (int j = 0; j < GRID_3D; j++) {
-            for (int i = 0; i < GRID_3D; i++) {
-                surf_v1[j][i] = surf_v2[j][i] = false;
-                double wx = -range_3d + i * step;
-                double wy = -range_3d + j * step;
-                surf_x[j][i] = wx;
-                surf_y_3d[j][i] = wy;
+        int num_chunks = 4;
+        eval_job_t jobs[4];
+        void *job_args[4];
+        int rows_per_chunk = GRID_3D / num_chunks;
        
-                double prev_f = eval_implicit(wx, wy, -range_3d);
-                int roots_found = 0;
-                for (int k = 1; k <= z_steps && roots_found < 2; k++) {
-                    double zz = -range_3d + k * z_step;
-                    double cur_f = eval_implicit(wx, wy, zz);
-                    if ((prev_f > 0) != (cur_f > 0) && my_fabs(prev_f) < 1e10 && my_fabs(cur_f) < 1e10) {
-                        double za = zz - z_step, zb = zz;
-                        for (int b = 0; b < 15; b++) { // High bisection iterations for precision
-                            double zm = (za + zb) * 0.5;
-                            double fm = eval_implicit(wx, wy, zm);
-                            if ((prev_f > 0) != (fm > 0)) zb = zm; else { za = zm; prev_f = fm; }
-                        }
-                        if (roots_found == 0) {
-                            surf_z1[j][i] = (za + zb) * 0.5; surf_v1[j][i] = true;
-                        } else {
-                            surf_z2[j][i] = (za + zb) * 0.5; surf_v2[j][i] = true;
-                        }
-                        roots_found++;
-                    }
-                    prev_f = cur_f;
-                }
-            }
+        for (int c = 0; c < num_chunks; c++) {
+            jobs[c].start_j = c * rows_per_chunk;
+            jobs[c].end_j = (c == num_chunks - 1) ? GRID_3D : (c + 1) * rows_per_chunk;
+            jobs[c].range = range_3d;
+            jobs[c].step = step;
+            job_args[c] = &jobs[c];
        }
+
+        extern void sys_parallel_run(void (*fn)(void*), void **args, int count);
+        sys_parallel_run(eval_3d_implicit_job, job_args, num_chunks);
    }
    
    // Compute min/max for coloring based on what's visible