diff --git a/kernel/core/stubs.c b/kernel/core/stubs.c
index 92acfa8..72c6543 100644
--- a/kernel/core/stubs.c
+++ b/kernel/core/stubs.c
@@ -53,6 +53,47 @@ static int parse_int(const char *s)
     return result * sign;
 }
 
+/* Performance tracking for chat sessions */
+typedef struct {
+    uint64_t last_prefill_us;
+    uint64_t last_decode_us;
+    uint64_t last_total_us;
+    int last_prompt_tokens;
+    int last_generated_tokens;
+    uint64_t session_total_tokens;
+    uint64_t session_total_time_us;
+    int session_messages;
+    bool valid;
+} chat_perf_t;
+
+static chat_perf_t g_chat_perf = {0};
+
+/* rdtsc for timing */
+#if defined(__x86_64__)
+static inline uint64_t chat_get_cycles(void) {
+    uint32_t lo, hi;
+    __asm__ volatile ("rdtsc" : "=a"(lo), "=d"(hi));
+    return ((uint64_t)hi << 32) | lo;
+}
+#elif defined(__aarch64__)
+static inline uint64_t chat_get_cycles(void) {
+    uint64_t val;
+    __asm__ volatile ("mrs %0, cntvct_el0" : "=r"(val));
+    return val;
+}
+#else
+static inline uint64_t chat_get_cycles(void) { return 0; }
+#endif
+
+/* Convert cycles to microseconds (approximate) */
+static inline uint64_t chat_cycles_to_us(uint64_t cycles) {
+#if defined(__aarch64__)
+    return cycles / 1000;  /* HVF uses 1GHz virtual timer */
+#else
+    return cycles / 2000;  /* Approximate for ~2GHz x86 */
+#endif
+}
+
 /* Command processor implementation */
 void command_processor_init(struct embodios_model *model)
 {
@@ -66,54 +107,189 @@ void command_processor_init(struct embodios_model *model)
 /* Enhanced command processing */
 void process_command(const char *command)
 {
+    /* Skip empty commands */
+    if (!command || command[0] == '\0') {
+        return;
+    }
+
     /* Basic built-in commands */
-    if (strcmp(command, "help") == 0) {
-        console_printf("\nEMBODIOS Commands:\n");
-        console_printf("================\n\n");
-        console_printf("AI:\n");
-        console_printf("  chat <message>  - Chat with the AI model\n");
-        console_printf("  benchmark       - Run inference benchmark\n");
-        console_printf("  deterministic   - Control deterministic timing mode\n");
+    if (strcmp(command, "help") == 0 || strcmp(command, "?") == 0) {
+        console_printf("\n");
+        console_printf(" ╔════════════════════════════════════════╗\n");
+        console_printf(" ║         EMBODIOS Commands              ║\n");
+        console_printf(" ╚════════════════════════════════════════╝\n");
+        console_printf("\n");
+        console_printf(" [AI Chat]\n");
+        console_printf("   talk             Enter interactive chat mode\n");
+        console_printf("   chat <message>   Single message chat\n");
+        console_printf("   perf             Show last chat performance\n");
+        console_printf("   status           Show AI model status\n");
+        console_printf("\n");
+        console_printf(" [System]\n");
+        console_printf("   mem              Show memory usage\n");
+        console_printf("   lspci            List PCI devices\n");
+        console_printf("   reboot           Reboot system\n");
+        console_printf("\n");
+        console_printf(" Type 'help all' for advanced commands.\n");
+        console_printf(" Type 'help ai' for AI-specific commands.\n");
+        console_printf("\n");
+    } else if (strcmp(command, "help ai") == 0) {
+        console_printf("\n");
+        console_printf(" [Interactive Chat]\n");
+        console_printf("   talk             Enter chat mode (type 'exit' to leave)\n");
+        console_printf("   chat <msg>       Single message (for scripting)\n");
+        console_printf("\n");
+        console_printf(" [Performance]\n");
+        console_printf("   perf             Show last chat timing stats\n");
+        console_printf("   benchmark        Full inference benchmark\n");
+        console_printf("   deterministic    Control real-time timing mode\n");
+        console_printf("\n");
+        console_printf(" [Status]\n");
+        console_printf("   status           Show model/tokenizer/engine status\n");
+        console_printf("   models           List all loaded models\n");
+        console_printf("   model            Show active model details\n");
+        console_printf("\n");
+        console_printf(" [Advanced]\n");
+        console_printf("   stream <prompt>  Low-level streaming inference\n");
+        console_printf("   gguf <prompt>    GGUF format inference\n");
+        console_printf("   bpeinit/bpetest  Tokenizer init/test\n");
+        console_printf("   loadmodel        Load model from disk\n");
+        console_printf("\n");
+    } else if (strcmp(command, "help all") == 0 || strcmp(command, "help advanced") == 0) {
+        console_printf("\n");
+        console_printf(" [All Commands]\n\n");
+        console_printf(" Model Management:\n");
+        console_printf("   models             List all loaded models\n");
+        console_printf("   model              Show active model info\n");
+        console_printf("   model load <name>  Load embedded model by name\n");
+        console_printf("   model switch <id>  Switch to model by ID\n");
+        console_printf("   model unload <id>  Unload model by ID\n");
         console_printf("\n");
-        console_printf("System:\n");
-        console_printf("  help            - Show this help\n");
-        console_printf("  mem             - Show memory info\n");
-        console_printf("  heap            - Show heap stats\n");
-        console_printf("  memtest         - Run memory stress test\n");
-        console_printf("  lspci           - List PCI devices\n");
-        console_printf("  reboot          - Reboot system\n");
+        console_printf(" AI Inference:\n");
+        console_printf("   talk               Interactive chat session\n");
+        console_printf("   chat <msg>         Single message chat\n");
+        console_printf("   perf               Show last chat performance\n");
+        console_printf("   benchmark          Full inference benchmark\n");
+        console_printf("   stream <prompt>    Low-level streaming inference\n");
+        console_printf("   gguf <prompt>      GGUF format inference\n");
+        console_printf("   deterministic      Real-time timing control\n");
         console_printf("\n");
-        console_printf("Type 'help advanced' for all commands.\n");
-    } else if (strcmp(command, "help advanced") == 0) {
-        console_printf("\nAdvanced Commands:\n");
-        console_printf("==================\n\n");
-        console_printf("Model Management:\n");
-        console_printf("  models, model, model load/switch/unload\n");
+        console_printf(" Hardware:\n");
+        console_printf("   dmatest/dmastats   DMA subsystem tests/stats\n");
+        console_printf("   pcitest/pcistats   PCI subsystem tests/stats\n");
+        console_printf("   lspci              List PCI devices\n");
         console_printf("\n");
-        console_printf("AI Inference (legacy):\n");
-        console_printf("  ai, infer, gguf, stream, ggufinit, streaminit, bpeinit, bpetest\n");
+        console_printf(" Storage:\n");
+        console_printf("   blkinfo/blkdevs    Block device info\n");
+        console_printf("   blktest/blkperf    Block device tests\n");
+        console_printf("   blkread <sec> [n]  Read raw sectors\n");
+        console_printf("   loadmodel          Load model from disk\n");
         console_printf("\n");
-        console_printf("Hardware:\n");
-        console_printf("  dmatest, dmastats, pcitest, pcistats\n");
+        console_printf(" Network:\n");
+        console_printf("   net/netinfo        Network configuration\n");
+        console_printf("   nettest            Network self-tests\n");
+        console_printf("   ping <ip>          Ping remote host\n");
         console_printf("\n");
-        console_printf("Storage:\n");
-        console_printf("  blkinfo, blktest, blkperf, blkread, blkdevs, loadmodel, loadtiny\n");
+        console_printf(" Industrial:\n");
+        console_printf("   modbustest         Modbus TCP test\n");
+        console_printf("   ethercattest       EtherCAT frame test\n");
+        console_printf("   timingtest         Real-time timing verification\n");
         console_printf("\n");
-        console_printf("Network:\n");
-        console_printf("  net, netinfo, nettest, ping <ip>\n");
+        console_printf(" Testing:\n");
+        console_printf("   memtest/locktest   Memory and locking tests\n");
+        console_printf("   quanttest/bench    Quantization tests\n");
+        console_printf("   tvmload/run/bench  TVM runtime tests\n");
         console_printf("\n");
-        console_printf("Industrial Protocols:\n");
-        console_printf("  modbustest      - Run Modbus TCP integration test\n");
-        console_printf("  ethercattest    - Run EtherCAT frame processing test\n");
-        console_printf("  timingtest      - Verify industrial timing requirements\n");
+    } else if (strcmp(command, "status") == 0) {
+        /* System and AI status overview */
+        extern bool streaming_inference_is_ready(void);
+        extern int gguf_model_embedded(void);
+        extern const struct gguf_model_arch *gguf_parser_get_arch(void);
+        extern const uint8_t *get_embedded_gguf_model(size_t *out_size);
+
         console_printf("\n");
-        console_printf("Testing:\n");
-        console_printf("  locktest, quanttest, quantbench, benchgguf, validate\n");
+        console_printf(" ╔════════════════════════════════════════╗\n");
+        console_printf(" ║           EMBODIOS Status              ║\n");
+        console_printf(" ╚════════════════════════════════════════╝\n");
         console_printf("\n");
-        console_printf("TVM Runtime:\n");
-        console_printf("  tvmload, tvmrun, tvmbench\n");
-    } else if (strncmp(command, "chat ", 5) == 0) {
-        /* Simple unified chat command - auto-initializes everything */
+
+        /* AI Model Status */
+        console_printf(" [AI Model]\n");
+        if (gguf_model_embedded()) {
+            size_t model_size = 0;
+            get_embedded_gguf_model(&model_size);
+            console_printf("   Embedded Model:  Yes (%zu MB)\n", model_size / (1024*1024));
+        } else {
+            console_printf("   Embedded Model:  No\n");
+        }
+
+        if (gguf_parser_get_arch()) {
+            console_printf("   Model Loaded:    Yes\n");
+        } else {
+            console_printf("   Model Loaded:    No\n");
+        }
+
+        if (bpe_tokenizer_is_initialized()) {
+            console_printf("   Tokenizer:       Ready\n");
+        } else {
+            console_printf("   Tokenizer:       Not initialized\n");
+        }
+
+        if (streaming_inference_is_ready()) {
+            console_printf("   Inference:       Ready\n");
+        } else {
+            console_printf("   Inference:       Not initialized\n");
+        }
+
+        console_printf("\n");
+        console_printf(" [Memory]\n");
+        heap_stats();
+        console_printf("\n");
+
+        /* Quick tip */
+        if (!streaming_inference_is_ready()) {
+            console_printf(" Tip: Run 'chat <message>' to auto-initialize and start chatting.\n\n");
+        } else {
+            console_printf(" Tip: AI ready! Try 'chat Hello, world!'\n\n");
+        }
+
+    } else if (strcmp(command, "perf") == 0) {
+        /* Show performance stats from last chat */
+        if (!g_chat_perf.valid) {
+            console_printf("\n No chat performance data yet. Run 'chat' or 'talk' first.\n\n");
+            return;
+        }
+
+        console_printf("\n");
+        console_printf(" ╔════════════════════════════════════════╗\n");
+        console_printf(" ║        Chat Performance Stats          ║\n");
+        console_printf(" ╚════════════════════════════════════════╝\n");
+        console_printf("\n");
+        console_printf(" [Last Message]\n");
+        console_printf("   Prompt tokens:     %d\n", g_chat_perf.last_prompt_tokens);
+        console_printf("   Generated tokens:  %d\n", g_chat_perf.last_generated_tokens);
+        console_printf("   Total time:        %llu ms\n", (unsigned long long)(g_chat_perf.last_total_us / 1000));
+
+        if (g_chat_perf.last_generated_tokens > 0) {
+            uint64_t tokens_per_sec = (g_chat_perf.last_generated_tokens * 1000000ULL) / g_chat_perf.last_total_us;
+            console_printf("   Throughput:        %llu tok/s\n", (unsigned long long)tokens_per_sec);
+        }
+
+        if (g_chat_perf.session_messages > 1) {
+            console_printf("\n [Session Totals]\n");
+            console_printf("   Messages:          %d\n", g_chat_perf.session_messages);
+            console_printf("   Total tokens:      %llu\n", (unsigned long long)g_chat_perf.session_total_tokens);
+            console_printf("   Total time:        %llu ms\n", (unsigned long long)(g_chat_perf.session_total_time_us / 1000));
+            if (g_chat_perf.session_total_tokens > 0) {
+                uint64_t avg_tps = (g_chat_perf.session_total_tokens * 1000000ULL) / g_chat_perf.session_total_time_us;
+                console_printf("   Avg throughput:    %llu tok/s\n", (unsigned long long)avg_tps);
+            }
+        }
+
+        console_printf("\n Tip: Run 'benchmark' for detailed timing breakdown.\n\n");
+
+    } else if (strcmp(command, "talk") == 0) {
+        /* Interactive chat mode */
         extern int streaming_inference_init(bool preallocate);
         extern bool streaming_inference_is_ready(void);
         extern int streaming_inference_generate(const int *, int, int *, int);
@@ -123,45 +299,205 @@ void process_command(const char *command)
         extern int gguf_model_embedded(void);
         extern const struct gguf_model_arch *gguf_parser_get_arch(void);
 
-        const char *prompt = command + 5;
-        while (*prompt == ' ') prompt++; /* Skip whitespace */
+        console_printf("\n");
+        console_printf(" ╔════════════════════════════════════════╗\n");
+        console_printf(" ║         EMBODIOS Chat Mode             ║\n");
+        console_printf(" ╚════════════════════════════════════════╝\n");
+        console_printf("\n");
+        console_printf(" Type your message and press Enter.\n");
+        console_printf(" Commands: 'exit' to leave, 'perf' for stats\n");
+        console_printf("\n");
+
+        /* Auto-initialize everything */
+        if (!gguf_parser_get_arch()) {
+            if (!gguf_model_embedded()) {
+                console_printf(" Error: No AI model available.\n\n");
+                return;
+            }
+            size_t gguf_size = 0;
+            const uint8_t *gguf_data = get_embedded_gguf_model(&gguf_size);
+            if (!gguf_data || gguf_size == 0) {
+                console_printf(" Error: Failed to access model data.\n\n");
+                return;
+            }
+            console_printf(" Loading model (%zu MB)...", gguf_size / (1024*1024));
+            console_flush();
+            if (gguf_load_model((void *)gguf_data, gguf_size) < 0) {
+                console_printf(" failed.\n\n");
+                return;
+            }
+            console_printf(" done.\n");
+        }
+
+        if (!bpe_tokenizer_is_initialized()) {
+            console_printf(" Initializing tokenizer...");
+            console_flush();
+            bpe_tokenizer_init();
+            console_printf(" done.\n");
+        }
+
+        if (!streaming_inference_is_ready()) {
+            console_printf(" Initializing inference engine...");
+            console_flush();
+            if (streaming_inference_init(false) != 0) {
+                console_printf(" failed.\n\n");
+                return;
+            }
+            console_printf(" done.\n");
+        }
+
+        console_printf("\n Ready! Start chatting.\n\n");
+
+        /* Reset session stats */
+        g_chat_perf.session_messages = 0;
+        g_chat_perf.session_total_tokens = 0;
+        g_chat_perf.session_total_time_us = 0;
+
+        /* Chat loop */
+        char input_buf[256];
+        while (1) {
+            console_printf("You> ");
+            console_readline(input_buf, sizeof(input_buf));
+
+            /* Check for exit commands */
+            if (strcmp(input_buf, "exit") == 0 || strcmp(input_buf, "quit") == 0 ||
+                strcmp(input_buf, "q") == 0 || strcmp(input_buf, "/exit") == 0) {
+                break;
+            }
+
+            /* Show perf inline */
+            if (strcmp(input_buf, "perf") == 0 || strcmp(input_buf, "/perf") == 0) {
+                if (g_chat_perf.valid && g_chat_perf.session_messages > 0) {
+                    console_printf("\n [Session: %d msgs, %llu tokens, %llu tok/s avg]\n\n",
+                        g_chat_perf.session_messages,
+                        (unsigned long long)g_chat_perf.session_total_tokens,
+                        g_chat_perf.session_total_time_us > 0 ?
+                            (unsigned long long)((g_chat_perf.session_total_tokens * 1000000ULL) / g_chat_perf.session_total_time_us) : 0);
+                } else {
+                    console_printf("\n [No stats yet]\n\n");
+                }
+                continue;
+            }
+
+            /* Skip empty input */
+            if (input_buf[0] == '\0') continue;
+
+            /* Tokenize */
+            int prompt_tokens[256];
+            int prompt_len = 0;
+            if (bpe_tokenizer_is_initialized()) {
+                prompt_len = bpe_tokenizer_encode(input_buf, prompt_tokens, 256, false, false);
+            }
+            if (prompt_len <= 0) {
+                prompt_tokens[0] = 1;
+                prompt_len = 1;
+            }
+
+            /* Generate with timing */
+            uint64_t start = chat_get_cycles();
+
+            int output_tokens[128];
+            int generated = streaming_inference_generate(prompt_tokens, prompt_len, output_tokens, 50);
+
+            uint64_t end = chat_get_cycles();
+            uint64_t elapsed_us = chat_cycles_to_us(end - start);
+
+            /* Display response */
+            console_printf("\nAI>  ");
+            if (generated > 0) {
+                char decoded[512];
+                int len = bpe_tokenizer_decode(output_tokens, generated, decoded, sizeof(decoded));
+                if (len > 0) {
+                    console_printf("%s", decoded);
+                } else {
+                    for (int i = 0; i < generated; i++) {
+                        const char *tok = streaming_inference_get_token(output_tokens[i]);
+                        if (tok) console_printf("%s", tok);
+                    }
+                }
+            } else {
+                console_printf("(no response)");
+            }
+            console_printf("\n\n");
+
+            /* Update perf stats */
+            g_chat_perf.last_prompt_tokens = prompt_len;
+            g_chat_perf.last_generated_tokens = generated;
+            g_chat_perf.last_total_us = elapsed_us;
+            g_chat_perf.session_messages++;
+            g_chat_perf.session_total_tokens += generated;
+            g_chat_perf.session_total_time_us += elapsed_us;
+            g_chat_perf.valid = true;
+        }
+
+        /* Show session summary */
+        console_printf("\n");
+        console_printf(" ────────────────────────────────────────\n");
+        console_printf(" Session ended.\n");
+        if (g_chat_perf.session_messages > 0) {
+            console_printf(" Messages: %d | Tokens: %llu | Time: %llu ms\n",
+                g_chat_perf.session_messages,
+                (unsigned long long)g_chat_perf.session_total_tokens,
+                (unsigned long long)(g_chat_perf.session_total_time_us / 1000));
+        }
+        console_printf(" ────────────────────────────────────────\n\n");
+
+    } else if (strncmp(command, "chat ", 5) == 0 || strcmp(command, "chat") == 0) {
+        /* Single message chat command */
+        extern int streaming_inference_init(bool preallocate);
+        extern bool streaming_inference_is_ready(void);
+        extern int streaming_inference_generate(const int *, int, int *, int);
+        extern const char *streaming_inference_get_token(int);
+        extern const uint8_t *get_embedded_gguf_model(size_t *out_size);
+        extern int gguf_load_model(void *data, size_t size);
+        extern int gguf_model_embedded(void);
+        extern const struct gguf_model_arch *gguf_parser_get_arch(void);
+
+        const char *prompt = command + 4;
+        while (*prompt == ' ') prompt++;
 
         if (*prompt == '\0') {
-            console_printf("Usage: chat <your message>\n");
-            console_printf("Example: chat Hello, how are you?\n");
+            console_printf("\n");
+            console_printf(" Usage: chat <message>   (single message)\n");
+            console_printf("        talk             (interactive mode)\n");
+            console_printf("\n");
+            console_printf(" Example: chat Hello, how are you?\n\n");
             return;
         }
 
-        /* Auto-initialize: Load model if needed */
+        /* Auto-initialize (silent if already ready) */
         if (!gguf_parser_get_arch()) {
             if (!gguf_model_embedded()) {
-                console_printf("Error: No AI model available\n");
+                console_printf(" Error: No AI model available.\n\n");
                 return;
             }
             size_t gguf_size = 0;
             const uint8_t *gguf_data = get_embedded_gguf_model(&gguf_size);
             if (!gguf_data || gguf_size == 0) {
-                console_printf("Error: Failed to get model data\n");
+                console_printf(" Error: Failed to access model.\n\n");
                 return;
             }
-            console_printf("Loading model...\n");
+            console_printf(" Loading model...");
+            console_flush();
             if (gguf_load_model((void *)gguf_data, gguf_size) < 0) {
-                console_printf("Error: Model load failed\n");
+                console_printf(" failed.\n\n");
                 return;
             }
+            console_printf(" OK\n");
         }
 
-        /* Auto-initialize: BPE tokenizer */
         if (!bpe_tokenizer_is_initialized()) {
             bpe_tokenizer_init();
         }
 
-        /* Auto-initialize: Inference engine */
         if (!streaming_inference_is_ready()) {
+            console_printf(" Initializing...");
+            console_flush();
             if (streaming_inference_init(false) != 0) {
-                console_printf("Error: Inference init failed\n");
+                console_printf(" failed.\n\n");
                 return;
             }
+            console_printf(" OK\n");
         }
 
         /* Tokenize */
@@ -171,34 +507,43 @@ void process_command(const char *command)
             prompt_len = bpe_tokenizer_encode(prompt, prompt_tokens, 256, false, false);
         }
         if (prompt_len <= 0) {
-            prompt_tokens[0] = 1; /* BOS fallback */
+            prompt_tokens[0] = 1;
             prompt_len = 1;
         }
 
-        console_printf("\nYou: %s\n", prompt);
-        console_printf("AI: ");
+        /* Generate with timing */
+        uint64_t start = chat_get_cycles();
 
-        /* Generate */
         int output_tokens[128];
         int generated = streaming_inference_generate(prompt_tokens, prompt_len, output_tokens, 50);
 
+        uint64_t end = chat_get_cycles();
+        uint64_t elapsed_us = chat_cycles_to_us(end - start);
+
+        /* Display response */
+        console_printf("\n");
         if (generated > 0) {
-            /* Decode tokens to readable text */
             char decoded[512];
             int len = bpe_tokenizer_decode(output_tokens, generated, decoded, sizeof(decoded));
             if (len > 0) {
-                console_printf("%s", decoded);
+                console_printf("%s\n", decoded);
             } else {
-                /* Fallback to raw tokens */
                 for (int i = 0; i < generated; i++) {
                     const char *tok = streaming_inference_get_token(output_tokens[i]);
                     if (tok) console_printf("%s", tok);
                 }
+                console_printf("\n");
             }
-            console_printf("\n\n");
         } else {
-            console_printf("(no response)\n\n");
+            console_printf("(no response)\n");
         }
+        console_printf("\n");
+
+        /* Update perf stats */
+        g_chat_perf.last_prompt_tokens = prompt_len;
+        g_chat_perf.last_generated_tokens = generated;
+        g_chat_perf.last_total_us = elapsed_us;
+        g_chat_perf.valid = true;
     } else if (strncmp(command, "ai ", 3) == 0) {
         /* TinyStories interactive inference */
         const char *prompt = command + 3;
@@ -1712,8 +2057,33 @@ void process_command(const char *command)
     } else if (strcmp(command, "reboot") == 0) {
         console_printf("Rebooting...\n");
         arch_reboot();
+    } else if (strcmp(command, "clear") == 0 || strcmp(command, "cls") == 0) {
+        /* Clear screen using ANSI escape codes (works with serial terminals) */
+        console_printf("\033[2J\033[H");
+    } else if (strcmp(command, "version") == 0 || strcmp(command, "ver") == 0) {
+        extern const char* kernel_version;
+        extern const char* kernel_build;
+        console_printf("\n");
+        console_printf(" EMBODIOS %s\n", kernel_version);
+        console_printf(" Build: %s\n\n", kernel_build);
     } else {
-        console_printf("Unknown command: %s\n", command);
+        /* Unknown command - provide helpful suggestions */
+        console_printf("\n Unknown command: '%s'\n", command);
+
+        /* Check for common typos/similar commands */
+        if (strncmp(command, "ch", 2) == 0) {
+            console_printf(" Did you mean: chat <message>?\n");
+        } else if (strncmp(command, "he", 2) == 0) {
+            console_printf(" Did you mean: help?\n");
+        } else if (strncmp(command, "ben", 3) == 0) {
+            console_printf(" Did you mean: benchmark?\n");
+        } else if (strncmp(command, "mod", 3) == 0) {
+            console_printf(" Did you mean: models or model?\n");
+        } else if (strncmp(command, "mem", 3) == 0 && strcmp(command, "mem") != 0) {
+            console_printf(" Did you mean: mem or memtest?\n");
+        }
+
+        console_printf(" Type 'help' for available commands.\n\n");
     }
 }