Introduced minifb instead of SDL2 for now. (so simple to integrate..) Progress with cdrom Added some misc docs for preservation's sake - ultimecia

commit 65dcc262b854e3dad76cfcce9d8a9c041b43f888
parent f94ef545e4e4ef7002788d3bdab7e4c3c3f90cbe
Author: noone <vazkats@gmail.com>
Date:   Wed,  1 Oct 2025 19:03:29 +0300

Introduced minifb instead of SDL2 for now. (so simple to integrate..)
Progress with cdrom
Added some misc docs for preservation's sake

Diffstat:
A lib/include/MiniFB.h  | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/include/MiniFB_cpp.h  | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/include/MiniFB_enums.h  | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/include/MiniFB_ios.h  | 7 +++++++
A lib/libminifb.a  | 0 
M makefile  | 24 +++++++++++-------------
A misc/cdrom_exploration.txt  | 274 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A misc/gpu.txt  | 1250 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A misc/gte.txt  | 1000 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A misc/psx_documentation_project.pdf  | 0 
A misc/spu.txt  | 526 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A misc/system.txt  | 865 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A ntani.txt  | 4 ++++
M src/bios.c  | 10 +++++-----
M src/cdrom.c  | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M src/cdrom.h  | 40 ++++++++++++++++++++++++++++++----------
M src/interconnect.c  | 12 ++++++------
M src/interconnect.h  | 2 ++
M src/irq.c  | 9 ++++++---
M src/main.c  | 63 +++++++++++++++++++++++++++++++++++++++------------------------
M src/sr.c  | 158 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M src/sr.h  | 3 ++-
A src/time.c  | 0 
A src/time.h  | 0 
M test.cc  | 1 -

25 files changed, 4667 insertions(+), 177 deletions(-)
diff --git a/lib/include/MiniFB.h b/lib/include/MiniFB.h
@@ -0,0 +1,107 @@
+#ifndef _MINIFB_H_
+#define _MINIFB_H_
+
+#include "MiniFB_enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef __ANDROID__ // non-Android: channels packed as 0xAARRGGBB
+#define MFB_RGB(r, g, b)        ((((uint32_t) (r)) << 16) | (((uint32_t) (g)) << 8) | ((uint32_t) (b)))
+#define MFB_ARGB(a, r, g, b)    ((((uint32_t) (a)) << 24) | (((uint32_t) (r)) << 16) | (((uint32_t) (g)) << 8) | ((uint32_t) (b)))
+#else
+    #ifdef HOST_WORDS_BIGENDIAN // big-endian Android: same 0xAARRGGBB packing
+    #define MFB_RGB(r, g, b)     ((((uint32_t) (r)) << 16) | (((uint32_t) (g)) << 8) | ((uint32_t) (b)))
+    #define MFB_ARGB(a, r, g, b) ((((uint32_t) (a)) << 24) | (((uint32_t) (r)) << 16) | (((uint32_t) (g)) << 8) | ((uint32_t) (b)))
+    #else // little-endian Android: red and blue swapped (0xAABBGGRR); MFB_ARGB now declares the `a` it uses
+    #define MFB_ARGB(a, r, g, b) ((((uint32_t) (a)) << 24) | (((uint32_t) (b)) << 16) | (((uint32_t) (g)) << 8) | ((uint32_t) (r)))
+    #define MFB_RGB(r, g, b)     ((((uint32_t) (b)) << 16) | (((uint32_t) (g)) << 8) | ((uint32_t) (r)))
+    #endif
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Create a window that is used to display the buffer sent into the mfb_update function; returns 0 on failure
+struct mfb_window * mfb_open(const char *title, unsigned width, unsigned height);
+struct mfb_window * mfb_open_ex(const char *title, unsigned width, unsigned height, unsigned flags);
+
+// Update the display
+// Input buffer is assumed to be a 32-bit buffer of the size given in the open call
+// Returns a negative status if something went wrong or the user wants to exit
+// Also updates the window events
+mfb_update_state    mfb_update(struct mfb_window *window, void *buffer);
+
+mfb_update_state    mfb_update_ex(struct mfb_window *window, void *buffer, unsigned width, unsigned height);
+
+// Only updates the window events
+mfb_update_state    mfb_update_events(struct mfb_window *window);
+
+// Close the window
+void                mfb_close(struct mfb_window *window);
+
+// Set user data
+void                mfb_set_user_data(struct mfb_window *window, void *user_data);
+void *              mfb_get_user_data(struct mfb_window *window);
+
+// Set viewport (useful when resizing)
+bool                mfb_set_viewport(struct mfb_window *window, unsigned offset_x, unsigned offset_y, unsigned width, unsigned height);
+// Let mfb calculate the best fit from your framebuffer's original size
+bool                mfb_set_viewport_best_fit(struct mfb_window *window, unsigned old_width, unsigned old_height);
+
+// DPI
+// [Deprecated]: a better name would be mfb_get_monitor_scale
+void                mfb_get_monitor_dpi(struct mfb_window *window, float *dpi_x, float *dpi_y);
+// Use this instead
+void                mfb_get_monitor_scale(struct mfb_window *window, float *scale_x, float *scale_y);
+
+// Callbacks
+void                mfb_set_active_callback(struct mfb_window *window, mfb_active_func callback);
+void                mfb_set_resize_callback(struct mfb_window *window, mfb_resize_func callback);
+void                mfb_set_close_callback(struct mfb_window* window, mfb_close_func callback);
+void                mfb_set_keyboard_callback(struct mfb_window *window, mfb_keyboard_func callback);
+void                mfb_set_char_input_callback(struct mfb_window *window, mfb_char_input_func callback);
+void                mfb_set_mouse_button_callback(struct mfb_window *window, mfb_mouse_button_func callback);
+void                mfb_set_mouse_move_callback(struct mfb_window *window, mfb_mouse_move_func callback);
+void                mfb_set_mouse_scroll_callback(struct mfb_window *window, mfb_mouse_scroll_func callback);
+
+// Getters
+const char *        mfb_get_key_name(mfb_key key);
+
+bool                mfb_is_window_active(struct mfb_window *window);
+unsigned            mfb_get_window_width(struct mfb_window *window);
+unsigned            mfb_get_window_height(struct mfb_window *window);
+int                 mfb_get_mouse_x(struct mfb_window *window);             // Last mouse pos X
+int                 mfb_get_mouse_y(struct mfb_window *window);             // Last mouse pos Y
+float               mfb_get_mouse_scroll_x(struct mfb_window *window);      // Mouse wheel X as a sum. When you call this function it resets.
+float               mfb_get_mouse_scroll_y(struct mfb_window *window);      // Mouse wheel Y as a sum. When you call this function it resets.
+const uint8_t *     mfb_get_mouse_button_buffer(struct mfb_window *window); // One byte for every button. Press (1), Release 0. (up to 8 buttons)
+const uint8_t *     mfb_get_key_buffer(struct mfb_window *window);          // One byte for every key. Press (1), Release 0.
+
+// FPS
+void                mfb_set_target_fps(uint32_t fps);
+unsigned            mfb_get_target_fps(void);
+bool                mfb_wait_sync(struct mfb_window *window);
+
+// Timer
+struct mfb_timer *  mfb_timer_create(void);
+void                mfb_timer_destroy(struct mfb_timer *tmr);
+void                mfb_timer_reset(struct mfb_timer *tmr);
+double              mfb_timer_now(struct mfb_timer *tmr);
+double              mfb_timer_delta(struct mfb_timer *tmr);
+double              mfb_timer_get_frequency(void);
+double              mfb_timer_get_resolution(void);
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#ifdef __cplusplus
+}
+
+#if !defined(MINIFB_AVOID_CPP_HEADERS)
+    #include "MiniFB_cpp.h"
+#endif
+
+#endif
+
+#endif
diff --git a/lib/include/MiniFB_cpp.h b/lib/include/MiniFB_cpp.h
@@ -0,0 +1,186 @@
+#pragma once
+
+#if defined(__cplusplus)
+
+#include <functional>
+#include "MiniFB.h"
+
+//-------------------------------------
+// To distinguish these C++ functions (which take std::function) from the C functions (which take raw function pointers), the parameter order has to be reversed.
+//
+// Note that, from the compiler's point of view,
+//   mfb_set_XXX_callback(window, &my_c_func)
+// and
+//   mfb_set_XXX_callback(window, [](...) {})
+// have the same parameters.
+//-------------------------------------
+void mfb_set_active_callback      (std::function<void(struct mfb_window *, bool)>                                func, struct mfb_window *window);
+void mfb_set_resize_callback      (std::function<void(struct mfb_window *, int, int)>                            func, struct mfb_window *window);
+void mfb_set_close_callback       (std::function<bool(struct mfb_window *)>                                      func, struct mfb_window *window);
+void mfb_set_keyboard_callback    (std::function<void(struct mfb_window *, mfb_key, mfb_key_mod, bool)>          func, struct mfb_window *window);
+void mfb_set_char_input_callback  (std::function<void(struct mfb_window *, unsigned int)>                        func, struct mfb_window *window);
+void mfb_set_mouse_button_callback(std::function<void(struct mfb_window *, mfb_mouse_button, mfb_key_mod, bool)> func, struct mfb_window *window);
+void mfb_set_mouse_move_callback  (std::function<void(struct mfb_window *, int, int)>                            func, struct mfb_window *window);
+void mfb_set_mouse_scroll_callback(std::function<void(struct mfb_window *, mfb_key_mod, float, float)>           func, struct mfb_window *window);
+//-------------------------------------
+
+// Member-function overloads taking (window, obj, &T::method). NOTE(review): mfb_set_close_callback has no forward declaration in this list, although its template is defined further down in this header.
+template <class T>
+void mfb_set_active_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, bool));
+
+template <class T>
+void mfb_set_resize_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, int, int));
+
+template <class T>
+void mfb_set_keyboard_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_key, mfb_key_mod, bool));
+
+template <class T>
+void mfb_set_char_input_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, unsigned int));
+
+template <class T>
+void mfb_set_mouse_button_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_mouse_button, mfb_key_mod, bool));
+
+template <class T>
+void mfb_set_mouse_move_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, int, int));
+
+template <class T>
+void mfb_set_mouse_scroll_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_key_mod, float, float));
+//-------------------------------------
+
+//-------------------------------------
+// mfb_stub keeps the C++ callbacks (std::function) registered for a window.
+// Everything is private, to avoid clumsy hands: only the
+// mfb_set_*_callback overloads declared as friends below can use it.
+//-------------------------------------
+class mfb_stub {
+    mfb_stub() : m_window(0x0) {}
+
+    friend void mfb_set_active_callback      (std::function<void(struct mfb_window *window, bool)>                          func, struct mfb_window *window);
+    friend void mfb_set_resize_callback      (std::function<void(struct mfb_window *, int, int)>                            func, struct mfb_window *window);
+    friend void mfb_set_close_callback       (std::function<bool(struct mfb_window *)>                                      func, struct mfb_window *window);
+    friend void mfb_set_keyboard_callback    (std::function<void(struct mfb_window *, mfb_key, mfb_key_mod, bool)>          func, struct mfb_window *window);
+    friend void mfb_set_char_input_callback  (std::function<void(struct mfb_window *, unsigned int)>                        func, struct mfb_window *window);
+    friend void mfb_set_mouse_button_callback(std::function<void(struct mfb_window *, mfb_mouse_button, mfb_key_mod, bool)> func, struct mfb_window *window);
+    friend void mfb_set_mouse_move_callback  (std::function<void(struct mfb_window *, int, int)>                            func, struct mfb_window *window);
+    friend void mfb_set_mouse_scroll_callback(std::function<void(struct mfb_window *, mfb_key_mod, float, float)>           func, struct mfb_window *window);
+
+    template <class T>
+    friend void mfb_set_active_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, bool));
+    template <class T>
+    friend void mfb_set_resize_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, int, int));
+    template <class T>
+    friend void mfb_set_close_callback(struct mfb_window *window, T *obj, bool (T::*method)(struct mfb_window *));
+    template <class T>
+    friend void mfb_set_keyboard_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_key, mfb_key_mod, bool));
+    template <class T>
+    friend void mfb_set_char_input_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, unsigned int));
+    template <class T>
+    friend void mfb_set_mouse_button_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_mouse_button, mfb_key_mod, bool));
+    template <class T>
+    friend void mfb_set_mouse_move_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, int, int));
+    template <class T>
+    friend void mfb_set_mouse_scroll_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_key_mod, float, float));
+
+    static mfb_stub *GetInstance(struct mfb_window *window);
+
+    static void active_stub(struct mfb_window *window, bool isActive);
+    static void resize_stub(struct mfb_window *window, int width, int height);
+    static bool close_stub(struct mfb_window *window);
+    static void keyboard_stub(struct mfb_window *window, mfb_key key, mfb_key_mod mod, bool isPressed);
+    static void char_input_stub(struct mfb_window *window, unsigned int);
+    static void mouse_btn_stub(struct mfb_window *window, mfb_mouse_button button, mfb_key_mod mod, bool isPressed);
+    static void mouse_move_stub(struct mfb_window *window, int x, int y);
+    static void scroll_stub(struct mfb_window *window, mfb_key_mod mod, float deltaX, float deltaY);
+
+    struct mfb_window                                                           *m_window;
+    std::function<void(struct mfb_window *window, bool)>                        m_active;
+    std::function<void(struct mfb_window *window, int, int)>                    m_resize;
+    std::function<bool(struct mfb_window *window)>                              m_close;
+    std::function<void(struct mfb_window *window, mfb_key, mfb_key_mod, bool)>  m_keyboard;
+    std::function<void(struct mfb_window *window, unsigned int)>                m_char_input;
+    std::function<void(struct mfb_window *window, mfb_mouse_button, mfb_key_mod, bool)>   m_mouse_btn;
+    std::function<void(struct mfb_window *window, int, int)>                    m_mouse_move;
+    std::function<void(struct mfb_window *window, mfb_key_mod, float, float)>   m_scroll;
+};
+
+// Stores a call to obj->method in the mfb_stub obtained for window, then registers mfb_stub::active_stub as the C callback.
+template <class T>
+inline void mfb_set_active_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, bool)) {
+    mfb_stub *const holder = mfb_stub::GetInstance(window);
+    holder->m_active = [obj, method](struct mfb_window *wnd, bool is_active) {
+        (obj->*method)(wnd, is_active);
+    };
+    mfb_set_active_callback(window, mfb_stub::active_stub);
+}
+
+// Stores a call to obj->method in the mfb_stub obtained for window, then registers mfb_stub::resize_stub as the C callback.
+template <class T>
+inline void mfb_set_resize_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, int, int)) {
+    mfb_stub *const holder = mfb_stub::GetInstance(window);
+    holder->m_resize = [obj, method](struct mfb_window *wnd, int width, int height) {
+        (obj->*method)(wnd, width, height);
+    };
+    mfb_set_resize_callback(window, mfb_stub::resize_stub);
+}
+
+// Stores a call to obj->method (its bool result is passed through) in the mfb_stub obtained for window, then registers mfb_stub::close_stub as the C callback.
+template <class T>
+inline void mfb_set_close_callback(struct mfb_window *window, T *obj, bool (T::*method)(struct mfb_window *window)) {
+    mfb_stub *const holder = mfb_stub::GetInstance(window);
+    holder->m_close = [obj, method](struct mfb_window *wnd) {
+        return (obj->*method)(wnd);
+    };
+    mfb_set_close_callback(window, mfb_stub::close_stub);
+}
+
+// Stores a call to obj->method in the mfb_stub obtained for window, then registers mfb_stub::keyboard_stub as the C callback.
+template <class T>
+inline void mfb_set_keyboard_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, mfb_key, mfb_key_mod, bool)) {
+    mfb_stub *const holder = mfb_stub::GetInstance(window);
+    holder->m_keyboard = [obj, method](struct mfb_window *wnd, mfb_key key, mfb_key_mod mod, bool is_pressed) {
+        (obj->*method)(wnd, key, mod, is_pressed);
+    };
+    mfb_set_keyboard_callback(window, mfb_stub::keyboard_stub);
+}
+
+// Stores a call to obj->method in the mfb_stub obtained for window, then registers mfb_stub::char_input_stub as the C callback.
+template <class T>
+inline void mfb_set_char_input_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, unsigned int)) {
+    mfb_stub *const holder = mfb_stub::GetInstance(window);
+    holder->m_char_input = [obj, method](struct mfb_window *wnd, unsigned int code) {
+        (obj->*method)(wnd, code);
+    };
+    mfb_set_char_input_callback(window, mfb_stub::char_input_stub);
+}
+
+// Stores a call to obj->method in the mfb_stub obtained for window, then registers mfb_stub::mouse_btn_stub as the C callback.
+template <class T>
+inline void mfb_set_mouse_button_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, mfb_mouse_button, mfb_key_mod, bool)) {
+    mfb_stub *const holder = mfb_stub::GetInstance(window);
+    holder->m_mouse_btn = [obj, method](struct mfb_window *wnd, mfb_mouse_button button, mfb_key_mod mod, bool is_pressed) {
+        (obj->*method)(wnd, button, mod, is_pressed);
+    };
+    mfb_set_mouse_button_callback(window, mfb_stub::mouse_btn_stub);
+}
+
+// Stores a call to obj->method in the mfb_stub obtained for window, then registers mfb_stub::mouse_move_stub as the C callback.
+template <class T>
+inline void mfb_set_mouse_move_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, int, int)) {
+    mfb_stub *const holder = mfb_stub::GetInstance(window);
+    holder->m_mouse_move = [obj, method](struct mfb_window *wnd, int x, int y) {
+        (obj->*method)(wnd, x, y);
+    };
+    mfb_set_mouse_move_callback(window, mfb_stub::mouse_move_stub);
+}
+
+// Stores a call to obj->method in the mfb_stub obtained for window, then registers mfb_stub::scroll_stub as the C callback.
+template <class T>
+inline void mfb_set_mouse_scroll_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, mfb_key_mod, float, float)) {
+    mfb_stub *const holder = mfb_stub::GetInstance(window);
+    holder->m_scroll = [obj, method](struct mfb_window *wnd, mfb_key_mod mod, float delta_x, float delta_y) {
+        (obj->*method)(wnd, mod, delta_x, delta_y);
+    };
+    mfb_set_mouse_scroll_callback(window, mfb_stub::scroll_stub);
+}
+
+#endif
diff --git a/lib/include/MiniFB_enums.h b/lib/include/MiniFB_enums.h
@@ -0,0 +1,186 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+// Enums
+typedef enum {
+    STATE_OK             =  0,
+    STATE_EXIT           = -1,
+    STATE_INVALID_WINDOW = -2,
+    STATE_INVALID_BUFFER = -3,
+    STATE_INTERNAL_ERROR = -4,
+} mfb_update_state; // status type; STATE_OK is 0 and every other state is negative
+
+typedef enum {
+    MOUSE_BTN_0, // No mouse button
+    MOUSE_BTN_1,
+    MOUSE_BTN_2,
+    MOUSE_BTN_3,
+    MOUSE_BTN_4,
+    MOUSE_BTN_5,
+    MOUSE_BTN_6,
+    MOUSE_BTN_7,
+} mfb_mouse_button; // implicit values 0..7; the MOUSE_LEFT/RIGHT/MIDDLE macros below alias buttons 1..3
+#define MOUSE_LEFT   MOUSE_BTN_1
+#define MOUSE_RIGHT  MOUSE_BTN_2
+#define MOUSE_MIDDLE MOUSE_BTN_3
+
+typedef enum {
+    KB_KEY_UNKNOWN       = -1,
+    // Printable keys: the values in 32..96 are the ASCII codes of the characters (upper case for letters)
+    KB_KEY_SPACE         = 32,
+    KB_KEY_APOSTROPHE    = 39,
+    KB_KEY_COMMA         = 44,
+    KB_KEY_MINUS         = 45,
+    KB_KEY_PERIOD        = 46,
+    KB_KEY_SLASH         = 47,
+    KB_KEY_0             = 48,
+    KB_KEY_1             = 49,
+    KB_KEY_2             = 50,
+    KB_KEY_3             = 51,
+    KB_KEY_4             = 52,
+    KB_KEY_5             = 53,
+    KB_KEY_6             = 54,
+    KB_KEY_7             = 55,
+    KB_KEY_8             = 56,
+    KB_KEY_9             = 57,
+    KB_KEY_SEMICOLON     = 59,
+    KB_KEY_EQUAL         = 61,
+    KB_KEY_A             = 65,
+    KB_KEY_B             = 66,
+    KB_KEY_C             = 67,
+    KB_KEY_D             = 68,
+    KB_KEY_E             = 69,
+    KB_KEY_F             = 70,
+    KB_KEY_G             = 71,
+    KB_KEY_H             = 72,
+    KB_KEY_I             = 73,
+    KB_KEY_J             = 74,
+    KB_KEY_K             = 75,
+    KB_KEY_L             = 76,
+    KB_KEY_M             = 77,
+    KB_KEY_N             = 78,
+    KB_KEY_O             = 79,
+    KB_KEY_P             = 80,
+    KB_KEY_Q             = 81,
+    KB_KEY_R             = 82,
+    KB_KEY_S             = 83,
+    KB_KEY_T             = 84,
+    KB_KEY_U             = 85,
+    KB_KEY_V             = 86,
+    KB_KEY_W             = 87,
+    KB_KEY_X             = 88,
+    KB_KEY_Y             = 89,
+    KB_KEY_Z             = 90,
+    KB_KEY_LEFT_BRACKET  = 91,
+    KB_KEY_BACKSLASH     = 92,
+    KB_KEY_RIGHT_BRACKET = 93,
+    KB_KEY_GRAVE_ACCENT  = 96,
+    KB_KEY_WORLD_1       = 161,
+    KB_KEY_WORLD_2       = 162,
+    // Non-printable keys (editing, navigation, locks, function keys, keypad, modifiers) start at 256
+    KB_KEY_ESCAPE        = 256,
+    KB_KEY_ENTER         = 257,
+    KB_KEY_TAB           = 258,
+    KB_KEY_BACKSPACE     = 259,
+    KB_KEY_INSERT        = 260,
+    KB_KEY_DELETE        = 261,
+    KB_KEY_RIGHT         = 262,
+    KB_KEY_LEFT          = 263,
+    KB_KEY_DOWN          = 264,
+    KB_KEY_UP            = 265,
+    KB_KEY_PAGE_UP       = 266,
+    KB_KEY_PAGE_DOWN     = 267,
+    KB_KEY_HOME          = 268,
+    KB_KEY_END           = 269,
+    KB_KEY_CAPS_LOCK     = 280,
+    KB_KEY_SCROLL_LOCK   = 281,
+    KB_KEY_NUM_LOCK      = 282,
+    KB_KEY_PRINT_SCREEN  = 283,
+    KB_KEY_PAUSE         = 284,
+    KB_KEY_F1            = 290,
+    KB_KEY_F2            = 291,
+    KB_KEY_F3            = 292,
+    KB_KEY_F4            = 293,
+    KB_KEY_F5            = 294,
+    KB_KEY_F6            = 295,
+    KB_KEY_F7            = 296,
+    KB_KEY_F8            = 297,
+    KB_KEY_F9            = 298,
+    KB_KEY_F10           = 299,
+    KB_KEY_F11           = 300,
+    KB_KEY_F12           = 301,
+    KB_KEY_F13           = 302,
+    KB_KEY_F14           = 303,
+    KB_KEY_F15           = 304,
+    KB_KEY_F16           = 305,
+    KB_KEY_F17           = 306,
+    KB_KEY_F18           = 307,
+    KB_KEY_F19           = 308,
+    KB_KEY_F20           = 309,
+    KB_KEY_F21           = 310,
+    KB_KEY_F22           = 311,
+    KB_KEY_F23           = 312,
+    KB_KEY_F24           = 313,
+    KB_KEY_F25           = 314,
+    KB_KEY_KP_0          = 320,
+    KB_KEY_KP_1          = 321,
+    KB_KEY_KP_2          = 322,
+    KB_KEY_KP_3          = 323,
+    KB_KEY_KP_4          = 324,
+    KB_KEY_KP_5          = 325,
+    KB_KEY_KP_6          = 326,
+    KB_KEY_KP_7          = 327,
+    KB_KEY_KP_8          = 328,
+    KB_KEY_KP_9          = 329,
+    KB_KEY_KP_DECIMAL    = 330,
+    KB_KEY_KP_DIVIDE     = 331,
+    KB_KEY_KP_MULTIPLY   = 332,
+    KB_KEY_KP_SUBTRACT   = 333,
+    KB_KEY_KP_ADD        = 334,
+    KB_KEY_KP_ENTER      = 335,
+    KB_KEY_KP_EQUAL      = 336,
+    KB_KEY_LEFT_SHIFT    = 340,
+    KB_KEY_LEFT_CONTROL  = 341,
+    KB_KEY_LEFT_ALT      = 342,
+    KB_KEY_LEFT_SUPER    = 343,
+    KB_KEY_RIGHT_SHIFT   = 344,
+    KB_KEY_RIGHT_CONTROL = 345,
+    KB_KEY_RIGHT_ALT     = 346,
+    KB_KEY_RIGHT_SUPER   = 347,
+    KB_KEY_MENU          = 348
+} mfb_key;
+#define KB_KEY_LAST     KB_KEY_MENU
+
+typedef enum {
+    KB_MOD_SHIFT        = 0x0001,
+    KB_MOD_CONTROL      = 0x0002,
+    KB_MOD_ALT          = 0x0004,
+    KB_MOD_SUPER        = 0x0008,
+    KB_MOD_CAPS_LOCK    = 0x0010,
+    KB_MOD_NUM_LOCK     = 0x0020
+} mfb_key_mod; // each modifier is a distinct single bit (presumably combined with | -- confirm)
+
+typedef enum {
+    WF_RESIZABLE          = 0x01,
+    WF_FULLSCREEN         = 0x02,
+    WF_FULLSCREEN_DESKTOP = 0x04,
+    WF_BORDERLESS         = 0x08,
+    WF_ALWAYS_ON_TOP      = 0x10,
+} mfb_window_flags; // each flag is a distinct single bit (presumably OR-ed into the flags argument of mfb_open_ex -- confirm)
+
+// Opaque pointer
+struct mfb_window;
+struct mfb_timer;
+
+// Event callbacks: raw function-pointer types; each receives the window as its first argument
+typedef void(*mfb_active_func)(struct mfb_window *window, bool isActive);
+typedef void(*mfb_resize_func)(struct mfb_window *window, int width, int height);
+typedef bool(*mfb_close_func)(struct mfb_window* window);
+typedef void(*mfb_keyboard_func)(struct mfb_window *window, mfb_key key, mfb_key_mod mod, bool isPressed);
+typedef void(*mfb_char_input_func)(struct mfb_window *window, unsigned int code);
+typedef void(*mfb_mouse_button_func)(struct mfb_window *window, mfb_mouse_button button, mfb_key_mod mod, bool isPressed);
+typedef void(*mfb_mouse_move_func)(struct mfb_window *window, int x, int y);
+typedef void(*mfb_mouse_scroll_func)(struct mfb_window *window, mfb_key_mod mod, float deltaX, float deltaY);
+
diff --git a/lib/include/MiniFB_ios.h b/lib/include/MiniFB_ios.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include "MiniFB_enums.h"
+
+void user_implemented_init(struct mfb_window *window); // NOTE(review): only declared here; presumably the application supplies the definition -- confirm
+
+void user_implemented_update(struct mfb_window *window); // NOTE(review): only declared here; presumably the application supplies the definition -- confirm
diff --git a/lib/libminifb.a b/lib/libminifb.a
Binary files differ.
diff --git a/makefile b/makefile
@@ -1,28 +1,26 @@
-#Makefile for Ultimecia PSX Emulator
+# Ultimecia PSX Emulator Makefile
 
-CC      := cc
-CFLAGS  := -Wall -Wpedantic -std=c99 -g -O3 -I/opt/homebrew/include/ -I/opt/homebrew/include/lua5.4
-LDFLAGS := -L/opt/homebrew/lib/
-LIBS    := -lSDL2 -llua
+CC      := clang
+CFLAGS  := -Wall -Wpedantic -g -Ilib/include $(shell /opt/homebrew/bin/pkg-config --cflags sdl2 lua)
+LDFLAGS := $(shell /opt/homebrew/bin/pkg-config --libs sdl2 lua) -framework Cocoa -framework Metal -framework MetalKit
+LIBS    := lib/libminifb.a
 
 SRC     := $(wildcard src/*.c)
 OBJ     := $(SRC:.c=.o)
 BIN     := bin/ultimecia
-JOBS    := 10
 
 all: $(BIN)
 
 $(BIN): $(OBJ)
-		@mkdir -p bin
-			$(CC) $(OBJ) -o $@ $(LDFLAGS) $(LIBS)
+	@mkdir -p bin
+	$(CC) $(OBJ) -o $@ $(LDFLAGS) $(LIBS)
 
-# Pattern rule for compiling .c to .o
+# Compile .c to .o
 %.o: %.c
-		$(CC) $(CFLAGS) -c $< -o $@
+	$(CC) $(CFLAGS) -c $< -o $@
 
-# Clean
 clean:
-		rm -f src/*.o
-			rm -f $(BIN)
+	rm -f src/*.o
+	rm -f $(BIN)
 
 .PHONY: all clean
diff --git a/misc/cdrom_exploration.txt b/misc/cdrom_exploration.txt
@@ -0,0 +1,274 @@
+--------------------------------------------------------------------------
+Quick CDrom explanation...                                     2000/doomed
+
+There's a *LOT* missing here, and the other half might be incorrect, so
+i won't take any responsibility for strange stuff happening. It should
+give you some pointers in the right direction for your own CD explorations
+though. More might follow at some later time..
+--------------------------------------------------------------------------
+CDREG0 = $1f801800
+CDREG1 = $1f801801
+CDREG2 = $1f801802
+CDREG3 = $1f801803
+--------------------------------------------------------------------------
+CDREG0   write    : 0 - to send a command
+                    1 - to get the result
+         read     : I/O status?
+                    bit 0- 0 REG1 command send
+                         - 1 REG1 data read
+                    bit 1- 0 data transfer finished
+                           1 data transfer ready/in progress
+                    bit 7- 1 command being processed.
+
+CDREG1   write    : command
+         read     : results
+
+CDREG2   write    : send arguments
+         write    : 7 = flush arg buffer?
+
+CDREG3   write    : 7 = flush irq
+         read     : hi nibble: ?
+                    low nibble: interrupt status
+--------------------------------------------------------------------------
+Modes for SetMode:
+M_Speed        bit 7      0: normal speed  1: double speed
+M_Strsnd       bit 6      0: ADPCM off     1: ADPCM on
+M_Size         bit 5      0: 2048 byte     1: 2340 byte
+M_Size2        bit 4      0: -             1: 2328 byte
+M_SF           bit 3      0: Channel off   1: Channel on
+M_Report       bit 2      0: Report off    1: Report on
+M_AutoPause    bit 1      0: AutoPause off 1: AutoPause on
+M_CDDA         bit 0      0: CD-DA off     1: CD-DA on
+
+These modes can be set using the setmode command. 
+--------------------------------------------------------------------------
+Status bits:
+Play       	bit 7      playing CD-DA
+Seek       	bit 6      seeking
+Read       	bit 5      reading data sectors
+ShellOpen  	bit 4      once shell open
+SeekError  	bit 3      seek error detected
+Standby    	bit 2      spindle motor rotating
+Error      	bit 1      command error detected
+
+These are the bit values for the status byte received from CD commands.
+--------------------------------------------------------------------------
+Interrupt values:
+NoIntr      $00        No interrupt
+DataReady   $01        Data Ready
+Complete    $02        Command Complete
+Acknowledge $03        Acknowledge
+DataEnd     $04        End of Data Detected
+DiskError   $05        Error Detected
+
+These are returned in the low nibble of CDREG3. First write a 1 to CDREG0
+before reading CDREG3. When a command is completed it returns 3.
+To acknowledge an irq value after you've handled it, write a 1 to CDREG0
+then a 7 to both CDREG2 and CDREG3. Another interrupt may be queued, so
+you should check CDREG3 again to see whether it is 0 or whether there's
+another interrupt to be handled.
+--------------------------------------------------------------------------
+Sync           $00         -                 status
+Nop            $01         -                 status
+Setloc         $02         min,sec,sector    status
+Play           $03    B    -                 status
+Forward        $04    B    -                 status
+Backward       $05    B    -                 status
+ReadN          $06    B    -                 status
+Standby        $07    B    -                 status
+Stop           $08    B    -                 status
+Pause          $09    B    -                 status
+Init           $0a         -                 status
+Mute           $0b         -                 status
+Demute         $0c         -                 status
+Setfilter      $0d         file,channel      status
+Setmode        $0e         mode              status
+Getparam       $0f         -                 status,mode,file?,chan?,?,?
+GetlocL        $10         -                 min,sec,sector,mode,file,channel
+GetlocP        $11         -                 track,index,min,sec,frame,amin,
+                                                 asec,aframe
+GetTN          $13         -                 status,first,total (BCD)
+GetTD          $14         track(BCD)        status,min,sec (BCD)
+SeekL          $15    B    *                 status
+SeekP          $16    B    *                 status
+Test           $19         #                 depends on parameter
+ID             $1A    B    -                 success,flag1,flag2,00
+                                                 4 letters of ID (SCEx)
+ReadS          $1B    B    -                 status
+Reset          $1C         -                 status
+ReadTOC        $1E    B?   -                 status
+
+* These commands' targets are set using Setloc.
+# Command 19 is really a portal to another set of commands.
+
+B means blocking.  These commands return an immediate result saying the
+command was started, but you need to wait for an IRQ in order to get
+real results.
+
+Command descriptions:
+00 Sync:       Command does not succeed until all other commands complete.
+               This can be used for synchronization - hence the name.
+01 Nop:        Does nothing; use this if you just want the status.
+02 Setloc:     This command, with its parameters, sets the target for
+               commands with a * for their parameter list.
+03 Play:       Plays audio sectors from the last point seeked.  This is
+               almost identical to CdlReadS, believe it or not.  The main
+               difference is that this does not trigger a completed read
+               IRQ.  CdlPlay may be used on data sectors.  However, all
+               sectors from data tracks are treated as 00, so no sound is
+               played.  As CdlPlay is reading, the audio data appears in
+               the sector buffer, but is not reliable.  Game Shark
+               "enhancement CDs" for the 2.x and 3.x versions used this
+               to get around the PSX copy protection.
+04 Forward:    Seek to next track ?
+05 Backward:   Seek to beginning of current track, or previous track if
+               early in current track (like a CD player's back button)
+06 ReadN:      Read with retry.  Each sector causes an IRQ (type 1) if
+               ModeRept is on (I think).  ReadN and ReadS cause errors if
+               you're trying to read a non-PSX CD or audio CD without a
+               mod chip.
+07 Standby:    CD-ROM aborts all reads and playing, but continues
+               spinning.  CD-ROM does not attempt to keep its place.
+08 Stop:       Stops motor.  Official way to restart is 0A, but almost
+               any command will restart it.
+09 Pause:      Like Standby, except the point is to maintain the current
+               location within reasonable error.
+0A Init:       Multiple effects at once.  Setmode = 00, Standby, abort
+               all commands.
+0B Mute:       Turn off CDDA stream to SPU.
+0C Demute:     Turn on CDDA streaming to SPU.
+0D Setfilter:  Automatic ADPCM (CD-ROM XA) filter ignores sectors except
+               those which have the same channel and file (parameters)
+               in their subheader area.  This is the mechanism used to
+               select which of multiple songs in a single XA to play.
+               Setfilter does not affect actual reading (sector reads
+               still occur for all sectors).
+0E Setmode:    Sets parameters such as read mode and spin speed.  See
+               chart above the command list.
+0F Getparam:   ??? returns status, mode, file, channel, ?, ?
+10 GetlocL:    Retrieves first 6 (8?) bytes of last read sector (header)
+               This is used to know where the sector came from, but is
+               generally pointless in 2340 byte read mode.  All results
+               are in BCD ($12 is considered track twelve, not eighteen)
+               Command may execute concurrently with a read or play
+               (GetlocL returns results immediately).
+11 GetlocP:    Retrieves 8 of 12 bytes of sub-Q data for the last-read
+               sector.  Same purpose as GetlocL, but more powerful, and
+               works while playing audio.  All results are in BCD.
+                   track:  track number ($AA for lead-out area)
+                   index:  index number (INDEX lines in CUE sheets)
+                   min:    minute number within track
+                   sec:    second number within track
+                   frame:  sector number within "sec" (0 to 74)
+                   amin:   minute number on entire disk
+                   asec:   second number on entire disk
+                   aframe: sector number within "asec" (0 to 74)
+13 GetTN:      Get first track number and number of tracks in the TOC.
+14 GetTD:      Gets start of specified track (does it return sector??)
+15 SeekL:      Seek to Setloc's location in data mode (can only seek to
+               data sectors, but is accurate to the sector)
+16 SeekP:      Seek to Setloc's location in audio mode (can seek to
+               any sector, but is only accurate to the second)
+19 Test:       This function has many subcommands that are completely
+               different.
+1A ID:         Returns copy protection status.  StatError for invalid
+               data CD, StatStandby for valid PSX CD or audio CD.  The
+               following bits I'm unsure about, but I think the 3rd
+               byte has $80 bit for "CD denied" and $10 bit for
+               "import".  $80 = copy, $90 = denied import, $10 =
+               accepted import (Yaroze only).  The 5th through 8th
+               bytes are the SCEx ASCII string from the CD. 
+1B ReadS:      Read without automatic retry.
+1C Reset:      Same as opening and closing the drive door.
+1E ReadTOC:    Reread the Table of Contents without reset. 
+
+-----------------------------------------------------------------------
+--------------------------------------------------------------------------
+To send a command:
+
+- First send any arguments by writing 0 to CDREG0, then all arguments
+  sequentially to CDREG2
+
+- Then write 0 to CDREG0, and the command to CDREG1.
+
+To wait for a command to complete:
+
+- Wait until a CDrom irq occurs (bit 3 of the interrupt regs) The cause
+  of the cdrom irq is in the low nibble of CDREG3. This is usually 3
+  on a successful completion. Failure to complete the command will result
+  in a 5. If you don't wish to use irq's you can just check for the
+  low nibble of cdreg3 to become something other than 0, but make sure
+  it doesn't get cleared in any irq setup by the bios or some such.
+
+To Get the results
+
+- Write a 1 to CDREG0, then read CDREG0. If bit 5 is set, read a return
+  value from CDREG1, then read CDREG0 again; repeat until bit 5 goes low.
+
+To Clear the irq
+
+- After command completion the irq cause should be cleared, do this by
+  writing a 1 to CDREG0 then 7 to CDREG2 and CDREG3. My guess is that
+  the write to CDREG2 clears the arguments previously set from some
+  buffer.
+  Note that irq's are queued, and if you clear the current, another may
+  come up directly..
+--------------------------------------------------------------------------
+To init the CD:
+
+-Flush all irq's
+-CDREG0=0
+-CDREG3=0
+-Com_Delay=4901 ($1f801020)
+-Send 2 NOP's
+-Command $0a, no args.              (<- what's this??)
+-Demute
+--------------------------------------------------------------------------
+To set up the cd for audio playback, some weird init stuff needs to be
+done:
+
+CDREG0=2
+CDREG2=$80
+CDREG3=0
+CDREG0=3
+CDREG1=$80
+CDREG2=0
+CDREG3=$20
+
+Also don't forget to init the SPU. (CDvol and CD enable especially)
+--------------------------------------------------------------------------
+You should not send some commands while the CD is seeking. (ie. status
+returns with bit 6 set.) Thing is that the status only gets updated after
+a new command. I haven't tested this for other commands, but for the
+play command ($03) you can just keep repeating the command and checking
+the status returned by that, for bit 6 to go low (and bit 7 to go high in
+this case). If you don't and try to do a getloc directly after the play
+command reports it's done, the cd will stop. (I guess the cd can't
+get its current location while it's seeking, so the logic stops the seek
+to get an exact fix, but never restarts..)
+
+
+-----------------------------------------------------------------------
+19 subcommands.
+-----------------------------------------------------------------------
+
+For one reason or another, there is a counter that counts the number of
+SCEx strings received by the CD-ROM controller.
+
+Be aware that the results for these commands can exceed 8 bytes.
+
+04: Read SCEx counter (returned in 1st byte?)
+05: Reset SCEx counter.  This also sets 1A's SCEx response to
+    00 00 00 00, but doesn't appear to force a protection failure.
+20: Returns an ASCII string specifying where the CD-ROM firmware is
+    intended to be used ("for Japan", "for U/C").
+22: Returns a chip number inside the PSX in use.
+23: Returns another chip number.
+24: Returns yet another chip number.  Same as 22's on some PSXs.
+
+--------------------------------------------------------------------------
+3/nov/1999	Initial version
+3/feb/2000	Update. Big thanks to Barubary, who rewrote a large part.
+--------------------------------------------------------------------------
+psx.padua.org                www.padua.org                  doomed@c64.org
+--------------------------------------------------------------------------
diff --git a/misc/gpu.txt b/misc/gpu.txt
@@ -0,0 +1,1250 @@
+===========================================================================
+GPU information.
+===========================================================================
+About this document.
+---------------------------------------------------------------------------
+This document is a collection of all info on the GPU i could find and my
+own notes. Most of this is the result of experiment, so not all info might
+be correct. This document is most probably not complete, and not all
+capabilities and quirks of the GPU are documented. No responsibility is
+taken for anything that might occur using the information in this document.
+
+The K-communications text and the one by Nagra/Blackbag are the basis of
+this document.
+
+Notations and conventions
+When the format of data is given it's shown as a bitwise representation
+like this:
+
+pixel|                                               |
+bit  |0f|0e 0d 0c 0b 0a|09 08 07 06 05|04 03 02 01 00|
+desc.|S |Blue          |Green         |Red           |
+
+The "pixel" row shows how large the data is in the frame buffer. Each mark
+on this line denotes the size of the data in frame buffer pixels, as that
+is the minimum size that can be addressed.
+The bit row shows which bits of the data are used, and separators are used
+to show where the different elements of the data stop and start. MSB is on
+the left, LSB is on the right. Stuff like |0f-08| means bit $0f to bit $08.
+The desc. row shows the description of the different elements. With
+separators where the element starts and ends.
+
+--------------------------------------------------------------------------
+The Graphics Processing Unit (GPU) - overview.
+--------------------------------------------------------------------------
+The GPU is the unit responsible for the graphical output of the PSX. It
+handles display and drawing of all graphics. It has control over a 1MB
+frame buffer and contains a 2Kb texture cache. It has a command and
+data port. It has a 64 byte command FIFO buffer, which can hold up to
+3 commands and is connected to a DMA channel for transfer of image data and
+linked command lists and a DMA channel for reverse clearing an OT.
+
+---------------------------------------------------------------------------
+The Frame Buffer.
+---------------------------------------------------------------------------
+The frame buffer is the memory which stores all graphic data which the GPU
+can access and manipulate, while drawing and displaying an image. The
+memory is under the GPU and cannot be accessed by the CPU directly. It is
+operated solely by the GPU. The frame buffer has a size of 1 MB and is 
+treated as a space of 1024 pixels wide and 512 pixels high. Each "pixel"
+has the size of one word (16 bit). It is not treated linearly like usual
+memory, but is accessed through coordinates, with an upperleft corner of
+(0,0) and a lower right corner of (1023,511).
+
+When data is displayed from the frame buffer, a rectangular area is read
+from the specified coordinate within this memory. The size of this area can
+be chosen from several hardware defined types. Note that these hardware
+sizes are only valid when the X and Y stop/start registers are at their
+default values. This display area can be displayed in two color formats,
+being 15bit direct and 24bit direct. The data format of one pixel is as
+follows:
+
+15bitDirect display.
+
+pixel|                                               |
+bit  |0f|0e 0d 0c 0b 0a|09 08 07 06 05|04 03 02 01 00|
+desc.|M |Blue          |Green         |Red           |
+
+This means each color has a value of 0-31. The MSB of a pixel (M) is used
+to mask the pixel.
+
+24bit Direct Display.
+
+The GPU can also be set to 24bit mode, in which case 3 bytes form one
+pixel, 1 byte for each color. Data in this mode is arranged as follows:
+
+pixel|0      |1      |2      |
+Bit  |F-8|7-0|F-8|7-0|F-8|7-0|
+desc.|G0 |R0 |R1 |B0 |B1 |G1 |
+
+Thus 2 display pixels are encoded in 3 frame buffer pixels. They are
+displayed as follows: [R0,G0,B0] [R1,G1,B1]
+
+---------------------------------------------------------------------------
+Primitives.
+---------------------------------------------------------------------------
+A basic figure which the GPU can draw is called a primitive, and it can
+draw the following:
+
+* Polygon
+ The GPU can draw 3 point and 4 point polygons. Each point of the polygon
+ specifies a point in the frame buffer. The polygon can be Gouraud shaded.
+ The correct order of vertices for 4 point polygons is as follows:
+
+ 1--2    Note: A 4 point polygon is processed internally as two 3 point
+ |  |    polygons.
+ 3--4    Note: When drawing a polygon the GPU will not draw the right
+         most and bottom edge. So a (0,0)-(32,32) rectangle will actually
+ be drawn as (0,0)-(31,31). Make sure adjoining polygons have the same
+ coordinates if you want them to touch each other! Haven't checked how this
+ works with 3 point polygons.
+ 
+* Polygon with texture
+A primitive of this type is the same as above, except that a texture is
+applied. Each vertex of the polygon maps to a point on a texture page in
+the frame buffer. The polygon can be Gouraud shaded.
+
+Note: Because a 4 point polygon is processed internally as two 3 point
+      polygons, texture mapping is also done independently for both halves.
+      This has some annoying consequences.
+
+* Rectangle
+A rectangle is defined by the location of the top left corner and its width
+and height. Width and height can be either free, 8*8 or 16*16. It's drawn
+much faster than a polygon, but Gouraud shading is not possible.
+ 
+* Sprite
+A sprite is a textured rectangle, defined as a rectangle with coordinates
+on a texture page. Like the rectangle, it is drawn much faster than the
+polygon equivalent. No Gouraud shading is possible.
+
+Note: Even though the primitive is called a sprite, it has nothing in
+      common with the traditional sprite, other than that it's a rectangular
+piece of graphics. Unlike the psx sprite, the traditional sprite is NOT
+drawn to the bitmap, but gets sent to the screen instead of the actual
+graphics data at that location at display time.
+
+* Line
+A line is a straight line between 2 specified points. The line can be
+Gouraud shaded. A special form is the polyline, for which an arbitrary
+number of points can be specified.
+
+* Dot
+The dot primitive draws one pixel at the specified coordinate and in the
+specified color. It is actually a special form of rectangle, with a size
+of 1*1.
+
+---------------------------------------------------------------------------
+Texture
+---------------------------------------------------------------------------
+A texture is an image put on a polygon or sprite. It is necessary to
+prepare the data beforehand in the frame buffer. This image is called a
+texture pattern. The texture pattern is located on a texture page which
+has a standard size and is located somewhere in the frame buffer, see
+below. The data of a texture can be stored in 3 different modes:
+
+* 15bitDirect mode.
+
+bit  |0f|0e 0d 0c 0b 0a|09 08 07 06 05|04 03 02 01 00|
+desc.|S |Blue          |Green         |Red           |
+
+This means each color has a value of 0-31. The MSB of a pixel (S) is used
+to specify if the pixel is semi transparent or not. More on that later.
+
+
+* 8bit CLUT mode,
+ Each pixel is defined by 8bits and the value of the pixel is converted to
+ a 15bit color using the CLUT(color lookup table) much like standard vga
+ pictures. So in effect you have 256 colors which are in 15bit precision.
+
+ Bit: |0F-08|07-00|
+ desc:|I1   |I0   |
+
+ I0 is the index to the CLUT for the left pixel, I1 for the right.
+
+* 4bitCLUT mode,
+ Same as above except that only 16 colors can be used. Data is arranged as
+ follows:
+
+ Bit   |F-C|B-8|7-4|3-0|
+ desc. |I3 |I2 |I1 |I0 |
+ 0 is drawn to the left
+
+ 
+* Texture Pages
+
+Texture pages have a unit size of 256*256 pixels, regardless of colormode.
+This means that in the frame buffer they will be 64 pixels wide for 4bit
+CLUT, 128 pixels wide for 8bit CLUT and 256 pixels wide for 15bit direct.
+The pixels are addressed with coordinates relative to the location of the
+texture page, not the framebuffer. So the topleft texture coordinate on
+a texture page is (0,0) and the bottom right one is (255,255)
+ The pages can be located in the frame buffer on X multiples of 64 and Y
+multiples of 256. More than one texture page can be set up, but each
+primitive can only contain texture from one page.
+
+* Texture Windows
+The area within a texture window is repeated throughout the texture
+page. The data is not actually stored all over the texture page but
+the GPU reads the repeated patterns as if they were there. The X and Y
+and H and W must be multiples of 8.
+
+* CLUT (Color Lookup Table)
+The clut is the table where the colors are stored for the image data in
+the CLUT modes. The pixels of those images are used as indexes to this
+table. The clut is arranged in the frame buffer as a 256x1 image for the
+8bit clut mode, and a 16x1 image for the 4bit clut mode. Each pixel is a 16
+bit value, the first 15 bits used for a 15 bit color, and the 16th used for
+semitransparency. The clut data can be arranged in the frame buffer at X
+multiples of 16 (X=0,16,32,48,etc) and anywhere in the Y range of 0-511.
+More than one clut can be prepared but only one can be used for each
+primitive.
+
+* Texture Caching
+
+If polygons with texture are displayed, the GPU needs to read these from
+the frame buffer. This slows down the drawing process, and as a result
+the number of polygons that can be drawn in a given timespan. To speed up
+this process the GPU is equipped with a texture cache, so a given piece
+of texture needs not to be read multiple times in succession.
+The texture cache size depends on the color mode used for the textures.
+In 4 bit CLUT mode it has a size of 64x64, in 8 bit CLUT it's 32x64 and in
+15bitDirect is 32x32. A general speed up can be achieved by setting up
+textures according to these sizes. For further speed gain a more precise
+knowledge of how the cache works is necessary.
+
+- Cache blocks
+
+The texture page is divided into non-overlapping cache blocks, each of a
+unit size according to color mode. These cache blocks are tiled within
+the texture page.
+
++-----+-----+-----+--
+|cache|     |     |
+|block|     |
+|    0|   1 |    2   ..
++-----+-----+--
+|     |     |
+
+..
+
+- Cache entries
+
+Each cache block is divided into 256 cache entries, which are numbered
+sequentially, and are 8 bytes wide. So a cache entry holds 16 4bit clut
+pixels, 8 8bit clut pixels, or 4 15bitdirect pixels.
+
+4bit and 8bit clut:        15bitdirect:
++----+----+----+----+     +----+----+----+----+----+----+----+----+
+|   0|   1|   2|   3|     |   0|   1|   2|   3|   4|   5|   6|   7|
++----+----+----+----+     +----+----+----+----+----+----+----+----+
+|   4|   5|   6|   7|     |   8|   9|   a|   b|   c|   d|   e|   f|
++----+----+----+----+     +----+----+----+----+----+----+----+----+
+|   8|   9|  ..           |  10|  11|  ..
++----+----+--             +----+----+--
+|   c|  ..|               |  18|  ..|
++----+--                  +----+--
+|  ..                     |  ..
+
+
+The cache can hold only one cache entry with the same number, so if e.g. a
+piece of texture spans multiple cache blocks and it has data on entry 9 of
+block 1, but also on entry 9 of block 2, these cannot be in the cache at
+once.
+
+
+---------------------------------------------------------------------------
+Rendering options.
+---------------------------------------------------------------------------
+There are 3 modes which affect the way the GPU renders the primitives to
+the frame buffer.
+
+* Semi Transparency
+When semi transparency is set for a pixel, the GPU first reads the pixel it
+wants to write to, and then calculates the color it will write from the 2
+pixels according to the semitransparency mode selected. Processing speed is
+lower in this mode because additional reading and calculating are
+necessary. There are 4 semitransparency modes in the GPU.
+
+B=  the pixel read from the image in the frame buffer, F = the
+halftransparent pixel
+
+* 0.5 x B + 0.5 x F
+* 1.0 x B + 1.0 x F
+* 1.0 x B - 1.0 x F
+* 1.0 x B +0.25 x F
+
+A new semi transparency mode can be set for each primitive. For primitives
+without texture semi transparency can be selected. For primitives with
+texture semi transparency is stored in the MSB of each pixel, so some pixels
+can be set to STP others can be drawn opaque. For the CLUT modes the STP bit
+is obtained from the CLUT. So if a color index points to a color in the
+CLUT with the MSB set, it will be drawn semi transparent.
+
+When the color is black(BGR=0), STP is processed different from when it's not
+black (BGR<>0). The table below shows the differences:
+
+             transparency processing (bit 1 of command packet)
+BGR    STP       off             on
+0,0,0   0    Transparent     Transparent
+0,0,0   1  Non-transparent Non-Transparent
+x,x,x   0  Non-Transparent Non-Transparent
+x,x,x   1  Non-Transparent   Transparent
+
+* Shading
+The GPU has a shading function, which will scale the color of a primitive
+to a specified brightness. There are 2 shading modes: Flat shading, and
+gouraud shading. Flat shading is the mode in which one brightness value is
+specified for the entire primitive. In Gouraud shading mode, a different
+brightness value can be given for each vertex of a primitive, and the
+brightness between these points is automatically interpolated.
+
+* Mask
+
+The mask function will prevent the GPU from writing to specific pixels when
+drawing in the framebuffer. This means that when the gpu is drawing a
+primitive to a masked area, it will first read the pixel at the coordinate
+it wants to write to, check if its masking bit is set, and if so refrain
+from writing to that particular pixel. The masking bit is the MSB of the
+pixel, just like the STP bit.
+To set this masking bit, the GPU provides a mask out mode, which will set
+the MSB of any pixel it writes. If both mask out and mask evaluation are
+on, the GPU will not draw to pixels with set MSB's, and will draw pixels
+with set MSB's to the others, these in turn becoming masked pixels.
+
+---------------------------------------------------------------------------
+Drawing Environment
+---------------------------------------------------------------------------
+The drawing environment specifies all global parameters the GPU needs for
+drawing primitives.
+
+* Drawing offset.
+ This locates the top left corner of the drawing area. Coordinates of
+ primitives originate to this point. So if the drawing offset is (0,240)
+ and a vertex of a polygon is located at (16,20) it will be drawn to the
+ frame buffer at (0+16,240+20).
+ 
+* Drawing clip area
+ This specifies the maximum range the GPU draws primitives to. So in effect
+ it specifies the top left and bottom right corner of the drawing area.
+ 
+* Dither enable
+ When dither is enabled the GPU will dither areas during shading. It will
+ process internally in 24 bit and dither the colors when converting back to
+ 15bit. When it is off, the lower 3 bits of each color simply get
+ discarded.
+ 
+* Draw to display enable.
+ This will enable/disable any drawing to the area that is currently
+ displayed.
+ 
+* Mask enable
+ When turned on any pixel drawn to the framebuffer by the GPU will have a
+ set masking bit. (= set MSB)
+ 
+* Mask judgement enable
+ Specifies if the mask data from the frame buffer is evaluated at the time
+ of drawing.
+
+---------------------------------------------------------------------------
+Display Environment.
+---------------------------------------------------------------------------
+This contains all information about the display, and the area displayed.
+	
+* Display area in frame buffer
+ This specifies the resolution of the display. The size can be set
+ as follows:
+
+ Width: 256,320,384,512 or 640 pixels
+ Height: 240 or 480 pixels
+
+ These sizes are only an indication on how many pixels will be displayed
+ using a default start/end. These settings only specify the resolution of
+ the display.
+
+*  Display start/end.
+ Specifies where the display area is positioned on the screen, and how
+ much data gets sent to the screen. The screen sizes of the display area
+ are valid only if the horizontal/vertical start/end values are default. By
+ changing these you can get bigger/smaller display screens. On most TV's
+ there is some black around the edge, which can be utilised by setting the
+ start of the screen earlier and the end later. The size of the pixels is
+ NOT changed with these settings, the GPU simply sends more data to the
+ screen. Some monitors/TVs have a smaller display area and the extended
+ size might not be visible on those sets.(Mine is capable of about 330
+ pixels horizontal, and 272 vertical in 320*240 mode)
+
+
+* Interlace enable
+
+ When enabled the GPU will display the even and odd lines of the display
+ area alternately. It is necessary to set this when using 480 lines as the
+ number of scan lines on a TV screen are not sufficient to display 480
+ lines.
+
+* 15bit/24bit direct display
+ Switches between 15bit/24bit display mode.
+
+* Video mode
+ Selects which video mode to use, which are either PAL or NTSC.
+
+--------------------------------------------------------------------------
+Communication and OT's.
+--------------------------------------------------------------------------
+All data regarding drawing and drawing environment are sent as packets to
+the GPU. Each packet tells the GPU how and where to draw one primitive, or
+it sets one of the drawing environment parameters. The display environment
+is set up through single word commands using the control port of the GPU.
+
+Packets can be forwarded word by word through the data port of the GPU, or
+more efficiently for large numbers of packets through DMA. A special DMA
+mode was created for this so large numbers of packets can be sent and
+managed easily. In this mode a list of packets is sent, where each entry in
+the list contains a header which is one word containing the address of the
+next entry and the size of the packet and the packet itself. A result of
+this is that the packets do not need to be stored sequentially. This makes
+it possible to easily control the order in which packets get processed. The
+GPU processes the packets it gets in the order they are offered. So the
+first entry in the list also gets drawn first. To insert a packet into the
+middle of the list simply find the packet after which you want it to be
+processed, replace the address in that packet with the address of the new
+packet, and let that point to the address you replaced.
+
+To aid you in finding a location in the list the Ordering Table was
+invented. At first this is basically a linked list with entries of packet
+size 0, so it's a list of only listentryheaders, where each entry points to
+the next entry. Then as primitives are generated by your program you can
+then add them to the table at a certain index. Just read the address in the
+table entry and replace it with the address of the new packet and store the
+address from the table in the packet. When all packets are generated and
+you want to draw, just pass the address of the first listentry to the DMA
+and the packets will get drawn in the order you entered the packets to the
+table. Packets entered at a higher table index will get drawn after those
+entered at a lower table index. Packets entered at the same index will get
+drawn in the order they were entered, the last one first.
+
+In 3d drawing it's most common that you want the primitives with the highest
+Z value to be drawn first, so it would be nice if the table would be drawn
+the other way around, so the Z value can be used as index. This is a simple
+thing, just make a table of which each entry points to the previous entry,
+and start the DMA with the address of the last table entry. To assist you
+in making such a table, a special DMA channel is available which creates
+it for you.
+
+--------------------------------------------------------------------------
+GPU operation
+--------------------------------------------------------------------------
+* GPU control registers.
+There are 2 32 bit io ports for the GPU, which are:
+
+$1f801810       GPU Data
+$1f801814       GPU control/Status
+
+The data register is used to exchange data with the GPU.
+The control/status register gives the status of the GPU when read, and
+sets the control bits when written to.
+
+* Control/Status Register $1f801814
+
+Status (Read)
+-----------------------------------------------------------------------------
+|1f |1e 1d|1c |1b |1a  |19 18|17 |16     |15     |14   |13    |12 11 |10    |
+|lcf|dma  |com|img|busy| ?  ?|den|isinter|isrgb24|Video|Height|Width0|Width1|
+-----------------------------------------------------------------------------
+
+         W0 W1
+Width:   00 0   256 pixels
+         01 0   320
+         10 0   512
+         11 0   640
+         00 1   384
+Height:    0  240 pixels
+           1  480
+Video:     0  NTSC
+           1  PAL
+isrgb24:   0  15 bit direct mode
+           1  24 bit direct mode
+isinter:   0  Interlace off
+           1  Interlace on
+den:       0  Display enabled
+           1  Display disabled
+busy:      0  GPU is Busy  (ie. drawing primitives)
+           1  GPU is Idle
+img:       0  Not Ready to send image (packet $c0)
+           1  Ready
+com:       0  Not Ready    to receive commands
+           1  Ready
+dma:      00  DMA off, communication through GP0
+          01
+          10  DMA CPU -> GPU
+          11  DMA GPU -> CPU
+
+
+lcf:       0  Drawing even lines in interlace mode
+           1  Drawing uneven lines in interlace mode
+----------------------------------------------------
+|0f 0e 0d|0c|0b|0a  |09 |08 07|06 05|04|03 02 01 00|
+| ?  ?  ?|me|md|dfe |dtd|tp   |abr  |ty|tx         |
+----------------------------------------------------
+                                                     
+tx:        0      0        Texture page X = tx*64
+           1     64
+           2    128
+           3    192
+           4   ...
+ty         0      0        Texture page Y
+           1    256
+abr      %00  0.5xB+0.5 xF Semi transparent state
+         %01  1.0xB+1.0 xF
+         %10  1.0xB-1.0 xF
+         %11  1.0xB+0.25xF
+tp       %00  4bit CLUT    Texture page color mode
+         %01  8bit CLUT
+         %10  15bit
+dtd        0  Dither off
+           1  Dither on
+dfe        0  Draw to display area prohibited
+           1  Draw to display area allowed
+md         0  off
+           1  on   Apply mask bit to drawn pixels.
+me         0  off
+           1  on   No drawing to pixels with set mask bit.
+
+Control (Write)
+--------------------------------------------------------------------------
+A control command is composed of one word as follows:
+
+bit 1f-18    17-0
+    command  parameter.
+
+The composition of the parameter is different for each command.
+
+--------------------------------------------------------------------------
+*Reset GPU
+command           $00
+parameter         $000000
+Description       Resets the GPU. Also seems to turn off screen.
+                  (sets status to $14802000)
+--------------------------------------------------------------------------
+*Reset Command Buffer
+command           $01
+parameter         $000000
+Description       Resets the command buffer.
+
+--------------------------------------------------------------------------
+*Reset IRQ
+command           $02
+parameter         $000000
+Description       Resets the IRQ. No idea of what this means.
+
+--------------------------------------------------------------------------
+*Display Enable
+command           $03
+parameter         $000000 Display enable
+                  $000001 Display disable
+Description       Turns on/off display. Note that a turned off
+                  screen still gives the flicker of NTSC on a
+                  pal screen if NTSC mode is selected..
+--------------------------------------------------------------------------
+*DMA setup.
+command           $04
+parameter         $000000 DMA disabled
+                  $000001 DMA ?
+                  $000002 DMA CPU to GPU
+                  $000003 DMA GPU to CPU
+Description       Sets dma direction. K-comm also mentions something
+                  about parameter $01, but i wasn't able to translate.
+--------------------------------------------------------------------------
+*Start of display area
+command           $05
+parameter    bit  $00-$09  X (0-1023)
+             bit  $0A-$12  Y (0-511)
+             = Y<<10 + X
+description       Locates the top left corner of the display area.
+--------------------------------------------------------------------------
+*Horizontal Display range
+command           $06
+parameter    bit  $00-$0b   X1 ($1f4-$CDA)
+             bit  $0c-$17   X2
+                = X1+X2<<12
+description       Specifies the horizontal range within which the
+                  display area is displayed. The display is relative
+                  to the display start, so X coordinate 0 will be at
+                  the value in X1. The display end is not relative to
+                  the display start. The number of pixels that get sent
+                  to the screen in 320 mode are (X2-X1)/8. How many
+                  actually are visible depends on your TV/monitor.
+                  (normally $260-$c56)
+--------------------------------------------------------------------------
+*Vertical Display range
+command           $07
+parameter    bit  $00-$09   Y1
+             bit  $0a-$14   Y2
+                = Y1+Y2<<10
+description       Specifies the vertical range within which the
+                  display area is displayed. The display is relative
+                  to the display start, so Y coordinate 0 will be at
+                  the value in Y1. The display end is not relative to
+                  the display start. The number of pixels that get sent
+                  to the display are Y2-Y1, in 240 mode.
+                  (Not sure about the default values, should be
+                   something like NTSC $010-$100, PAL  $023-$123)
+--------------------------------------------------------------------------
+*Display mode
+command           $08
+parameter    bit  $00-$01  Width 0
+             bit  $02      Height
+             bit  $03      Videomode     See above
+             bit  $04      Isrgb24
+             bit  $05      Isinter
+             bit  $06      Width1
+             bit  $07      Reverseflag
+
+description  Sets the display mode.
+--------------------------------------------------------------------------
+*GPU Info
+command           $10
+parameter         $000000
+                  $000001
+                  $000002
+                  $000003  Draw area top left
+                  $000004  Draw area bottom right
+                  $000005  Draw offset
+                  $000006
+                  $000007  GPU Type, should return 2 for a standard GPU.
+
+description       Returns requested info. Read result from GP0.
+                  0,1 seem to return draw area top left also
+                  6   seems to return draw offset too.
+
+--------------------------------------------------------------------------
+*Some other commands i do not know the function of:
+
+*?????
+command           $20
+parameter         ???????
+description       i've seen it used with value $000504
+                  what it does?????
+
+*?????
+command           $09
+parameter         $000001  ??
+description       I've seen it used with value $000001
+                  what it does?????
+
+--------------------------------------------------------------------------
+Command Packets, Data Register.
+--------------------------------------------------------------------------
+Primitive command packets use an 8 bit command value which is present in
+all packets. They contain a 3 bit type block and a 5 bit option block of
+which the meaning of the bits depend on the type. Layout is as follows:
+
+Type:
+000 GPU command
+001 Polygon primitive
+010 Line primitive
+011 Sprite primitive
+100 Transfer command
+111 Environment command
+
+Configuration of the option blocks for the primitives is as follows:
+
+Polygon:
+| 7   6   5 | 4 | 3 | 2 | 1 | 0 |
+| 0   0   1 |IIP|3/4|Tme|Abe|Tge|
+
+Line:
+| 7   6   5 | 4 | 3 | 2 | 1 | 0 |
+| 0   1   0 |IIP|Pll| 0 |Abe| 0 |
+
+Sprite:
+| 7   6   5 | 4   3 | 2 | 1 | 0 |
+| 0   1   1 | Size  |Tme|Abe| 0 |
+
+
+IIP      0 Flat Shading
+         1 Gouraud Shading
+3/4      0 3 vertex polygon
+         1 4 vertex polygon
+Tme      0 Texture mapping   off
+         1                    on
+Abe      0 Semi transparency off
+         1                    on
+Tge      0 Brightness calculation at time of texture mapping on
+         1 off. (draw texture as is)
+Size    00 Free size (Specified by W/H)
+        01  1 x  1
+        10  8 x  8
+        11 16 x 16
+Pll      0  Single line (2 vertices)
+         1  Polyline    (n vertices)
+
+* Color information
+Color information is forwarded as 24 bit data. It is parsed to
+15 bit by the GPU.
+
+Layout as follows:
+
+$17-$10 $0f-$08 $07-$00
+Blue    Green   Red
+
+* Shading information.
+For textured primitive shading data is forwarded by this packet.
+Layout is the same as for color data, the RGB values controlling
+the brightness of the individual colors ($00-$7f). A value of $80 in a
+color will take the former value as data.
+
+*Texture Page information
+The Data is 16 bit wide, layout is as follows:
+
+|F E D C B A 9|8 7|6 5|4 |3 2 1 0|
+|0            |tp |abr|ty|tx     |
+
+tx       0-f      X*64  texture page x coord
+ty       0        0     texture page y coord
+         1        256
+abr      0        0.5xB+0.5 xF  Semi transparency mode
+         1        1.0xB+1.0 xF
+         2        1.0xB-1.0 xF
+         3        1.0xB+0.25xF
+tp       0        4bit CLUT
+         1        8bit CLUT
+         2        15bit direct
+
+CLUT-ID
+Specifies the location of the CLUT data. Data is 16bits.
+
+F-6      Y coordinate 0-511
+5-0      X coordinate X/16
+
+--------------------------------------------------------------------------
+abbreviations in packet list
+--------------------------------------------------------------------------
+BGR      Color/Shading info see above.
+xn,yn    16 bit values of X and Y in frame buffer.
+un,vn    8 bit values of X and Y in texture page
+tpage    texture page information packet, see above
+clut     clut ID, see above.
+
+--------------------------------------------------------------------------
+Packet list.
+--------------------------------------------------------------------------
+The packets sent to the GPU are processed as a group of data,
+each one word wide. The data must be written to the GPU data register
+($1f801810) sequentially. Once all data has been received, the GPU
+starts operation.
+
+Overview of packet commands:
+
+Primitive drawing packets
+ $20     monochrome 3 point polygon
+ $24     textured 3 point polygon
+ $28     monochrome 4 point polygon
+ $2c     textured 4 point polygon
+ $30     gradated 3 point polygon
+ $34     gradated textured 3 point polygon
+ $38     gradated 4 point polygon
+ $3c     gradated textured 4 point polygon
+ $40     monochrome line
+ $48     monochrome polyline
+ $50     gradated line
+ $58     gradated line polyline
+ $60     rectangle
+ $64     sprite
+ $68     dot
+ $70     8*8 rectangle
+ $74     8*8 sprite
+ $78     16*16 rectangle
+ $7c     16*16 sprite
+GPU command & Transfer packets
+ $01     clear cache
+ $02     frame buffer rectangle draw
+ $80     move image in frame buffer
+ $a0     send image to frame buffer
+ $c0     copy image from frame buffer
+Draw mode/environment setting packets
+ $e1     draw mode setting
+ $e2     texture window setting
+ $e3     set drawing area top left
+ $e4     set drawing area bottom right
+ $e5     drawing offset
+ $e6     mask setting
+
+--------------------------------------------------------------------------
+Packet Descriptions
+--------------------------------------------------------------------------
+Primitive Packets
+--------------------------------------------------------------------------
+$20     monochrome 3 point polygon
+
+ |1f-18|17-10|0f-08|07-00|
+1|$20  |BGR              |command+color
+2|y0         |x0         |vertexes
+3|y1         |x1         |
+4|y2         |x2         |
+--------------------------------------------------------------------------
+$24     textured 3 point polygon
+ |1f-18|17-10|0f-08|07-00|
+1|$24  |BGR              |command+color
+2|y0         |x0         |vertex 0
+3|clut       |v0   |u0   |clutid+ texture coords vertex 0
+4|y1         |x1         |
+5|tpage      |v1   |u1   |
+6|y2         |x2         |
+7|           |v2   |u2   |
+--------------------------------------------------------------------------
+$28     monochrome 4 point polygon
+ |1f-18|17-10|0f-08|07-00|
+1|$28  |BGR              |command+color
+2|y0         |x0         |vertexes
+3|y1         |x1         |
+4|y2         |x2         |
+5|y3         |x3         |
+--------------------------------------------------------------------------
+$2c     textured 4 point polygon
+ |1f-18|17-10|0f-08|07-00|
+1|$2c  |BGR              |command+color
+2|y0         |x0         |vertex 0
+3|clut       |v0   |u0   |clutid+ texture coords vertex 0
+4|y1         |x1         |
+5|tpage      |v1   |u1   |
+6|y2         |x2         |
+7|           |v2   |u2   |
+8|y3         |x3         |
+9|           |v3   |u3   |
+--------------------------------------------------------------------------
+$30     gradated 3 point polygon
+ |1f-18|17-10|0f-08|07-00|
+1|$30  |BGR0             |command+color
+2|y0         |x0         |vertexes
+3|     |BGR1             |
+4|y1         |x1         |
+5|     |BGR2             |
+6|y2         |x2         |
+--------------------------------------------------------------------------
+$34     shaded textured 3 point polygon
+ |1f-18|17-10|0f-08|07-00|
+1|$34  |BGR0             |command+color
+2|y0         |x0         |vertex 0
+3|clut       |v0   |u0   |clutid+ texture coords vertex 0
+4|     |BGR1             |
+5|y1         |x1         |
+6|tpage      |v1   |u1   |
+7|     |BGR2             |
+8|y2         |x2         |
+9|           |v2   |u2   |
+--------------------------------------------------------------------------
+$38     gradated 4 point polygon
+ |1f-18|17-10|0f-08|07-00|
+1|$38  |BGR0             |command+color
+2|y0         |x0         |vertexes
+3|     |BGR1             |
+4|y1         |x1         |
+5|     |BGR2             |
+6|y2         |x2         |
+7|     |BGR3             |
+8|y3         |x3         |
+--------------------------------------------------------------------------
+$3c     shaded textured 4 point polygon
+ |1f-18|17-10|0f-08|07-00|
+1|$3c  |BGR0             |command+color
+2|y0         |x0         |vertex 0
+3|clut       |v0   |u0   |clutid+ texture coords vertex 0
+4|     |BGR1             |
+5|y1         |x1         |
+6|tpage      |v1   |u1   |texture page location
+7|     |BGR2             |
+8|y2         |x2         |
+9|           |v2   |u2   |
+a|     |BGR3             |
+b|y3         |x3         |
+c|           |v3   |u3   |
+--------------------------------------------------------------------------
+$40     monochrome line
+ |1f-18|17-10|0f-08|07-00|
+1|$40  |BGR              |command+color
+2|y0         |x0         |vertex 0
+3|y1         |x1         |vertex 1
+--------------------------------------------------------------------------
+$48     single color polyline
+ |1f-18|17-10|0f-08|07-00|
+1|$48  |BGR              |command+color
+2|y0         |x0         |vertex 0
+3|y1         |x1         |vertex 1
+4|y2         |x2         |vertex 2
+
+.|yn         |xn         |vertex n
+.|$55555555 Termination code.
+
+Any number of points can be entered, end with termination code.
+--------------------------------------------------------------------------
+$50     gradated line
+ |1f-18|17-10|0f-08|07-00|
+1|$50  |BGR0             |command+color
+2|y0         |x0         |
+3|     |BGR1             |
+4|y1         |x1         |
+--------------------------------------------------------------------------
+$58     gradated line polyline
+ |1f-18|17-10|0f-08|07-00|
+1|$58  |BGR0             |command+color
+2|y0         |x0         |
+3|     |BGR1             |
+4|y1         |x1         |
+5|     |BGR2             |
+6|y2         |x2         |
+
+.|     |BGRn             |
+.|yn         |xn         |
+.|$55555555 Termination code.
+Any number of points can be entered, end with termination code.
+--------------------------------------------------------------------------
+$60     rectangle
+ |1f-18|17-10|0f-08|07-00|
+1|$60  |BGR              |command+color
+2|y          |x          |
+3|h          |w          |
+--------------------------------------------------------------------------
+$64     sprite
+ |1f-18|17-10|0f-08|07-00|
+1|$64  |BGR              |command+color
+2|y          |x          |
+3|clut       |v    |u    |clut location, texture page y,x
+4|h          |w          |
+--------------------------------------------------------------------------
+$68     dot
+ |1f-18|17-10|0f-08|07-00|
+1|$68  |BGR              |command+color
+2|y          |x          |
+--------------------------------------------------------------------------
+$70     8*8 rectangle
+ |1f-18|17-10|0f-08|07-00|
+1|$70  |BGR              |command+color
+2|y          |x          |
+--------------------------------------------------------------------------
+$74     8*8 sprite
+ |1f-18|17-10|0f-08|07-00|
+1|$74  |BGR              |command+color
+2|y          |x          |
+3|clut       |v    |u    |clut location, texture page y,x
+--------------------------------------------------------------------------
+$78     16*16 rectangle
+ |1f-18|17-10|0f-08|07-00|
+1|$78  |BGR              |command+color
+2|y          |x          |
+--------------------------------------------------------------------------
+$7c     16*16 sprite
+ |1f-18|17-10|0f-08|07-00|
+1|$7c  |BGR              |command+color
+2|y          |x          |
+3|clut       |v    |u    |clut location, texture page y,x
+--------------------------------------------------------------------------
+GPU command & Transfer packets
+--------------------------------------------------------------------------
+$01     clear cache
+ |1f-18|17-10|0f-08|07-00|
+1|$01  |0                |clear cache.
+
+Seems to be the same as the GP1 command.
+--------------------------------------------------------------------------
+$02     frame buffer rectangle draw
+ |1f-18|17-10|0f-08|07-00|
+1|$02  |BGR              |command+color
+2|Y          |X          |Topleft corner
+3|H          |W          |Width & Height
+Fills the area in the frame buffer with the value in RGB. This command
+will draw without regard to drawing environment settings. Coordinates are
+absolute frame buffer coordinates. Max width is $3ff, max height is $1ff.
+--------------------------------------------------------------------------
+$80     move image in frame buffer
+ |1f-18|17-10|0f-08|07-00|
+1|$80  |                0|command
+2|sY         |sX         |Source coord.
+3|dY         |dX         |Destination coord.
+4|H          |W          |Height+Width of transfer
+Copies data within the frame buffer.
+--------------------------------------------------------------------------
+$01 $a0 send image to frame buffer
+ |1f-18|17-10|0f-08|07-00|
+ |$01  |                 |Reset command buffer (write to GP1 or GP0)
+1|$A0  |                 |
+2|Y          |X          |Destination coord.
+3|H          |W          |Height+Width of transfer
+4|pix1       |pix0       |image data
+5..
+?|pixn       |pixn-1     |
+Transfers data from mainmemory to frame buffer
+If the number of pixels to be sent is odd, an extra should be
+sent. (32 bits per packet)
+---------------------------------------------------------------------------
+$01 $c0 copy image from frame buffer
+ |1f-18|17-10|0f-08|07-00|
+ |$01  |                 |Reset command buffer (write to GP1 or GP0)
+1|$C0  |                 |
+2|Y          |X          |Source coord.
+3|H          |W          |Height+Width of transfer
+4|pix1       |pix0       |image data (read from data port)
+5..
+?|pixn       |pixn-1     |
+Transfers data from frame buffer to mainmemory. Wait for bit 27
+of the status register to be set before reading the image data.
+When the number of pixels is odd, an extra pixel is read at the
+end. (because one packet is 32 bits)
+--------------------------------------------------------------------------
+Draw mode/environment setting packets
+--------------------------------------------------------------------------
+Some of these settings can also be set by primitive packets. In any
+case it is the last packet of either kind that the GPU received
+that is active. So if a primitive sets tpage info, it will
+overwrite the existing data, even if it was sent by an $e? packet.
+--------------------------------------------------------------------------
+$e1     draw mode setting
+ |1f-18|17-0b|0a |09 |08 07|06 05|04|03 02 01 00|
+1|$e1  |     |dfe|dtd|tp   |abr  |ty|tx         | command +values
+
+see above for explanations
+
+It seems that bit $0b-$0d of the status reg can also be passed with this
+command on some GPU's other than type 2. (ie. Command $10000007 doesn't
+return 2)
+--------------------------------------------------------------------------
+$e2     texture window setting
+
+ |1F-18|17-14|13-0F|0E-0A|09-05|04-00|
+1|$E2        |twy  |twx  |twh  |tww  | command + value
+
+twx      Texture window X, (twx*8)
+twy      Texture window Y, (twy*8)
+tww      Texture window width, 256-(tww*8)
+twh      Texture window height, 256-(twh*8)
+--------------------------------------------------------------------------
+$e3     set drawing area top left
+ |1f-18|17-14|13-0a|09-00|
+1|$e3  |     |Y    |X    |
+sets the drawing area topleft corner. X&Y are absolute frame
+buffer coords.
+--------------------------------------------------------------------------
+$e4     set drawing area bottom right
+ |1f-18|17-14|13-0a|09-00|
+1|$e4  |     |Y    |X    |
+sets the drawing area bottom right. X&Y are absolute frame
+buffer coords.
+--------------------------------------------------------------------------
+$e5     drawing offset
+ |1f-18|17-16|15-0b|0a-00|
+1|$e5  |     |OffsY|OffsX|
+(offset Y = y << 11)
+sets the drawing area offset within the drawing area. X&Y are
+offsets in the frame buffer.
+--------------------------------------------------------------------------
+$e6     mask setting
+ |1f-18|17-02|01   |00   |
+1|$e6  |     |Mask2|Mask1|
+
+Mask1    Set mask bit while drawing. 1 = on
+Mask2    Do not draw to mask areas. 1= on
+
+While mask1 is on, the GPU will set the MSB of all pixels it draws.
+While mask2 is on, the GPU will not write to pixels with set MSB's
+
+--------------------------------------------------------------------------
+DMA
+--------------------------------------------------------------------------
+The GPU has two DMA channels allocated to it. DMA channel 2 is used to send
+linked packet lists to the GPU and to transfer image data to and from the
+frame buffer. DMA channel 6 sets up an empty linked list, of which each
+entry points to the previous (ie. reverse clear an OT.)
+--------------------------------------------------------------------------
+D2_MADR           DMA base address.          $1f8010a0
+bit |1f                              00|
+desc|madr                              |
+
+madr     pointer to the address the DMA will start reading from/writing to
+--------------------------------------------------------------------------
+D2_BCR            DMA block control          $1f8010a4
+bit |1f                   10|0f      00|
+desc|ba                     |bs        |
+
+ba       Amount of blocks
+bs       Blocksize (words)
+
+Sets up the DMA blocks. Once started the DMA will send ba blocks of bs
+words. Don't set a blocksize larger than $10 words, as the command buffer
+of the GPU is 64 bytes.
+--------------------------------------------------------------------------
+D2_CHCR           DMA channel control        $1f8010a8
+bit |1f-19|18|17-0c|0b|0a|09|08|07 01|00|
+desc|    0|Tr|    0| 0|Li|Co| 0|    0|Dr|
+
+Tr       0        No DMA transfer busy.
+         1        Start DMA transfer/DMA transfer busy.
+Li       1        Transfer linked list.
+Co       1        Transfer continuous stream of data.
+Dr       0        direction to memory
+         1        direction to GPU
+
+This configures the DMA channel. The DMA starts when bit $18 is set. DMA
+is finished as soon as bit $18 is cleared again. To send or receive data
+to/from VRAM send the appropriate GPU packets first ($a0/$c0)
+--------------------------------------------------------------------------
+D6_MADR           DMA base address.          $1f8010e0
+bit |1f                              00|
+desc|madr                              |
+
+madr     Last table entry.
+--------------------------------------------------------------------------
+D6_BCR            DMA block control          $1f8010e4
+bit |1f                              00|
+desc|bc                                |
+
+bc       Number of list entries.
+--------------------------------------------------------------------------
+D6_CHCR           DMA channel control        $1f8010e8
+bit |1f-1d|1c|1b-19|18|17-02|01|00|
+desc|    0|OT|    0|Tr|    0|Ot| 0|
+
+Tr       0        No DMA transfer busy.
+         1        Start DMA transfer/DMA transfer busy.
+Ot       1        Set to do an OT clear.
+
+When this register is set to $11000002, the DMA channel will create an
+empty linked list of D6_BCR entries ending at the address in D6_MADR. Each
+entry has a size of 0, and points to the previous. The first entry is the
+list terminator ($00ffffff).
+So if D6_MADR = $80100010, D6_BCR=$00000005, and the DMA is kicked this
+will result in a list looking like this:
+$80100000  $00ffffff
+$80100004  $00100000
+$80100008  $00100004
+$8010000c  $00100008
+$80100010  $0010000c
+--------------------------------------------------------------------------
+DPCR     Dma control register       $1f8010f0
+|1f 1c|1b 18|17 14|13 10|0f 0c|0b 08|07 04|03 00|
+|     |Dma6 |Dma5 |Dma4 |Dma3 |Dma2 |Dma1 |Dma0 |
+
+Each register has a 4 bit control block allocated in this
+register.
+Bit 3:   1= Dma Enabled
+    2:   ?
+    1:   ?
+    0:   ?
+
+Bit 3 must be set for a channel to operate.
+
+--------------------------------------------------------------------------
+Common GPU functions, step by step.
+--------------------------------------------------------------------------
+* Initializing the GPU.
+
+First thing to do when using the GPU is to initialize it. To do that take
+the following steps:
+
+1 - Reset the GPU (GP1 command $00). This turns off the display as well.
+2 - Set horizontal and vertical start/end. (GP1 command $06, $07)
+3 - Set display mode. (GP1 command $08)
+4 - Set display offset. (GP1 command $05)
+5 - Set draw mode. (GP0 command $e1)
+6 - Set draw area. (GP0 command $e3, $e4)
+7 - Set draw offset. (GP0 command $e5)
+8 - Enable display.
+
+* Sending a linked list.
+
+The normal way to send large numbers of primitives is by using a linked
+list dma transfer. This list is built up of entries of which each points to
+the next. One entry looks like this:
+
+         dw $nnYYYYYY      ; nn = the number of words in the list entry
+                           ; YYYYYY = address of next list entry & $00ffffff
+
+1        dw ..             ; here goes the primitive.
+2        dw ..             ;
+.        dw ..             ;
+nn-1     dw ..             ;
+nn       dw ..             ;
+
+The last entry in the list should have $ffffff as pointer, which is the
+terminator. As soon as this value is found DMA is ended. If the entry
+size is set to 0, no data will be transferred to the GPU and the next
+entry is processed.
+
+To send the list do this:
+1 - Wait for the GPU to be ready to receive commands. (bit $1c == 1)
+2 - Enable DMA channel 2
+3 - Set GPU to DMA cpu->gpu mode. ($04000002)
+4 - Set D2_MADR to the start of the list
+5 - Set D2_BCR to zero.
+6 - Set D2_CHCR to link mode, mem->GPU and dma enable. ($01000401)
+
+* Uploading Image data through DMA.
+
+To upload an image to VRAM take the following steps:
+
+1 - Wait for the GPU to be idle and DMA to finish. Enable DMA channel 2
+    if necessary.
+2 - Send the 'Send image to VRAM' primitive. (You can send this through
+    dma if you want. Use the linked list method described above)
+3 - Set DMA to CPU->GPU ($04000002) (if you didn't do so already in the
+    previous step)
+4 - Set D2_MADR to the start of the image data
+5 - Set D2_BCR with :  bits 31-16  = Number of words to send (H*W /2)
+                       bits 15- 0  = Block size of 1 word. ($01)
+                       if H*W is odd, add 1. (Pixels are 2 bytes, send
+                       an extra blank pixel in case of an odd amount)
+6 - Set D2_CHCR to continuous mode, mem -> GPU and dma enable. ($01000201)
+
+Note that H, W, X and Y are always in frame buffer pixels, even if you send
+image data in other formats.
+You can use bigger block sizes if you need more speed. If the number of
+words to be sent is not a multiple of the blocksize, you'll have to send
+the remainder separately, because the GPU only accepts an extra halfword
+if the number of pixels is odd. (ie. of the last word sent, only the low
+half word is used.) Also take care not to use blocksizes bigger than $10, as
+the buffer of the GPU is only 64 bytes (=$10 words).
+
+* Waiting to send commands
+
+You can send new commands as soon as DMA has ceased and the GPU is ready.
+1 - Wait for bit $18 to become 0 in D2_CHCR
+2 - Wait for bit $1c to become 1 in GP1.
+
+* Vsync
+
+Step by step for a VSYNC counter coming up (not)soon.
+
+Meanwhile you can init the pad driver and as soon as you want to
+check for VSYNC, fill the return buffer with 0 and wait for it to change.
+The pad driver checks the pads every VSYNC. Check the greentro source for
+an example.
+
+--------------------------------------------------------------------------
+Missing info.
+--------------------------------------------------------------------------
+There's still a lot yet uncovered, so if you have/know anything that's not
+in here please mail it to me.  Things i'm looking for particularly are
+info on the differences between the various versions and revisions of the
+GPU, and something about drawing speeds and other timing.
+
+--------------------------------------------------------------------------
+History:
+--------------------------------------------------------------------------
+23/apr/1999       First public release.
+28/apr/1999       Some bugfixes and rewrites.
+                  Info on texture pages corrected. <Silpheed>
+ 8/may/1999       Detailed packet composition.
+20/may/1999       DMA & Step by steps added.
+25/jun/1999       More DMA, OT and lists.
+30/aug/1999       Correction. ($03)
+--------------------------------------------------------------------------
+Maintained by doomed/padua. Any errors, additions -> <doomed@c64.org>
+--------------------------------------------------------------------------
+--==                    http://psx.rules.org/                         ==--
+--==                    http://www.padua.org/                         ==--
+--------------------------------------------------------------------------
+Thanx & Hello to:
+Silpheed Groepaz Brainwalker & Hitmen, Antiloop Middy Danzig & Napalm,
+K-Communications, Blackbag, TDJ Sander & Focus, Burglar LCF & SCS*TRC,
+Deekay & Crest, Graham NO-XS & Oxyron, MrAlpha Fungus & F4CG, Zealot &
+Wrath Design, Shape, Naphalm Jazzcat & Onslaught, Reyn Ouwehand, WHW & WOW,
+all active people on PSX and C64, #psxdev, #c-64.
+--------------------------------------------------------------------------
diff --git a/misc/gte.txt b/misc/gte.txt
@@ -0,0 +1,999 @@
+==========================================================================
+GTE.txt - Documentation & Explanation.
+==========================================================================
+
+Disclaimer.
+--------------------------------------------------------------------------
+This document is a collection of all info on the GTE i could find and my
+own notes. Most of this is the result of experiment, so not all info might
+be correct. This document is most probably not complete, and not all
+capabilities and quirks of the GTE are documented. No responsibility is
+taken for anything that might occur using the information in this document.
+
+--------------------------------------------------------------------------
+Introduction.
+--------------------------------------------------------------------------
+The Geometry Transformation Engine (GTE) is the heart of all 3d
+calculations on the psx. The GTE has specialised functions for perspective
+transformations, light sourcing and the like, and is much faster than the
+CPU on these operations. It is mounted as the second coprocessor and as
+such has no physical address in the memory of the psx. All control is done
+through special instructions.
+
+--------------------------------------------------------------------------
+Basic mathematics
+--------------------------------------------------------------------------
+The GTE is basically an engine for vector mathematics, so some knowledge
+of that area is vital for correct usage of the GTE. I will not delve too
+deeply into that area, as that's beyond the scope of this text, but I'll
+introduce some concepts.
+
+The basic representation of a point(vertex) in 3d space is through a vector
+of the sort [X,Y,Z]. In GTE operation there are basically two kinds of these,
+vectors of variable length and vectors of a unit length of 1.0, called
+normal vectors. The first is used to describe locations and translations
+in 3d space, the second to describe a direction.
+
+Rotation of vertices is performed by multiplying the vector of the vertex
+with a rotation matrix. The rotation matrix is a 3x3 matrix consisting of
+3 normal vectors which are orthogonal to each other. (It's actually the
+matrix which describes the coordinate system in which the vertex is located
+in relation to the unit coordinate system. See a maths book for more
+details.) This matrix is derived from rotation angles as follows:
+
+(s? = sin(?), c? = cos(?))
+
+Rotation angle A   Rotation angle B    Rotation angle C
+about X axis:      about Y axis:       about Z axis:
+
+|  1   0   0|       | cB   0  sB|        | cC -sC   0|
+|  0  cA -sA|       |  0   1   0|        | sC  cC   0|
+|  0  sA  cA|       |-sB   0  cB|        |  0   0   1|
+
+Rotation about multiple axes can be done by multiplying these matrices
+with each other. Note that the order in which this multiplication is done
+*IS* important. The GTE has no sine or cosine functions, so the calculation
+of these must be done by the CPU.
+
+Translation is the simple addition of two vectors, relocating the vertex
+within its current coordinate system. Needless to say the order in which
+translation and rotation occur for a vector is important.
+
+--------------------------------------------------------------------------
+Brief Function descriptions
+--------------------------------------------------------------------------
+RTPS/RTPT         Rotate, translate and perspective transformation.
+
+These two functions perform the final 3d calculations on one or three
+vertices at once. The points are first multiplied with a rotation matrix(R),
+and after that translated(TR). Finally a perspective transformation is
+applied, which results in 2d screen coordinates. It also returns an
+interpolation value to be used with the various depth cueing instructions.
+--------------------------------------------------------------------------
+MVMVA             Matrix & Vector multiplication and addition.
+
+Multiplies a vector with either the rotation matrix, the light matrix or
+the color matrix and then adds the translation vector or background color
+vector.
+--------------------------------------------------------------------------
+DCPL              Depth cue light color
+
+First calculates a color from a light vector(normal vector of a plane
+multiplied with the light matrix and zero limited) and a provided RGB value.
+Then performs depth cueing by interpolating between the far color vector and
+the newfound color.
+--------------------------------------------------------------------------
+DPCS/DPCT         Depth cue single/triple
+
+Performs depth cueing by interpolating between a color and the far color
+vector on one or three colors.
+--------------------------------------------------------------------------
+INTPL             Interpolation
+
+Interpolates between a vector and the far color vector.
+--------------------------------------------------------------------------
+SQR               Square
+
+Calculates the square of a vector.
+--------------------------------------------------------------------------
+NCS/NCT           Normal Color
+
+Calculates a color from the normal of a point or plane and the light
+sources and colors. The basic color of the plane or point the normal
+refers to is assumed to be white.
+--------------------------------------------------------------------------
+NCDS/NCDT         Normal Color Depth Cue.
+
+Same as NCS/NCT but also performs depth cueing (like DPCS/DPCT)
+--------------------------------------------------------------------------
+NCCS/NCCT
+
+Same as NCS/NCT, but the base color of the plane or point is taken into
+account.
+--------------------------------------------------------------------------
+CDP
+
+A color is calculated from a light vector (base color is assumed to be
+white) and depth cueing is performed (like DPCS).
+--------------------------------------------------------------------------
+CC
+
+A color is calculated from a light vector and a base color.
+--------------------------------------------------------------------------
+NCLIP
+
+Calculates the outer product of three 2d points.(ie. 3 vertices which
+define a plane after projection.)
+
+The 3 vertices should be stored clockwise according to the visual point:
+
+          Z+
+         /
+       /____ X+
+       |
+       |
+        Y+
+
+If this is so, the result of this function will be negative if we are
+facing the backside of the plane.
+--------------------------------------------------------------------------
+AVSZ3/AVSZ4
+
+Adds 3 or 4 z values together and multiplies them by a fixed point value.
+This value is normally chosen so that this function returns the average
+of the z values (usually further divided by 2 or 4 for easy adding to the
+OT)
+--------------------------------------------------------------------------
+OP
+
+Calculates the outer product of 2 vectors.
+--------------------------------------------------------------------------
+GPF
+
+Multiplies 2 vectors. Also returns the result as 24bit rgb value.
+--------------------------------------------------------------------------
+GPL
+
+Multiplies a vector with a scalar and adds the result to another vector.
+Also returns the result as 24bit rgb value.
+
+--------------------------------------------------------------------------
+GTE Operation.
+--------------------------------------------------------------------------
+Instructions.
+--------------------------------------------------------------------------
+The CPU has six special load and store instructions for the GTE registers,
+and an instruction to issue commands to the coprocessor.
+
+rt        CPU register 0-31
+gd        GTE data register 0-31
+gc        GTE control register 0-31
+imm       16 bit immediate value
+base      CPU register 0-31
+imm(base) address pointed to by base + imm.
+b25       25 bit wide data field.
+
+LWC2  gd, imm(base)   stores value at imm(base) in gte data register gd.
+SWC2  gd, imm(base)   stores gte data register at imm(base).
+MTC2  rt, gd          stores register rt in GTE data register gd.
+MFC2  rt, gd          stores GTE data register gd in register rt.
+CTC2  rt, gc          stores register rt in GTE control register gc.
+CFC2  rt, gc          stores GTE control register in register rt.
+
+COP2  b25             Issues a GTE command.
+
+Gte load and store instructions have a delay of 2 instructions, for any
+gte commands or operations accessing that register.
+
+--------------------------------------------------------------------------
+Registers.
+--------------------------------------------------------------------------
+The GTE has 32 data registers, and 32 control registers, each 32 bits wide.
+The following list describes their common use and format. Note in some
+functions format is different from the one that's given here. The numbers
+in the format fields are the signed, integer and fractional parts of the
+field. So 1,3,12 means signed(1 bit), 3 bits integral part, 12 bits
+fractional part.
+
+Control registers:
+No. Name    |31-24 23-16|15-08 07-00| Description
+ 0  R11R12  |R12 1, 3,12|R11 1, 3,12| Rotation matrix elements 11, 12
+ 1  R13R21  |R21 1, 3,12|R13 1, 3,12| Rotation matrix elements 13, 21
+ 2  R22R23  |R23 1, 3,12|R22 1, 3,12| Rotation matrix elements 22, 23
+ 3  R31R32  |R32 1, 3,12|R31 1, 3,12| Rotation matrix elements 31, 32
+ 4  R33     |          0|R33 1, 3,12| Rotation matrix element 33
+ 5  TRX     |TRX 1,31, 0            | Translation vector X
+ 6  TRY     |TRY 1,31, 0            | Translation vector Y
+ 7  TRZ     |TRZ 1,31, 0            | Translation vector Z
+ 8  L11L12  |L12 1, 3,12|L11 1, 3,12| Light source matrix elements 11, 12
+ 9  L13L21  |L21 1, 3,12|L13 1, 3,12| Light source matrix elements 13, 21
+10  L22L23  |L23 1, 3,12|L22 1, 3,12| Light source matrix elements 22, 23
+11  L31L32  |L32 1, 3,12|L31 1, 3,12| Light source matrix elements 31, 32
+12  L33     |          0|L33 1, 3,12| Light source matrix element 33
+13  RBK     |RBK 1,19,12            | Background color red component.
+14  GBK     |GBK 1,19,12            | Background color green component.
+15  BBK     |BBK 1,19,12            | Background color blue component.
+16  LR1LR2  |LR2 1, 3,12|LR1 1, 3,12| Light color matrix source 1&2 red comp.
+17  LR3LG1  |LG1 1, 3,12|LR3 1, 3,12| Light color matrix source 3 red, 1 green
+18  LG2LG3  |LG3 1, 3,12|LG2 1, 3,12| Light color matrix source 2&3 green comp.
+19  LB1LB2  |LB2 1, 3,12|LB1 1, 3,12| Light color matrix source 1&2 blue comp.
+20  LB3     |          0|LB3 1, 3,12| Light color matrix source 3 blue component.
+21  RFC     |RFC 1,27, 4            | Far color red component.
+22  GFC     |GFC 1,27, 4            | Far color green component.
+23  BFC     |BFC 1,27, 4            | Far color blue component.
+24  OFX     |OFX 1,15,16            | Screen offset X
+25  OFY     |OFY 1,15,16            | Screen offset Y
+26  H       |          0|H   0,16, 0| Projection plane distance.
+27  DQA     |          0|DQA 1, 7, 8| Depth cueing parameter A.(coefficient.)
+28  DQB     |          0|DQB 1, 7,24| Depth cueing parameter B.(offset.)
+29  ZSF3    |          0|ZSF3 1,3,12| Z3 average scale factor (normally 1/3)
+30  ZSF4    |          0|ZSF4 1,3,12| Z4 average scale factor (normally 1/4)
+31  FLAG    |See gte functions      | Returns any calculation errors.
+
+Data registers:
+No. Name  rw|31-24 23-16|15-08 07-00| Description
+ 0  VXY0  rw|VY0        |VX0        | Vector 0 X and Y. 1,3,12 or 1,15,0
+ 1  VZ0   rw|          0|VZ0        | Vector 0 Z.
+ 2  VXY1  rw|VY1        |VX1        | Vector 1 X and Y. 1,3,12 or 1,15,0
+ 3  VZ1   rw|          0|VZ1        | Vector 1 Z.
+ 4  VXY2  rw|VY2        |VX2        | Vector 2 X and Y. 1,3,12 or 1,15,0
+ 5  VZ2   rw|          0|VZ2        | Vector 2 Z.
+ 6  RGB   rw|Code |B    |G    |R    | Rgb value. Code is passed, but not used in calc.
+ 7  OTZ   r |           |OTZ 0,15, 0| Z Average value.
+ 8  IR0   rw|Sign       |IR0 1, 3,12| Intermediate value 0. *1
+ 9  IR1   rw|Sign       |IR1 1, 3,12| Intermediate value 1. *1
+10  IR2   rw|Sign       |IR2 1, 3,12| Intermediate value 2. *1
+11  IR3   rw|Sign       |IR3 1, 3,12| Intermediate value 3. *1
+12  SXY0  rw|SY0 1,15, 0|SX0 1,15, 0| Screen XY coordinate fifo. *2
+13  SXY1  rw|SY1 1,15, 0|SX1 1,15, 0|
+14  SXY2  rw|SY2 1,15, 0|SX2 1,15, 0|
+15  SXYP  rw|SYP 1,15, 0|SXP 1,15, 0|
+16  SZ0   rw|          0|SZ0 0,16, 0| Screen Z fifo. *2
+17  SZ1   rw|          0|SZ1 0,16, 0|
+18  SZ2   rw|          0|SZ2 0,16, 0|
+19  SZ3   rw|          0|SZ3 0,16, 0|
+20  RGB0  rw|CD0  |B0   |G0   |R0   | Characteristic color fifo. *2
+21  RGB1  rw|CD1  |B1   |G1   |R1   |
+22  RGB2  rw|CD2  |B2   |G2   |R2   | CD2 is the bit pattern of currently executed function
+23 (RES1)   |                       | Prohibited
+24  MAC0  rw|MAC0 1,31,0            | Sum of products value 0
+25  MAC1  rw|MAC1 1,31,0            | Sum of products value 1
+26  MAC2  rw|MAC2 1,31,0            | Sum of products value 2
+27  MAC3  rw|MAC3 1,31,0            | Sum of products value 3
+28  IRGB   w|           |IB |IG |IR | *3
+29  ORGB  r |           |OB |OG |OR | *4
+30  LZCS   w|LZCS 1,31,0            | Leading zero count source data.*5
+31  LZCR  r |LZCR 0,6,0             | Leading zero count result.*5
+
+*1) The specified format is the format which GTE functions output to these
+    registers. The input format is mostly (1,19,12)
+
+*2) The SXYx, SZx and RGBx are first in first out registers (fifo). The last
+    calculation result is stored in the last register, and previous results
+    are stored in previous registers. So for example when a new SXY value
+    is obtained the following happens:
+    SXY0 = SXY1
+    SXY1 = SXY2
+    SXY2 = SXYP
+    SXYP = result.
+
+*3) IRGB:
+    |31        15|14-10| 9- 5| 4- 0|
+    |           0|IB   |IG   |IR   |
+    When writing a value to IRGB the following happens:
+    IR1 = IR format converted to (1,11,4)
+    IR2 = IG format converted to (1,11,4)
+    IR3 = IB format converted to (1,11,4)
+
+*4) ORGB:
+    |31        15|14-10| 9- 5| 4- 0|
+    |           0|OB   |OG   |OR   |
+    When reading ORGB the following value is returned:
+    OR = (IR1>>7) &$1f
+    OG = (IR2>>7) &$1f
+    OB = (IR3>>7) &$1f
+*5) Reading LZCR returns the leading 0 count of LZCS if LZCS is positive
+    and the leading 1 count of LZCS if LZCS is negative.
+
+--------------------------------------------------------------------------
+Programming Considerations.
+--------------------------------------------------------------------------
+Before use the GTE must be turned on. The GTE has bit 30 allocated to it in
+the status register of the system control coprocessor (cop0). Before any
+GTE instruction is used, this bit must be set.
+
+GTE instructions and functions should not be used in
+- Delay slots of jumps and branches
+- Event handlers or interrupts.
+
+If an instruction that reads a GTE register or a GTE command is executed
+before the current GTE command is finished, the cpu will hold until the
+instruction has finished. The number of cycles each GTE instruction takes
+is in the command list.
+
+--------------------------------------------------------------------------
+Function Operation.
+--------------------------------------------------------------------------
+This part describes the actual calculations performed by the various GTE
+functions. The first line contains the name of the function, the number
+of cycles it takes and a brief description. The second line lists any fields
+that may be set in the opcode, and the third line is the actual opcode. See
+the end of the list for the fields and their descriptions. Then follows a
+list of all registers which are needed in the calculation under the 'in',
+and a list of registers which are modified under the 'out' with a brief
+description and the format of the data. Next follows the calculation which
+is performed after initiating the function. The format field left is the
+size in which the data is stored, the format field on the right contains
+the format in which the calculation is performed. At certain points in the
+calculation checks and limitations are done and their results stored in the
+flag register, see the table below. They are identified with the code from
+the second column of the table directly followed by square brackets
+enclosing the part of the calculation on which the check is performed. The
+additional Lm_ identifier means the value is limited to the bottom or
+ceiling of the check if it exceeds the boundary.
+
+bit      description
+31       Checksum.
+30  A1   Result larger than 43 bits and positive
+29  A2   Result larger than 43 bits and positive
+28  A3   Result larger than 43 bits and positive
+27  A1   Result larger than 43 bits and negative
+26  A2   Result larger than 43 bits and negative
+25  A3   Result larger than 43 bits and negative
+24  B1   Value negative(lm=1) or larger than 15 bits(lm=0)
+23  B2   Value negative(lm=1) or larger than 15 bits(lm=0)
+22  B3   Value negative(lm=1) or larger than 15 bits(lm=0)
+21  C1   Value negative or larger than 8 bits.
+20  C2   Value negative or larger than 8 bits.
+19  C3   Value negative or larger than 8 bits.
+18  D    Value negative or larger than 16 bits.
+17  E    Divide overflow. (quotient > 2.0)
+16  F    Result larger than 31 bits and positive.
+15  F    Result larger than 31 bits and negative.
+14  G1   Value larger than 10 bits.
+13  G2   Value larger than 10 bits.
+12  H    Value negative or larger than 12 bits.
+
+
+--------------------------------------------------------------------------
+RTPS     15       Perspective transformation          
+Fields:  none
+Opcode:  cop2 $0180001
+
+In:      V0       Vector to transform.                         [1,15,0]
+         R        Rotation matrix                              [1,3,12]
+         TR       Translation vector                           [1,31,0]
+         H        View plane distance                          [0,16,0]
+         DQA      Depth cue interpolation values.              [1,7,8]
+         DQB                                                   [1,7,8]
+         OFX      Screen offset values.                        [1,15,16]
+         OFY                                                   [1,15,16]
+Out:     SXY fifo Screen XY coordinates.(short)                [1,15,0]
+         SZ fifo  Screen Z coordinate.(short)                  [0,16,0]
+         IR0      Interpolation value for depth cueing.       [1,3,12]
+         IR1      Screen X (short)                             [1,15,0]
+         IR2      Screen Y (short)                             [1,15,0]
+         IR3      Screen Z (short)                             [1,15,0]
+         MAC1     Screen X (long)                              [1,31,0]
+         MAC2     Screen Y (long)                              [1,31,0]
+         MAC3     Screen Z (long)                              [1,31,0]
+
+Calculation:
+[1,31,0] MAC1=A1[TRX + R11*VX0 + R12*VY0 + R13*VZ0]            [1,31,12]
+[1,31,0] MAC2=A2[TRY + R21*VX0 + R22*VY0 + R23*VZ0]            [1,31,12]
+[1,31,0] MAC3=A3[TRZ + R31*VX0 + R32*VY0 + R33*VZ0]            [1,31,12]
+[1,15,0] IR1= Lm_B1[MAC1]                                      [1,31,0]
+[1,15,0] IR2= Lm_B2[MAC2]                                      [1,31,0]
+[1,15,0] IR3= Lm_B3[MAC3]                                      [1,31,0]
+         SZ0<-SZ1<-SZ2<-SZ3
+[0,16,0] SZ3= Lm_D[MAC3]                                       [1,31,0]
+         SX0<-SX1<-SX2, SY0<-SY1<-SY2
+[1,15,0] SX2= Lm_G1[F[OFX + IR1*(H/SZ)]]                       [1,27,16]
+[1,15,0] SY2= Lm_G2[F[OFY + IR2*(H/SZ)]]                       [1,27,16]
+[1,31,0] MAC0= F[DQB + DQA * (H/SZ)]                           [1,19,24]
+[1,15,0] IR0= Lm_H[MAC0]                                       [1,31,0]
+
+Notes:
+Z values are limited downwards at 0.5 * H. For smaller z values you'll have
+to write your own routine.
+--------------------------------------------------------------------------
+RTPT     23       Perspective Transformation on 3 points.
+Fields   none
+opcode   cop2 $0280030
+
+in       V0       Vector to transform.                         [1,15,0]
+         V1                                                    [1,15,0]
+         V2                                                    [1,15,0]
+         R        Rotation matrix                              [1,3,12]
+         TR       Translation vector                           [1,31,0]
+         H        View plane distance                          [0,16,0]
+         DQA      Depth cue interpolation values.              [1,7,8]
+         DQB                                                   [1,7,8]
+         OFX      Screen offset values.                        [1,15,16]
+         OFY                                                   [1,15,16]
+out      SXY fifo Screen XY coordinates.(short)                [1,15,0]
+         SZ fifo  Screen Z coordinate.(short)                  [0,16,0]
+         IR0      Interpolation value for depth cueing.       [1,3,12]
+         IR1      Screen X (short)                             [1,15,0]
+         IR2      Screen Y (short)                             [1,15,0]
+         IR3      Screen Z (short)                             [1,15,0]
+         MAC1     Screen X (long)                              [1,31,0]
+         MAC2     Screen Y (long)                              [1,31,0]
+         MAC3     Screen Z (long)                              [1,31,0]
+
+Calculation: Same as RTPS, but repeats for V1 and V2.
+--------------------------------------------------------------------------
+MVMVA    8        Multiply vector by matrix and vector addition.
+Fields:  sf,mx,v,cv,lm
+Opcode:  cop2 $0400012
+
+in:      V0/V1/V2/IR       Vector v0, v1, v2 or [IR1,IR2,IR3]
+         R/LLM/LCM         Rotation, light or color matrix.    [1,3,12]
+         TR/BK             Translation or background color vector.
+out:     [IR1,IR2,IR3]     Short vector
+         [MAC1,MAC2,MAC3]  Long vector
+
+Calculation:
+MX = matrix specified by mx
+V = vector specified by v
+CV = vector specified by cv
+
+
+         MAC1=A1[CV1 + MX11*V1 + MX12*V2 + MX13*V3]
+         MAC2=A2[CV2 + MX21*V1 + MX22*V2 + MX23*V3]
+         MAC3=A3[CV3 + MX31*V1 + MX32*V2 + MX33*V3]
+         IR1=Lm_B1[MAC1]
+         IR2=Lm_B2[MAC2]
+         IR3=Lm_B3[MAC3]
+
+Notes:
+The cv field allows selection of the far color vector, but this vector
+is not added correctly by the GTE.
+--------------------------------------------------------------------------
+DCPL     8        Depth Cue Color light
+Fields:  none
+Opcode:  cop2 $0680029
+In:      RGB               Primary color.         R,G,B,CODE   [0,8,0]
+         IR0               interpolation value.                [1,3,12]
+         [IR1,IR2,IR3]     Local color vector.                 [1,3,12]
+         CODE              Code value from RGB.           CODE [0,8,0]
+         FC                Far color.                          [1,27,4]
+Out:     RGBn              RGB fifo               Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculation:
+[1,27,4]  MAC1=A1[R*IR1 + IR0*(Lm_B1[RFC - R * IR1])]          [1,27,16]
+[1,27,4]  MAC2=A2[G*IR2 + IR0*(Lm_B2[GFC - G * IR2])]          [1,27,16]
+[1,27,4]  MAC3=A3[B*IR3 + IR0*(Lm_B3[BFC - B * IR3])]          [1,27,16]
+[1,11,4]  IR1=Lm_B1[MAC1]                                      [1,27,4]
+[1,11,4]  IR2=Lm_B2[MAC2]                                      [1,27,4]
+[1,11,4]  IR3=Lm_B3[MAC3]                                      [1,27,4]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                             [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                             [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                             [1,27,4]
+--------------------------------------------------------------------------
+DPCS     8        Depth Cueing.
+Fields:  none
+Opcode:  cop2 $0780010
+
+In:      IR0               Interpolation value                 [1,3,12]
+         RGB               Color                  R,G,B,CODE   [0,8,0]
+         FC                Far color              RFC,GFC,BFC  [1,27,4]
+Out:     RGBn              RGB fifo               Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculations:
+[1,27,4]  MAC1=A1[R + IR0*(Lm_B1[RFC - R])]                    [1,27,16][lm=0]
+[1,27,4]  MAC2=A2[G + IR0*(Lm_B2[GFC - G])]                    [1,27,16][lm=0]
+[1,27,4]  MAC3=A3[B + IR0*(Lm_B3[BFC - B])]                    [1,27,16][lm=0]
+[1,11,4]  IR1=Lm_B1[MAC1]                                      [1,27,4][lm=0]
+[1,11,4]  IR2=Lm_B2[MAC2]                                      [1,27,4][lm=0]
+[1,11,4]  IR3=Lm_B3[MAC3]                                      [1,27,4][lm=0]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                             [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                             [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                             [1,27,4]
+--------------------------------------------------------------------------
+INTPL    8        Interpolation of a vector and far color vector.
+Fields:  none
+Opcode:  cop2 $0980011
+
+In:      [IR1,IR2,IR3]     Vector                              [1,3,12]
+         IR0               Interpolation value                 [1,3,12]
+         CODE              Code value from RGB.           CODE [0,8,0]
+         FC                Far color              RFC,GFC,BFC  [1,27,4]
+Out:     RGBn              RGB fifo               Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculations:
+[1,27,4]  MAC1=A1[IR1 + IR0*(Lm_B1[RFC - IR1])]                [1,27,16]
+[1,27,4]  MAC2=A2[IR2 + IR0*(Lm_B2[GFC - IR2])]                [1,27,16]
+[1,27,4]  MAC3=A3[IR3 + IR0*(Lm_B3[BFC - IR3])]                [1,27,16]
+[1,11,4]  IR1=Lm_B1[MAC1]                                      [1,27,4]
+[1,11,4]  IR2=Lm_B2[MAC2]                                      [1,27,4]
+[1,11,4]  IR3=Lm_B3[MAC3]                                      [1,27,4]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                             [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                             [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                             [1,27,4]
+--------------------------------------------------------------------------
+SQR      5        Square vector.
+Fields:  sf
+Opcode:  cop2 $0a00428
+                                                       sf=0    sf=1
+in:      [IR1,IR2,IR3]     vector                      [1,15,0][1,3,12]
+out:     [IR1,IR2,IR3]     vector^2                    [1,15,0][1,3,12]
+         [MAC1,MAC2,MAC3]  vector^2                    [1,31,0][1,19,12]
+
+Calculation: (left format sf=0, right format sf=1)
+
+[1,31,0][1,19,12] MAC1=A1[IR1*IR1]                     [1,43,0][1,31,12]
+[1,31,0][1,19,12] MAC2=A2[IR2*IR2]                     [1,43,0][1,31,12]
+[1,31,0][1,19,12] MAC3=A3[IR3*IR3]                     [1,43,0][1,31,12]
+[1,15,0][1,3,12]  IR1=Lm_B1[MAC1]                      [1,31,0][1,19,12][lm=1]
+[1,15,0][1,3,12]  IR2=Lm_B2[MAC2]                      [1,31,0][1,19,12][lm=1]
+[1,15,0][1,3,12]  IR3=Lm_B3[MAC3]                      [1,31,0][1,19,12][lm=1]
+--------------------------------------------------------------------------
+NCS      14       Normal color
+Fields:  none
+Opcode:  cop2 $0C8041E
+
+In:      V0                Normal vector                       [1,3,12]
+         BK                Background color       RBK,GBK,BBK  [1,19,12]
+         CODE              Code value from RGB.           CODE [0,8,0]
+         LCM               Color matrix                        [1,3,12]
+         LLM               Light matrix                        [1,3,12]
+Out:     RGBn              RGB fifo.              Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+[1,19,12] MAC1=A1[L11*VX0 + L12*VY0 + L13*VZ0]                 [1,19,24]
+[1,19,12] MAC2=A2[L21*VX0 + L22*VY0 + L23*VZ0]                 [1,19,24]
+[1,19,12] MAC3=A3[L31*VX0 + L32*VY0 + L33*VZ0]                 [1,19,24]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,19,12][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,19,12][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,19,12][lm=1]
+[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3]           [1,19,24]
+[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3]           [1,19,24]
+[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3]           [1,19,24]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,19,12][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,19,12][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,19,12][lm=1]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                             [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                             [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                             [1,27,4]
+--------------------------------------------------------------------------
+NCT      30       Normal color
+Fields:  none
+Opcode:  cop2 $0D80420
+
+In:      V0,V1,V2          Normal vector                       [1,3,12]
+         BK                Background color       RBK,GBK,BBK  [1,19,12]
+         CODE              Code value from RGB.           CODE [0,8,0]
+         LCM               Color matrix                        [1,3,12]
+         LLM               Light matrix                        [1,3,12]
+Out:     RGBn              RGB fifo.              Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculation: Same as NCS, but repeated for V1 and V2.
+--------------------------------------------------------------------------
+NCDS     19       Normal color depth cue single vector
+Fields:  none
+Opcode:  cop2 $0e80413
+In:      V0                Normal vector                       [1,3,12]
+         BK                Background color       RBK,GBK,BBK  [1,19,12]
+         RGB               Primary color          R,G,B,CODE   [0,8,0]
+         LLM               Light matrix                        [1,3,12]
+         LCM               Color matrix                        [1,3,12]
+         IR0               Interpolation value                 [1,3,12]
+Out:     RGBn              RGB fifo.              Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculation:
+[1,19,12] MAC1=A1[L11*VX0 + L12*VY0 + L13*VZ0]                 [1,19,24]
+[1,19,12] MAC2=A2[L21*VX0 + L22*VY0 + L23*VZ0]                 [1,19,24]
+[1,19,12] MAC3=A3[L31*VX0 + L32*VY0 + L33*VZ0]                 [1,19,24]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,19,12][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,19,12][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,19,12][lm=1]
+[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3]           [1,19,24]
+[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3]           [1,19,24]
+[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3]           [1,19,24]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,19,12][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,19,12][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,19,12][lm=1]
+[1,27,4]  MAC1=A1[R*IR1 + IR0*(Lm_B1[RFC-R*IR1])]              [1,27,16][lm=0]
+[1,27,4]  MAC2=A1[G*IR2 + IR0*(Lm_B2[GFC-G*IR2])]              [1,27,16][lm=0]
+[1,27,4]  MAC3=A1[B*IR3 + IR0*(Lm_B3[BFC-B*IR3])]              [1,27,16][lm=0]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,27,4][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,27,4][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,27,4][lm=1]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                             [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                             [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                             [1,27,4]
+--------------------------------------------------------------------------
+NCDT     44       Normal color depth cue triple vectors
+Fields:  none
+Opcode:  cop2 $0f80416
+In:      V0                Normal vector                       [1,3,12]
+         V1                Normal vector                       [1,3,12]
+         V2                Normal vector                       [1,3,12]
+         BK                Background color       RBK,GBK,BBK  [1,19,12]
+         FC                Far color              RFC,GFC,BFC  [1,27,4]
+         RGB               Primary color          R,G,B,CODE   [0,8,0]
+         LLM               Light matrix                        [1,3,12]
+         LCM               Color matrix                        [1,3,12]
+         IR0               Interpolation value                 [1,3,12]
+Out:     RGBn              RGB fifo.              Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculation:
+Same as NCDS but repeats for v1 and v2.
+--------------------------------------------------------------------------
+DPCT     17       Depth Cueing.
+Fields:  none
+Opcode:  cop2 $0F8002A
+
+In:      IR0               Interpolation value                 [1,3,12]
+         RGB0,RGB1,RGB2    Colors in RGB fifo.    Rn,Gn,Bn,CDn [0,8,0]
+         FC                Far color              RFC,GFC,BFC  [1,27,4]
+Out:     RGBn              RGB fifo               Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculations:
+[1,27,4]  MAC1=A1[R0+ IR0*(Lm_B1[RFC - R0])]                   [1,27,16][lm=0]
+[1,27,4]  MAC2=A2[G0+ IR0*(Lm_B2[GFC - G0])]                   [1,27,16][lm=0]
+[1,27,4]  MAC3=A3[B0+ IR0*(Lm_B3[BFC - B0])]                   [1,27,16][lm=0]
+[1,11,4]  IR1=Lm_B1[MAC1]                                      [1,27,4][lm=0]
+[1,11,4]  IR2=Lm_B2[MAC2]                                      [1,27,4][lm=0]
+[1,11,4]  IR3=Lm_B3[MAC3]                                      [1,27,4][lm=0]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                             [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                             [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                             [1,27,4]
+
+Performs this calculation 3 times, so all three RGB values have been
+replaced by the depth cued RGB values.
+
+--------------------------------------------------------------------------
+NCCS     17       Normal Color Color single vector
+Fields:  none
+Opcode:  cop2 $108041B
+
+In:      V0                Normal vector                       [1,3,12]
+         BK                Background color       RBK,GBK,BBK  [1,19,12]
+         RGB               Primary color          R,G,B,CODE   [0,8,0]
+         LLM               Light matrix                        [1,3,12]
+         LCM               Color matrix                        [1,3,12]
+Out:     RGBn              RGB fifo.              Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculation:
+
+[1,19,12] MAC1=A1[L11*VX0 + L12*VY0 + L13*VZ0]                  [1,19,24]
+[1,19,12] MAC2=A2[L21*VX0 + L22*VY0 + L23*VZ0]                  [1,19,24]
+[1,19,12] MAC3=A3[L31*VX0 + L32*VY0 + L33*VZ0]                  [1,19,24]
+[1,3,12]  IR1= Lm_B1[MAC1]                                      [1,19,12][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                      [1,19,12][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                      [1,19,12][lm=1]
+[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3]            [1,19,24]
+[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3]            [1,19,24]
+[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3]            [1,19,24]
+[1,3,12]  IR1= Lm_B1[MAC1]                                      [1,19,12][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                      [1,19,12][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                      [1,19,12][lm=1]
+[1,27,4]  MAC1=A1[R*IR1]                                        [1,27,16]
+[1,27,4]  MAC2=A2[G*IR2]                                        [1,27,16]
+[1,27,4]  MAC3=A3[B*IR3]                                        [1,27,16]
+[1,3,12]  IR1= Lm_B1[MAC1]                                      [1,27,4][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                      [1,27,4][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                      [1,27,4][lm=1]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                              [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                              [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                              [1,27,4]
+--------------------------------------------------------------------------
+NCCT     39       Normal Color Color triple vector
+Fields:  none
+Opcode:  cop2 $118043F
+
+In:      V0                Normal vector 1                     [1,3,12]
+         V1                Normal vector 2                     [1,3,12]
+         V2                Normal vector 3                     [1,3,12]
+         BK                Background color       RBK,GBK,BBK  [1,19,12]
+         RGB               Primary color          R,G,B,CODE   [0,8,0]
+         LLM               Light matrix                        [1,3,12]
+         LCM               Color matrix                        [1,3,12]
+Out:     RGBn              RGB fifo.              Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculation:
+Same as NCCS but repeats for v1 and v2.
+--------------------------------------------------------------------------
+CDP      13       Color Depth Cue
+Fields:  none
+Opcode:  cop2 $1280414
+
+In:      [IR1,IR2,IR3]     Vector                              [1,3,12]
+         RGB               Primary color          R,G,B,CODE   [0,8,0]
+         IR0               Interpolation value                 [1,3,12]
+         BK                Background color       RBK,GBK,BBK  [1,19,12]
+         LCM               Color matrix                        [1,3,12]
+         FC                Far color              RFC,GFC,BFC  [1,27,4]
+Out:     RGBn              RGB fifo               Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculation:
+[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3]           [1,19,24]
+[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3]           [1,19,24]
+[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3]           [1,19,24]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,19,12][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,19,12][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,19,12][lm=1]
+[1,27,4]  MAC1=A1[R*IR1 + IR0*(Lm_B1[RFC-R*IR1])]              [1,27,16][lm=0]
+[1,27,4]  MAC2=A2[G*IR2 + IR0*(Lm_B2[GFC-G*IR2])]              [1,27,16][lm=0]
+[1,27,4]  MAC3=A3[B*IR3 + IR0*(Lm_B3[BFC-B*IR3])]              [1,27,16][lm=0]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,27,4][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,27,4][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,27,4][lm=1]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                             [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                             [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                             [1,27,4]
+--------------------------------------------------------------------------
+CC       11       Color Color.
+Fields:  none
+Opcode:  cop2 $138041C
+In:      [IR1,IR2,IR3]     Vector                              [1,3,12]
+         BK                Background color       RBK,GBK,BBK  [1,19,12]
+         RGB               Primary color          R,G,B,CODE   [0,8,0]
+         LCM               Color matrix                        [1,3,12]
+Out:     RGBn              RGB fifo.              Rn,Gn,Bn,CDn [0,8,0]
+         [IR1,IR2,IR3]     Color vector                        [1,11,4]
+         [MAC1,MAC2,MAC3]  Color vector                        [1,27,4]
+
+Calculations:
+[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3]           [1,19,24]
+[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3]           [1,19,24]
+[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3]           [1,19,24]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,19,12][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,19,12][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,19,12][lm=1]
+[1,27,4]  MAC1=A1[R*IR1]                                       [1,27,16]
+[1,27,4]  MAC2=A2[G*IR2]                                       [1,27,16]
+[1,27,4]  MAC3=A3[B*IR3]                                       [1,27,16]
+[1,3,12]  IR1= Lm_B1[MAC1]                                     [1,27,4][lm=1]
+[1,3,12]  IR2= Lm_B2[MAC2]                                     [1,27,4][lm=1]
+[1,3,12]  IR3= Lm_B3[MAC3]                                     [1,27,4][lm=1]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]                             [1,27,4]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]                             [1,27,4]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]                             [1,27,4]
+--------------------------------------------------------------------------
+NCLIP    8        Normal clipping
+Fields:
+Opcode:  cop2 $1400006
+
+in:      SXY0,SXY1,SXY2    Screen coordinates                  [1,15,0]
+out:     MAC0              Outerproduct of SXY1 and SXY2 with  [1,31,0]
+                           SXY0 as origin.
+
+Calculation:
+[1,31,0] MAC0 = F[SX0*SY1+SX1*SY2+SX2*SY0-SX0*SY2-SX1*SY0-SX2*SY1] [1,43,0]
+--------------------------------------------------------------------------
+AVSZ3    5        Average of three Z values
+fields:
+Opcode:  cop2 $158002D
+
+in:      SZ1, SZ2, SZ3     Z-Values                            [0,16,0]
+         ZSF3              Divider                             [1,3,12]
+out:     OTZ               Average.                            [0,16,0]
+         MAC0              Average.                            [1,31,0]
+
+Calculation:
+[1,31,0] MAC0=F[ZSF3*SZ1 + ZSF3*SZ2 + ZSF3*SZ3]                [1,31,12]
+[0,16,0] OTZ=Lm_D[MAC0]                                        [1,31,0]
+--------------------------------------------------------------------------
+AVSZ4    6        Average of four Z values
+Fields:
+Opcode:  cop2 $168002E
+
+in:      SZ0,SZ1,SZ2,SZ3   Z-Values                            [0,16,0]
+         ZSF4              Divider                             [1,3,12]
+out:     OTZ               Average.                            [0,16,0]
+         MAC0              Average.                            [1,31,0]
+
+Calculation:
+[1,31,0] MAC0=F[ZSF4*SZ0 + ZSF4*SZ1 + ZSF4*SZ2 + ZSF4*SZ3]     [1,31,12]
+[0,16,0] OTZ=Lm_D[MAC0]                                        [1,31,0]
+--------------------------------------------------------------------------
+OP       6        Outer product of 2 vectors
+Fields:  sf
+Opcode:  cop2 $170000C
+
+in:      [R11R12,R22R23,R33] vector 1
+         [IR1,IR2,IR3]      vector 2
+out:     [IR1,IR2,IR3]      outer product
+         [MAC1,MAC2,MAC3]   outer product
+
+Calculation: (D1=R11R12,D2=R22R23,D3=R33)
+
+         MAC1=A1[D2*IR3 - D3*IR2]
+         MAC2=A2[D3*IR1 - D1*IR3]
+         MAC3=A3[D1*IR2 - D2*IR1]
+         IR1=Lm_B1[MAC1]
+         IR2=Lm_B2[MAC2]
+         IR3=Lm_B3[MAC3]
+
+--------------------------------------------------------------------------
+GPF      5        General purpose interpolation
+Fields:  sf
+Opcode:  cop2 $190003D
+
+in:      IR0               scaling factor
+         CODE              code field of RGB
+         [IR1,IR2,IR3]     vector
+out:     [IR1,IR2,IR3]     vector
+         [MAC1,MAC2,MAC3]  vector
+         RGB2              RGB fifo.
+
+Calculation:
+
+         MAC1=A1[IR0 * IR1]
+         MAC2=A2[IR0 * IR2]
+         MAC3=A3[IR0 * IR3]
+         IR1=Lm_B1[MAC1]
+         IR2=Lm_B2[MAC2]
+         IR3=Lm_B3[MAC3]
+[0,8,0]   Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]   R0<-R1<-R2<- Lm_C1[MAC1]
+[0,8,0]   G0<-G1<-G2<- Lm_C2[MAC2]
+[0,8,0]   B0<-B1<-B2<- Lm_C3[MAC3]
+--------------------------------------------------------------------------
+GPL      5        General purpose interpolation
+Fields:  sf
+Opcode:  cop2 $1A0003E
+
+in:      IR0               scaling factor
+         CODE              code field of RGB
+         [IR1,IR2,IR3]     vector
+         [MAC1,MAC2,MAC3]  vector
+out:     [IR1,IR2,IR3]     vector
+         [MAC1,MAC2,MAC3]  vector
+         RGB2              RGB fifo.
+
+Calculation:
+
+         MAC1=A1[MAC1 + IR0 * IR1]
+         MAC2=A2[MAC2 + IR0 * IR2]
+         MAC3=A3[MAC3 + IR0 * IR3]
+         IR1=Lm_B1[MAC1]
+         IR2=Lm_B2[MAC2]
+         IR3=Lm_B3[MAC3]
+[0,8,0]  Cd0<-Cd1<-Cd2<- CODE
+[0,8,0]  R0<-R1<-R2<- Lm_C1[MAC1]
+[0,8,0]  G0<-G1<-G2<- Lm_C2[MAC2]
+[0,8,0]  B0<-B1<-B2<- Lm_C3[MAC3]
+--------------------------------------------------------------------------
+Field descriptions.
+
+bit |24 23 22 21 20|19|18 17|16 15|14 13|12 11|10|
+desc|              |sf|mx   |v    |cv   |     |lm|
+
+bit |09 08 07 06 05 04 03 02 01 00|
+desc|                             |
+
+sf 0  Normal calculation.
+   1  Calculations on data shifted 12 bits to the left in the IR regs.
+(Not entirely sure about what really happens.)
+
+mx 0  Multiply with rotation matrix
+   1  Multiply with light matrix
+   2  Multiply with color matrix
+   3  -
+
+v  0  V0 source vector (short)
+   1  V1 source vector (short)
+   2  V2 source vector (short)
+   3  IR source vector (long)
+
+cv 0  Add translation vector (TR)
+   1  Add back color vector  (BK)
+   2  Bugged. Should add far color vector. (FC)
+   3  Add no vector
+
+lm 0  No negative limit.
+   1  Limit negative results to 0.
+
+--------------------------------------------------------------------------
+A list of common MVMVA instructions:
+
+rtv0     cop2 $0486012  v0 * rotmatrix
+rtv1     cop2 $048E012  v1 * rotmatrix
+rtv2     cop2 $0496012  v2 * rotmatrix
+
+rtir12   cop2 $049E012  ir * rotmatrix
+rtir0    cop2 $041E012  ir * rotmatrix.
+
+rtv0tr   cop2 $0480012  v0 * rotmatrix + tr vector
+rtv1tr   cop2 $0488012  v1 * rotmatrix + tr vector
+rtv2tr   cop2 $0490012  v2 * rotmatrix + tr vector
+rtirtr   cop2 $0498012  ir * rotmatrix + tr vector
+
+rtv0bk   cop2 $0482012  v0 * rotmatrix + bk vector
+rtv1bk   cop2 $048A012  v1 * rotmatrix + bk vector
+rtv2bk   cop2 $0492012  v2 * rotmatrix + bk vector
+rtirbk   cop2 $049A012  ir * rotmatrix + bk vector
+
+ll       cop2 $04A6412  v0 * light matrix. Lower limit result to 0.
+
+llv0     cop2 $04A6012  v0 * light matrix
+llv1     cop2 $04AE012  v1 * light matrix
+llv2     cop2 $04B6012  v2 * light matrix
+llir     cop2 $04BE012  ir * light matrix
+
+llv0tr   cop2 $04A0012  v0 * light matrix + tr vector
+llv1tr   cop2 $04A8012  v1 * light matrix + tr vector
+llv2tr   cop2 $04B0012  v2 * light matrix + tr vector
+llirtr   cop2 $04B8012  ir * light matrix + tr vector
+
+llv0bk   cop2 $04A2012  v0 * light matrix + bk vector
+llv1bk   cop2 $04AA012  v1 * light matrix + bk vector
+llv2bk   cop2 $04B2012  v2 * light matrix + bk vector
+llirbk   cop2 $04BA012  ir * light matrix + bk vector
+
+lc       cop2 $04DA412  ir * color matrix + bk vector. Lower limit result to 0.
+
+lcv0     cop2 $04C6012  v0 * color matrix
+lcv1     cop2 $04CE012  v1 * color matrix
+lcv2     cop2 $04D6012  v2 * color matrix
+lcir     cop2 $04DE012  ir * color matrix
+
+lcv0tr   cop2 $04C0012  v0 * color matrix + tr vector
+lcv1tr   cop2 $04C8012  v1 * color matrix + tr vector
+lcv2tr   cop2 $04D0012  v2 * color matrix + tr vector
+lcirtr   cop2 $04D8012  ir * color matrix + tr vector
+
+lcv0bk   cop2 $04C2012  v0 * color matrix + bk vector
+lcv1bk   cop2 $04CA012  v1 * color matrix + bk vector
+lcv2bk   cop2 $04D2012  v2 * color matrix + bk vector
+lcirbk   cop2 $04DA012  ir * color matrix + bk vector
+
+Other instructions:
+
+sqr12    cop2 $0A80428  square of ir  (1,19,12)
+sqr0     cop2 $0A00428                (1,31, 0)
+
+op12     cop2 $178000C  outer product (1,19,12)
+op0      cop2 $170000C                (1,31, 0)
+
+gpf12    cop2 $198003D  general purpose interpolation (1,19,12)
+gpf0     cop2 $190003D                                (1,31, 0)
+
+gpl12    cop2 $1A8003E  general purpose interpolation (1,19,12)
+gpl0     cop2 $1A0003E                                (1,31, 0)
+
+--------------------------------------------------------------------------
+doomed@c64.org <- corrections/additions     latest update -> psx.rules.org
+--------------------------------------------------------------------------
+ 8/jun/1999       Initial version.
+23/aug/1999       Almost completely rewritten.
+ 2/feb/2000	  Small fix.
+--------------------------------------------------------------------------
+If you miss anything that was in the previous version please mail me.
+--------------------------------------------------------------------------
+
+
+\ No newline at end of file
diff --git a/misc/psx_documentation_project.pdf b/misc/psx_documentation_project.pdf
Binary files differ.
diff --git a/misc/spu.txt b/misc/spu.txt
@@ -0,0 +1,526 @@
+==========================================================================
+SPU - Sound Processing Unit. Information & Documentation.
+==========================================================================
+
+Disclaimer.
+--------------------------------------------------------------------------
+This document is a collection of all info on the SPU I could find and my
+own notes. Most of this is the result of experiment, so not all info might
+be correct. This document is most probably not complete, and not all
+capabilities and quirks of the SPU are documented. No responsibility is
+taken for anything that might occur using the information in this document.
+
+
+Introduction.
+--------------------------------------------------------------------------
+The SPU is the unit responsible for all aural capabilities of the psx. It
+handles 24 voices, has a 512kb sound buffer, has ADSR envelope filters for
+each voice and lots of other features.
+
+
+Notations and conventions
+When the format of data is given it's shown as a bitwise representation
+like this:
+
+
+bit  |0f|0e 0d 0c 0b 0a|09 08 07 06 05|04 03 02 01 00|
+desc.|                                               |
+
+The bit row shows which bits of the data are used, and separators are used
+to show where the different elements of the data stop and start. MSB is on
+the left, LSB is on the right. Stuff like |0f-08| means bit $0f to bit $08.
+The desc. row shows the description of the different elements. With
+separators where the element starts and ends.
+
+
+
+--------------------------------------------------------------------------
+The Sound Buffer
+--------------------------------------------------------------------------
+
+The SPU has control over a 512kb sound buffer. Data is stored compressed
+into blocks of 16 bytes. Each block contains 14 packed sample bytes and two
+header bytes, one for the packing and one for sample end and looping
+information. One such block is decoded into 28 16bit samples (two 4bit
+packed samples per byte).
+
+In the first 4 kb of the buffer the SPU stores the decoded data of CD audio
+after volume processing and the sound data of voice 1 and voice 3 after
+envelope processing. The decoded data is stored as 16 bit signed values,
+one sample per clock (44.1 khz).
+
+Following this first 4kb are 8 bytes reserved by the system. The memory
+beyond that is free to store samples, up to the reverb work area if the
+effect processor is used. The size of this work area depends on which
+type of effect is being processed. More on that later.
+
+Memory layout:
+$00000-$003ff  CD audio left
+$00400-$007ff  CD audio right
+$00800-$00bff  Voice 1
+$00c00-$00fff  Voice 3
+$01000-$01007  System area.
+$01008-$xxxxx  Sound data area.
+$0xxxx-$7ffff  Reverb work area.
+
+--------------------------------------------------------------------------
+Voices.
+--------------------------------------------------------------------------
+The SPU has 24 hardware voices. These voices can be used to reproduce sample
+data, noise or can be used as frequency modulator on the next voice.
+Each voice has its own programmable ADSR envelope filter. The main volume
+can be programmed independently for left and right output.
+
+The ADSR envelope filter works as follows:
+Ar = Attack rate, which specifies the speed at which the volume increases
+     from zero to its maximum value, as soon as the note on is given. The
+     slope can be set to linear or exponential.
+Dr = Decay rate specifies the speed at which the volume decreases to the
+     sustain level. Decay is always decreasing exponentially.
+Sl = Sustain level, base level from which sustain starts.
+Sr = Sustain rate is the rate at which the volume of the sustained note
+     increases or decreases. This can be either linear or exponential.
+Rr = Release rate is the rate at which the volume of the note decreases
+     as soon as the note off is given.
+
+     lvl |
+       ^ |     /\Dr     __
+     Sl _| _  / _ \__---  \
+         |   /       ---__ \ Rr
+         |  /Ar       Sr  \ \
+         | /                \\
+         |/___________________\________
+                                  ->time
+
+The overall volume can also be set to sweep up or down linearly or
+exponentially from its current value. This can be done separately
+for left and right.
+
+
+--------------------------------------------------------------------------
+SPU Operation
+--------------------------------------------------------------------------
+
+The SPU occupies the area $1f801c00-$1f801dff. All registers are 16 bit
+wide.
+
+=============================================================
+$1f801c00-        Voice data area. For each voice there are 8 16 bit
+$1f801d7f         registers structured like this:
+
+(xx = $c0 + voice number)
+-------------------------------------------------------------
+$1f801xx0         Volume Left
+$1f801xx2         Volume Right
+
+Volume mode:
+bit  |0f|0e|0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.| 0| S|               VV                        |
+
+VV  $0000-$3fff   Voice volume.
+S        0        Phase Normal
+         1              Inverted
+
+Sweep mode:
+bit  |0f|0e|0d|0c|0b 0a 09 08 07|06 05 04 03 02 01 00|
+desc.| 1|Sl|Dr|Ph|              |VV                  |
+
+VV  $0000-$007f   Voice volume.
+Sl       0        Linear slope
+         1        Exponential slope
+Dr       0        Increase
+         1        Decrease
+Ph       0        Normal phase
+         1        Inverted phase
+
+In sweep mode, the current volume increases to its maximum value,
+or decreases to its minimum value, according to mode. Choose
+phase equal to the phase of the current volume.
+-------------------------------------------------------------
+$1f801xx4         Pitch
+bit  |0f 0e|0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|     |Pt                                       |
+
+Pt  $0000-$3fff   Specifies pitch.
+
+Any value can be set, table shows only octaves:
+$0200  - 3 octaves
+$0400  - 2
+$0800  - 1
+$1000  sample pitch
+$2000  + 1
+$3fff  + 2
+-------------------------------------------------------------
+$1f801xx6         Startaddress of Sound
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|Addr                                           |
+
+Addr     Startaddress of sound in Sound buffer /8
+-------------------------------------------------------------
+$1f801xx8         Attack/Decay/Sustain level
+bit  |0f|0e 0d 0c 0b 0a 09 08|07 06 05 04|03 02 01 00|
+desc.|Am|         Ar         |Dr         |Sl         |
+
+Am       0        Attack mode Linear
+         1                    Exponential
+
+Ar       0-7f     attack rate
+Dr       0-f      decay rate
+Sl       0-f      sustain level
+-------------------------------------------------------------
+$1f801xxa         Sustain rate, Release Rate.
+bit  |0f|0e|0d|0c 0b 0a 09 08 07 06|05|04 03 02 01 00|
+desc.|Sm|Sd| 0|   Sr               |Rm|Rr            |
+
+Sm       0        sustain rate mode linear
+         1                          exponential
+Sd       0        sustain rate mode increase
+         1                          decrease
+Sr       0-7f     Sustain Rate
+Rm       0        Linear decrease
+         1        Exponential decrease
+Rr       0-1f     Release Rate
+
+Note: decay mode is always Exponential decrease, and thus cannot
+be set.
+-------------------------------------------------------------
+$1f801xxc         Current ADSR volume
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|ADSRvol                                        |
+
+ADSRvol           Returns the current envelope volume when
+                  read.
+-------------------------------------------------------------
+$1f801xxe         Repeat address.
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|Ra                                             |
+
+Ra  $0000-$ffff   Address sample loops to at end.
+
+Note: Setting this register only has effect after the voice
+has started (ie. KeyON), else the loop address gets reset
+by the sample.
+=============================================================
+$1f801d80         Mainvolume left
+$1f801d82         Mainvolume right
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|                                               |
+
+Sets Main volume, these work the same as the channel volume
+registers. See those for details.
+-------------------------------------------------------------
+$1f801d84         Reverberation depth left
+$1f801d86         Reverberation depth right
+bit  |0f|0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|P |Rvd                                         |
+
+Rvd $0000-$7fff   Sets the wet volume for the effect.
+P        0        Normal phase
+         1        Inverted phase
+=============================================================
+Following registers have a common layout:
+
+first register:
+bit  |0f|0e|0d|0c|0b|0a|09|08|07|06|05|04|03|02|01|00|
+desc.|cf|ce|cd|cc|cb|ca|c9|c8|c7|c6|c5|c4|c3|c2|c1|c0|
+
+second register:
+bit  |0f           08|07 |06 |05 |04 |03 |02 |01 | 00|
+desc.|              0|c17|c16|c15|c14|c13|c12|c11|c10|
+
+c0-c17   0        Mode for channel c?? off
+         1        Mode for channel c?? on
+-------------------------------------------------------------
+$1f801d88         Voice ON  (0-15)
+$1f801d8a         Voice ON  (16-23)
+
+Sets the current voice to key on. (ie. start ads)
+-------------------------------------------------------------
+$1f801d8c         Voice OFF (0-15)
+$1f801d8e         Voice OFF (16-23)
+
+Sets the current voice to key off.(ie. release)
+-------------------------------------------------------------
+$1f801d90         Channel FM (pitch lfo) mode (0-15)
+$1f801d92         Channel FM (pitch lfo) mode (16-23)
+
+Sets the channel frequency modulation. Uses the previous channel
+as modulator.
+-------------------------------------------------------------
+$1f801d94         Channel Noise mode (0-15)
+$1f801d96         Channel Noise mode (16-23)
+
+Sets the channel to noise.
+-------------------------------------------------------------
+$1f801d98         Channel Reverb mode (0-15)
+$1f801d9a         Channel Reverb mode (16-23)
+
+Sets reverb for the channel. As soon as the sample ends, the
+reverb for that channel is turned off.
+-------------------------------------------------------------
+$1f801d9c         Channel ON/OFF (0-15)                 ?
+$1f801d9e         Channel ON/OFF (16-23)                ?
+
+Returns whether the channel is mute or not.             ?
+=============================================================
+$1f801da2         Reverb work area start
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|Revwa                                          |
+
+Revwa $0000-$ffff Reverb work area start in sound buffer /8
+-------------------------------------------------------------
+$1f801da4         Sound buffer IRQ address.
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|IRQa                                           |
+
+IRQa  $0000-$ffff IRQ address in sound buffer /8
+??
+-------------------------------------------------------------
+$1f801da6         Sound buffer address
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|Sba                                            |
+
+SBA $0000-$ffff   Address in sound buffer divided by eight.
+                  Next transfer to this address.
+-------------------------------------------------------------
+$1f801da8         SPU data
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|                                               |
+
+Data forwarding reg, for non DMA transfer.
+-------------------------------------------------------------
+$1f801daa         SPU control                sp0
+bit  |0f|0e|0d 0c 0b 0a 09 08|07|06 |05 04|03|02|01|00|
+desc.|En|Mu|Noise            |Rv|Irq|DMA  |Er|Cr|Ee|Ce|
+
+En       0        SPU off
+         1        SPU on
+Mu       0        Mute SPU
+         1        Unmute SPU
+Noise             Noise clock frequency
+Rv       0        Reverb Disabled
+         1        Reverb Enabled
+Irq      0        Irq disabled
+         1        Irq enabled
+DMA     00
+        01        Non DMA write?  (transfer through data reg)
+        10        DMA Write
+        11        DMA Read
+Er       0        Reverb for external off
+         1        Reverb for external on
+Cr       0        Reverb for CD off
+         1        Reverb for CD on
+Ee       0        External audio off
+         1        External audio on
+Ce       0        CD audio off
+         1        CD audio on
+-------------------------------------------------------------
+$1f801dac         SPU status
+bit  |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|                                               |
+
+Don't know what this is for, but in SPU init routines this
+register gets loaded with $4.
+-------------------------------------------------------------
+$1f801dae         SPU status
+bit  |0f 0e 0d 0c|0b|0a|09 08 07 06 05 04 03 02 01 00|
+desc.|           |Dh|Rd|                             |
+
+Dh       0        Decoding in first half of buffer
+         1        Decoding in second half of buffer
+Rd       0        Spu ready to transfer
+         1        Spu not ready
+
+Some of bits 9-0 are also ready/not ready states. More on
+that later. Functions that wait for the SPU to be ready,
+wait for bits a-0 to become 0.
+-------------------------------------------------------------
+$1f801db0         CD volume left
+$1f801db2         CD volume right
+bit  |0f|0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|P |CDvol                                       |
+
+CDvol $0000-$7fff Set volume of CD input.
+P        0        Normal phase.
+         1        Inverted phase.
+-------------------------------------------------------------
+$1f801db4         Extern volume left
+$1f801db6         Extern volume right
+bit  |0f|0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+desc.|P |Exvol                                       |
+
+Exvol $0000-$7fff Set volume of External input.
+P        0        Normal phase.
+         1        Inverted phase.
+-------------------------------------------------------------
+1dc0-1dff         Reverb configuration area
+$1f801dc0         
+$1f801dc2
+$1f801dc4         Lowpass Filter Frequency. 7fff = max value= no filtering
+$1f801dc6         Effect volume 0 - $7fff, bit 15 = phase.
+$1f801dc8
+$1f801dca
+$1f801dcc
+$1f801dce         Feedback
+$1f801dd0
+$1f801dd2
+$1f801dd4         Delaytime(see below)
+$1f801dd6         Delaytime(see below)
+$1f801dd8         Delaytime(see below)
+$1f801dda
+$1f801ddc
+$1f801dde
+$1f801de0         Delaytime(see below)
+$1f801de2
+$1f801de4
+$1f801de6
+$1f801de8
+$1f801dea
+$1f801dec
+$1f801dee
+$1f801df0
+$1f801df2
+$1f801df4         Delaytime
+$1f801df6         Delaytime
+$1f801df8
+$1f801dfa
+$1f801dfc
+$1f801dfe
+
+--------------------------------------------------------------------------
+Reverb
+--------------------------------------------------------------------------
+The SPU is equipped with an effect processor for reverb echo and delay type
+of effects. This effect processor can do one effect at a time, and for each
+voice you can specify whether it should have the effect applied or not.
+
+The effect is set up by initializing the registers $1dc0 to $1dfe to the
+desired effect. I do not exactly know how these work, but you can use
+the presets below.
+
+The effect processor needs a bit of sound buffer memory to perform its
+calculations. The size of this depends on the effect type. For the presets
+the sizes are:
+
+Reverb off        $00000       Hall          $0ade0
+Room              $026c0       Space echo    $0f6c0
+Studio small      $01f40       Echo          $18040
+Studio medium     $04840       Delay         $18040
+Studio large      $06fe0       Half echo     $03c00
+
+The location at which the work area is located is set in register $1da2
+and its value is the location in the sound buffer divided by eight. Common
+values are as follows:
+
+Reverb off        $FFFE        Hall          $EA44
+Room              $FB28        Space echo    $E128
+Studio small      $FC18        Echo          $CFF8
+Studio medium     $F6F8        Delay         $CFF8
+Studio large      $F204        Half echo     $F880
+
+For the delay and echo effects (not space echo or half echo) you can
+specify the delay time, and feedback. (range 0-127) Calculations are shown
+below.
+
+When you set up a new reverb effect, take the following steps:
+
+-Turn off the reverb (bit 7 in sp0)
+-Set Depth to 0
+-First make delay & feedback calculations.
+-Copy the preset to the effect registers
+-Turn on the reverb
+-Set Depth to desired value.
+
+Also make sure the reverb work area is cleared, else you might get
+some unwanted noise.
+
+To use the effect on a voice, simply turn on the corresponding bit in the
+channel reverb registers. Note that these get turned off automatically when
+the sample for the channel ends.
+
+
+-------------------------------------------------------------
+Effect presets: copy these in order to $1dc0-$1dfe
+
+Reverb off:
+$0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000
+$0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000
+$0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000
+$0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000
+
+Room:
+$007D, $005B, $6D80, $54B8, $BED0, $0000, $0000, $BA80
+$5800, $5300, $04D6, $0333, $03F0, $0227, $0374, $01EF
+$0334, $01B5, $0000, $0000, $0000, $0000, $0000, $0000
+$0000, $0000, $01B4, $0136, $00B8, $005C, $8000, $8000
+
+Studio Small:
+$0033, $0025  $70F0  $4FA8  $BCE0  $4410  $C0F0  $9C00
+$5280  $4EC0  $03E4  $031B  $03A4  $02AF  $0372  $0266
+$031C  $025D  $025C  $018E  $022F  $0135  $01D2  $00B7
+$018F  $00B5  $00B4  $0080  $004C  $0026  $8000  $8000
+
+Studio Medium:
+$00B1  $007F  $70F0  $4FA8  $BCE0  $4510  $BEF0  $B4C0
+$5280  $4EC0  $0904  $076B  $0824  $065F  $07A2  $0616
+$076C  $05ED  $05EC  $042E  $050F  $0305  $0462  $02B7
+$042F  $0265  $0264  $01B2  $0100  $0080  $8000  $8000
+
+Studio Large:
+$00E3  $00A9  $6F60  $4FA8  $BCE0  $4510  $BEF0  $A680
+$5680  $52C0  $0DFB  $0B58  $0D09  $0A3C  $0BD9  $0973
+$0B59  $08DA  $08D9  $05E9  $07EC  $04B0  $06EF  $03D2
+$05EA  $031D  $031C  $0238  $0154  $00AA  $8000  $8000
+
+Hall:
+$01A5  $0139  $6000  $5000  $4C00  $B800  $BC00  $C000
+$6000  $5C00  $15BA  $11BB  $14C2  $10BD  $11BC  $0DC1
+$11C0  $0DC3  $0DC0  $09C1  $0BC4  $07C1  $0A00  $06CD
+$09C2  $05C1  $05C0  $041A  $0274  $013A  $8000  $8000
+
+Space Echo:
+$033D  $0231  $7E00  $5000  $B400  $B000  $4C00  $B000
+$6000  $5400  $1ED6  $1A31  $1D14  $183B  $1BC2  $16B2
+$1A32  $15EF  $15EE  $1055  $1334  $0F2D  $11F6  $0C5D
+$1056  $0AE1  $0AE0  $07A2  $0464  $0232  $8000  $8000
+
+Echo:
+$0001  $0001  $7FFF  $7FFF  $0000  $0000  $0000  $8100
+$0000  $0000  $1FFF  $0FFF  $1005  $0005  $0000  $0000
+$1005  $0005  $0000  $0000  $0000  $0000  $0000  $0000
+$0000  $0000  $1004  $1002  $0004  $0002  $8000  $8000
+
+Delay:
+
+$0001  $0001  $7FFF  $7FFF  $0000  $0000  $0000  $0000
+$0000  $0000  $1FFF  $0FFF  $1005  $0005  $0000  $0000
+$1005  $0005  $0000  $0000  $0000  $0000  $0000  $0000
+$0000  $0000  $1004  $1002  $0004  $0002  $8000  $8000
+
+Half Echo:
+$0017  $0013  $70F0  $4FA8  $BCE0  $4510  $BEF0  $8500
+$5F80  $54C0  $0371  $02AF  $02E5  $01DF  $02B0  $01D7
+$0358  $026A  $01D6  $011E  $012D  $00B1  $011F  $0059
+$01A0  $00E3  $0058  $0040  $0028  $0014  $8000  $8000
+
+-------------------------------------------------------------
+Delay time calculation:
+Choose delay time in range 0-$7f. rXXXX means register $1f80XXXX.
+
+r1dd4 = dt*64.5 - r1dc0
+r1dd6 = dt*32.5 - r1dc2
+
+r1dd8 = r1dda + dt*32.5
+r1de0 = r1de2 + dt*32.5
+r1df4 = r1df8 + dt*32.5
+r1df6 = r1dfa + dt*32.5
+
+--------------------------------------------------------------------------
+doomed@c64.org <- corrections/additions     latest update -> psx.rules.org
+--------------------------------------------------------------------------
+ 5/jun/1999     First posting. Far from completion.
+
+(thanx to ppl in <>)
+--------------------------------------------------------------------------
+thanx & hello to the usual.
+
diff --git a/misc/system.txt b/misc/system.txt
@@ -0,0 +1,865 @@
+--------------------------------------------------------------------------
+System Operation
+--------------------------------------------------------------------------
+
+Introduction
+--------------------------------------------------------------------------
+This text covers the usage of the R3000, the system control coprocessor and
+hardware registers, the file server and some system calls.
+
+--------------------------------------------------------------------------
+R3000
+--------------------------------------------------------------------------
+The heart of the psx is a MIPS R3000. The version in the PSX has two
+coprocessors, (cop0 - System Control Coprocessor, cop2 - GTE), one
+multiplier/divider, 32 general registers, one ALU, one shifter, one
+address adder, 4kb of Instruction Cache, 1 kb of Data cache and NO floating
+point unit.
+
+Registers
+-------------------------------------------------------------
+All registers are 32 bits wide.
+
+ 0       zero     Constant, always 0
+ 1       at       Assembler temporary.
+ 2- 3    v0-v1    Subroutine return values
+ 4- 7    a0-a3    Subroutine arguments
+ 8-15    t0-t7    Temporaries, may be changed by subroutines
+16-23    s0-s7    Register variables, must be saved by subs.
+24-25    t8-t9    Temporaries, may be changed by subroutines
+26-27    k0-k1    Reserved for the kernel
+28       gp       Global pointer
+29       sp       Stack pointer
+30       fp(s8)   9th register variable, subs can use this as a frame
+                  pointer
+31       ra       Return address
+
+ -       pc       Program counter
+ -       hi,lo    Registers of the multiplier/divider.
+
+All registers behave the same, remarks are not hardware bound, but general
+programming good practice. Respect these for compatibility, especially if
+you intend to use kernel routines.
+Exceptions are register 0, and 31. Zero will always return 0, regardless
+of any writing attempts. Ra is used by the normal jal instruction for the
+return address. (points to the second instruction after the jal). Note that
+the jalr instruction can use any register for the return address, though
+usually only register 31 is used.
+
+The PC is not really a register, and should not be seen like one. Hi, Lo
+are the registers which the multiplier/divider returns its results to.
+Special instructions are implemented to deal with them.
+
+-------------------------------------------------------------
+Instructions
+-------------------------------------------------------------
+rt       target register (cpu general register 0-31)
+rs       source register (cpu general register 0-31)
+rd       destination register (cpu general register 0-31)
+base     base register (cpu general register 0-31)
+imm      16 bit immediate
+b?       immediate value of ? bits wide.
+c0r      Coprocessor 0 register
+c2d      Coprocessor 2 (GTE) data register
+c2c      Coprocessor 2 (GTE) control register
+
+
+imm(base) means an address of the value in the register + the immediate
+         value.
+
+inst     instruction name.
+d        number of instructions to wait before using rt (target reg).
+args     format of the operand fields.
+desc.    description of the instruction.
+
+
+inst  d  args        desc.
+
+*Load/Store instructions
+
+lb    1  rt,imm(base)  loads lowest byte of rt with addressed byte and
+                       extends sign.
+lbu   1  rt,imm(base)  loads lowest byte of rt with addressed byte.
+lh    1  rt,imm(base)  loads lowest halfword of rt with addressed halfword
+                        and extends sign.
+lhu   1  rt,imm(base)  loads lowest halfword of rt with addressed halfword.
+lw    1  rt,imm(base)  loads rt with addressed word.
+lwl   0  rt,imm(base)  loads high order byte of rt with addressed byte and
+                       then loads up to the low order word boundary into rt.
+lwr   0  rt,imm(base)  loads low order byte of rt with addressed byte and
+                       then loads up to the high order word boundary into
+                       rt.
+
+                       There's no delay for lwl and lwr, so you can use them
+                       directly following each other, e.g. to load a word
+                       anywhere in memory without regard to alignment:
+                       lwl   a0,$0003(t0)
+                       lwr   a0,$0000(t0)
+
+sb    1  rt,imm(base)  stores lowest byte of rt in addressed byte.
+sh    1  rt,imm(base)  stores lowest halfword of rt in addressed halfword.
+sw    1  rt,imm(base)  stores rt in addressed word.
+swl   0  rt,imm(base)  unaligned store, see lwl
+swr   0  rt,imm(base)  unaligned store, see lwr
+
+lui   0  rt,imm        loads rt with immediate<<$10
+
+*arithmetic instructions
+
+When an arithmetic overflow occurs, rd will not be modified.
+
+add   0  rd,rs,rt      Adds rt to rs and stores the result in rd.
+addu  0  rd,rs,rt      Adds rt to rs, ignores arithmetic overflow and stores
+                       result in rd.
+sub   0  rd,rs,rt      Subtracts rt from rs and stores result in rd.
+subu  0  rd,rs,rt      Subtracts rt from rs, ignores arithmetic overflow and
+                       stores result in rd.
+
+addi  0  rd,rs,imm     Adds sign-extended immediate to rs, and stores the
+                       result in rd.
+addiu 0  rd,rs,imm     Adds sign-extended immediate to rs, ignores arithmetic
+                       overflow and stores the result in rd.
+
+subi  0  rd,rs,imm     Subtracts sign-extended immediate from rs and stores
+                       the result in rd.
+subiu 0  rd,rs,imm     Subtracts sign-extended immediate from rs, ignores
+                       arithmetic overflow, and stores the result in rd.
+
+mult     rs,rt         Multiplies rs with rt, and stores the 64 bit sign
+                       extended result in hi/lo.
+multu    rs,rt         Multiplies rs with rt, and stores the 64 bit result
+                       in hi/lo.
+div      rs,rt         Divides rs by rt, and stores the quotient into lo,
+                       and the remainder into hi. Results are sign
+                       extended.
+divu     rs,rt         Divides rs by rt, and stores the quotient into lo,
+                       and the remainder into hi.
+
+
+*logical instructions
+
+and   0  rd,rs,rt      Performs a bit wise AND between rs and rt, and
+                       stores the result in rd.
+or    0  rd,rs,rt      Performs a bit wise OR between rs and rt, and
+                       stores the result in rd.
+xor   0  rd,rs,rt      Performs a bit wise XOR between rs and rt, and
+                       stores the result in rd.
+nor   0  rd,rs,rt      Performs a bit wise NOR between rs and rt, and
+                       stores the result in rd.
+
+andi  0  rd,rs,imm     Performs a bit wise AND between rs and unsigned
+                       immediate and stores the result in rd.
+ori   0  rd,rs,imm     Performs a bit wise OR between rs and unsigned
+                       immediate and stores the result in rd.
+xori  0  rd,rs,imm     Performs a bit wise XOR between rs and unsigned
+                       immediate and stores the result in rd.
+
+*shifting instructions
+
+sllv  0  rd,rs,rt      Shifts rs rt bits to the left and stores the result
+                       in rd.
+srlv  0  rd,rs,rt      Shifts rs rt bits to the right and stores the result
+                       in rd.
+srav  0  rd,rs,rt      Shifts the value in rs rt bits to the right,
+                       preserving sign, and stores the value in rd.
+
+
+sll   0  rd,rs,b5      Shifts rs b5 bits to the left and stores the result
+                       in rd.
+srl   0  rd,rs,b5      Shifts rs b5 bits to the right and stores the result
+                       in rd.
+sra   0  rd,rs,b5      Shifts rs b5 bits to the right, preserving sign and
+                       stores the result in rd.
+
+*comparison instructions.
+
+slt   0  rd,rs,rt      rd=1 if rs < rt, else rd = 0
+sltu  0  rd,rs,rt      rd=1 if (unsigned)rs <(unsigned)rt, else rd = 0
+
+slti  0  rd,rs,imm     rd=1 if rs < imm, else rd = 0
+sltiu 0  rd,rs,imm     rd=1 if (unsigned)rs < (unsigned)imm, else rd = 0
+
+*jumps and branches
+
+Note that the instruction following the branch will always be executed.
+
+j        target        jumps to target
+jal      target        jumps to target and stores pc+8 into RA (second
+                       instruction after the jal instruction)
+
+jr       rd            jumps to address in rd
+jalr     (rt,) rd      jumps to address in rd and stores pc+8 into RA, or
+                       in rt.
+
+beq      rs,rt,imm     branches to imm if rs == rt
+bne      rs,rt,imm     branches to imm if rs != rt
+
+bgtz     rs,imm        branches to imm if rs > 0
+bltz     rs,imm        branches to imm if rs < 0
+blez     rs,imm        branches to imm if rs <= 0
+bgez     rs,imm        branches to imm if rs >= 0
+bltzal   rs,imm        branches to imm and stores pc+8 into RA if rs < 0
+bgezal   rs,imm        branches to imm and stores pc+8 into RA if rs >= 0
+
+*system instructions
+
+mfhi   2 rd            moves HI into rd
+mflo   2 rd            moves LO into rd
+mthi   2 rs            moves rs into HI
+mtlo   2 rs            moves rs into LO
+
+mtc0   2 rs,c0r        moves rs into cop0 register c0r
+mfc0   2 rd,c0r        moves cop0 register c0r into rd
+
+mtc2   2 rs,c2d        moves rs into cop2 data register c2d
+mfc2   2 rd,c2d        moves cop2 data register c2d into rd
+
+ctc2   2 rs,c2c        moves rs into cop2 control register c2c
+cfc2   2 rd,c2c        moves cop2 control register c2c into rd
+
+lwc2   1 c2d,imm(base) load cop2 data register with addressed word
+swc2   1 c2d,imm(base) stores cop2 data register at addressed word
+
+syscall  (b20)         generates a system call exception
+break    (b20)         generates a breakpoint exception
+                       the 20bits wide code field is not passed, but
+                       must be read from the instruction itself if you
+                       want to use it.
+
+cop2     b25           Coprocessor operation is started. b25 is
+                       passed as parameter.
+
+rfe                    restores the interrupt enable and kernel
+                       privilege bits.
+
+tlb instructions       see MIPS doc.
+
+--------------------------------------------------------------------------
+Cop0 - System control coprocessor
+--------------------------------------------------------------------------
+
+Registers:
+#    Name      rw Description.
+
+-------------------------------------------------------------
+16   ERREG
+-------------------------------------------------------------
+15   PRid      r  COP0 type and rev level
+bit |31                  16|15         8|7           0|
+desc|                      |Imp         |Rev          |
+
+Imp      3        CP0 type R3000A
+         7        IDT unique (3041) use REV to determine correct
+                  config.
+Rev               Revision level.
+-------------------------------------------------------------
+14   EPC       r  Return address from trap
+
+Contains the return address after an exception. This address is
+the instruction at which the exception took place, unless BD is
+set in CAUSE, when the instruction is EPC+4.
+-------------------------------------------------------------
+13   CAUSE     r  Describes the most recently recognised exception
+bit |31|30|29 28|27 26 25 24 23 22 21 20 19 18 17 16|
+desc|BD| 0|CE   |                                  0|
+bit |15 14 13 12 11 10 09 08|07|06 05 04 03 02|01 00|
+desc|Ip                     | 0|Excode        |    0|
+
+BD                Is set when last exception points to the
+                  branch instruction instead of the instruction
+                  in the branch delay slot, where the exception
+                  occurred.
+CE                Contains the coprocessor number if the exception
+                  occurred because of a coprocessor instruction for
+                  a coprocessor which wasn't enabled in SR.
+Ip                Interrupt pending field. Bit 8 and 9 are RW, and
+                  contain the last value written to them. As long
+                  as any of the bits are set they will cause an
+                  interrupt if the corresponding bit is set in IM.
+Excode            Describes what kind of exception occurred:
+0        INT      Interrupt
+1        MOD      Tlb modification
+2        TLBL     Tlb load
+3        TLBS     Tlb store
+4        AdEL     Address error, load/I-fetch
+5        AdES     Address error, store
+                  The address errors occur when attempting to read
+                  outside of KUseg in user mode and when the address
+                  is misaligned.
+6        IBE      Bus error on Instruction fetch.
+7        DBE      Bus error on Data load.
+8        Syscall  Generated unconditionally by a syscall instruction
+9        BP       Breakpoint - break instruction.
+10       RI       Reserved instruction
+11       CpU      Coprocessor unusable
+12       Ov       Arithmetic overflow
+-------------------------------------------------------------
+12   SR        rw System status register
+bit |31 |30 |29 |28 |27 26|25|24 23|22 |21|20|19|18|17 |16 |
+desc|CU3|CU2|CU1|CU0|    0|RE|    0|BEV|TS|PE|CM|PZ|SwC|IsC|
+
+bit |15 14 13 12 11 10 09 08|07 06|05 |04 |03 |02 |01 |00 |
+desc|Im                     |    0|KUo|IEo|KUp|IEp|KUc|IEc|
+
+CUx      0        Coprocessor x disabled
+         1        Coprocessor x enabled
+                  CU2 is for the GTE, CU1 is for the FPA, which is
+                  not available in the PSX.
+CU0      0        Cop0 in kernel mode.
+         1        Cop0 in user mode.
+                  Makes some nominally privileged instruction usable
+                  in user mode. Normal instructions are usable regardless
+                  of this bit's setting.
+RE       0        Normal 'endianness'
+         1        Reverse 'endianness'
+                  Reverses the byte order in which data is stored in
+                  memory. (lo-hi -> hi-lo)
+BEV      0        Boot exception vectors in RAM
+         1        Boot exception vectors in ROM (kseg1)
+TS                TLB shutdown. Gets set if a program address simultaneously
+                  matches 2 TLB entries.
+PE                Cache parity error. Does not cause exception.
+CM                Shows the result of the last load operation with the D-cache
+                  isolated. It gets set if the cache really contained data
+                  for the addressed memory location.
+PZ                When set cache parity bits are written as 0.
+Isc      0        Do not isolate cache.
+         1        Isolate cache. All load and store operations are targeted
+                  to the Data cache, and never the main memory.
+Swc      0        Normal cache mode.
+         1        Swapped cache mode. I cache will act as D cache and vice
+                  versa. Use only with Isc to access & invalidate i cache
+                  entries
+Im                8 bit interrupt mask fields. When set the corresponding
+                  interrupts are allowed to cause an exception.
+KUc      0        Kernel mode privilege   , rfe pops KUp here
+         1        User mode privilege
+IEc      0        All interrupts disabled , rfe pops IEp here
+         1        Interrupts enabled.
+KUp               KUc gets pushed here on an exception, rfe pops KUo here
+IEp               IEc gets pushed here on an exception, rfe pops IEo here
+KUo               KUp gets pushed here on an exception
+IEo               IEp gets pushed here on an exception
+-------------------------------------------------------------
+11   BPCM      rw Execute breakpoint mask.
+
+Program counter is ANDed with this value and then compared to
+the value in BPC.
+-------------------------------------------------------------
+10   TLBHI/PID
+-------------------------------------------------------------
+9    BDAM      rw Data Access breakpoint mask.
+
+Data fetch address is ANDed with this value and then compared
+to the value in BDA
+-------------------------------------------------------------
+8    BadVaddr  r  Bad Virtual Address.
+
+Contains the address whose reference caused an exception. Set
+on any MMU type of exceptions, on references outside of kuseg
+and on any misaligned reference.
+-------------------------------------------------------------
+7    DCIC      rw Breakpoint control
+|1f 1e 1d 1c|1b|1a|19|18|17|16 15 14 13 12 11 10||0f      00|
+| 1  1  1  0| W| R|DA|PC| 1|                   0|          0|
+
+W        0
+         1        Break on Write
+R        0
+         1        Break on Read
+DA       0        Data access breakpoint disabled
+         1        Data access breakpoint enabled
+PC       0        Execution breakpoint disabled
+         1        Execution breakpoint enabled
+
+To use the Execution breakpoint, set PC. To use the Data access
+breakpoint set DA and either R, W or both. Both breakpoints
+can be used simultaneously. When a breakpoint occurs the PSX
+jumps to $00000040.
+-------------------------------------------------------------
+6    PIDMASK
+-------------------------------------------------------------
+5    BDA       rw Breakpoint on data access.
+
+Sets the breakpoint address for load/store operations
+-------------------------------------------------------------
+4    CTXT
+-------------------------------------------------------------
+3    BPC       rw Breakpoint on execute.
+
+Sets the breakpoint address to break on execute.
+-------------------------------------------------------------
+2    TLBLO
+1    RAND
+0    INX
+
+For TLB details see mips doc.
+
+--------------------------------------------------------------------------
+PC file server
+--------------------------------------------------------------------------
+Caetla supports pcdrv: device, the SN systems device extension to access
+files on the drive of the pc. This fileserver can be accessed by using the
+kernel functions, with the 'pcdrv:' device name prefix to the filenames or
+using the SN system calls.
+
+-------------------------------------------------------------
+SN System calls
+-------------------------------------------------------------
+The following SN system calls for the fileserver are provided.
+Accessed by setting the registers and using the break command
+with the specified field.
+-------------------------------------------------------------
+PCInit   Inits the fileserver.
+break    $0101
+-------------------------------------------------------------
+PCCreat  Creates a new file on PC.
+break    $0102
+in:      a1       pointer to file name
+         a2       file attribute
+out:     v0       0 = success, -1 = failure
+         v1       file handle or error code if v0 is negative
+-------------------------------------------------------------
+PCOpen            Opens a file on the PC.
+break    $0103
+in:      a1       pointer to file name
+         a2       access mode 0     read only
+                              1     write only
+                              2     r/w
+out:     v0       0 = success, -1 = failure
+         v1       file handle or error code if v0 is negative
+-------------------------------------------------------------
+PCClose           Closes a file on the PC.
+break    $0104
+in:      a1       file handle
+out:     v0       0 = success, -1 = failure
+         v1       0 = success, error code if v0 is negative
+-------------------------------------------------------------
+PCRead            Reads from an open file on PC.
+break    $0105
+in:      a1       file handle
+         a2       length in bytes
+         a3       pointer to store address
+out:     v0       0 = success, -1 = failure
+         v1       number of read bytes or error code if v0 is
+                  negative.
+
+Note:    Does not stop at eof, so if you set more bytes to read
+         than the filelength, the fileserver will pad with zero
+         bytes. If you are not sure of the filelength obtain
+         the filelength by PClSeek (a2 = 0, a3 = 2, v1 will return
+         the length of the file, don't forget to reset the file
+         pointer to the start before calling PCread!)
+-------------------------------------------------------------
+PCWrite           Writes to an open file on PC.
+break    $0106
+in:      a1       file handle
+         a2       length in bytes
+         a3       pointer to read address
+out:     v0       0 = success, -1 = failure
+         v1       number of written bytes or error code if v0
+                  is negative.
+-------------------------------------------------------------
+PClSeek           Repositions the file pointer
+break    $0107
+in:      a1       file handle
+         a2       number of bytes to move.
+         a3       position from 0   Beginning of file
+                                1   Current pointer
+                                2   End of file
+out:     v0       0 = success, -1 = failure
+         v1       file pointer
+-------------------------------------------------------------
+Attributes are passed as is. File attributes for the pc file
+system are like this:
+bit | 7  6| 5| 4| 3| 2| 1| 0|
+desc|    0| A| D| 0| S| H| R|
+
+A        Archive file
+D        Directory
+S        System file
+H        Hidden file
+R        Read only file
+-------------------------------------------------------------
+
+
+--------------------------------------------------------------------------
+System calls
+--------------------------------------------------------------------------
+Kernel system calls are accessed by loading the call number in t1, and
+jumping to the specified address.
+A0 call $3f means: load t1 with $3f and jump to $000000a0.
+
+-------------------------------------------------------------
+Printf            Print string to console.
+A0 call $3f
+in:      a0       Pointer to 0 terminated string.
+         a1-a3    Arguments.
+         sp+$10
+
+Prints the specified string to the console (ie. pc screen).
+String can contain standard C escape sequences and conversion
+characters, except the floating point types (%e, %f, %g).
+Variables are passed in a1 to a3. More variables are passed at
+sp+$10.
+-------------------------------------------------------------
+openevent         adds an event structure to the event table.
+B0 call $08
+in:      a0       Event class.
+         a1       Event spec.
+         a2       Event mode.
+         a3       Address of function to be executed when
+                  event occurs.
+out:     v0       Event descriptor, -1 if failed.
+
+Opens an event, should be called within a critical section.
+The return value is used to identify the event to the other
+event functions.
+A list of event classes, specs and modes is at the end of this
+section.
+-------------------------------------------------------------
+closeevent        releases an event structure from the
+B0 call $09       event table.
+in:      a0       Event descriptor.
+out:     v0       1 on success, 0 if failed.
+-------------------------------------------------------------
+enableevent       Turns on event handling for specified event.
+B0 call $0c
+in:      a0       Event descriptor.
+out:     v0       1 on success, 0 if failed.
+-------------------------------------------------------------
+disableevent      Turns off event handling for specified event.
+B0 call $0d
+in:      a0       Event descriptor.
+out:     v0       1 on success, 0 if failed.
+-------------------------------------------------------------
+open              Opens a file for IO.
+B0 call $32
+in:      a0       File name, terminated with 0
+         a1       Access mode
+out:     v0       File handle, or -1 if error.
+
+Opens a file on the target device for io. Access mode is set
+like this:
+
+bit 0    1 = Read
+    1    1 = Write
+    9    1 = New file
+   15    1 = Asynchronous mode?
+   16-31 Number of memory card blocks for a new file on the
+         memory card.
+
+The PSX can have a maximum of 16 files open at any time.
+-------------------------------------------------------------
+lseek             Move the file pointer.
+B0 call $33
+in:      a0       File handle
+         a1       Movement offset in bytes
+         a2       0 = from start of file
+                  1 = from current file pointer
+                  2 = Bugs. Should be from end of file.
+
+Moves the file pointer the number of bytes in a1, relative to
+the location specified by a2. Movement from the eof is incorrect.
+Also, movement beyond the end of the file is not checked.
+-------------------------------------------------------------
+read              Read data from an open file.
+B0 call $34
+in:      a0       File Handle
+         a1       Pointer to address to store read data
+         a2       Number of bytes to read
+out:     v0       Number of bytes actually read, -1 if failed.
+
+Reads the number of bytes from the specified open file. If length
+is not specified an error is returned. Read per $0080 bytes from
+memory card (bu:) and per $0800 from cdrom (cdrom:).
+-------------------------------------------------------------
+write             Write data to an open file.
+B0 call $35
+in:      a0       File handle
+         a1       Pointer to address to read data from.
+         a2       Number of bytes to write.
+out:     v0       Number of bytes written.
+
+Writes the number of bytes to the specified open file. Write
+to the memory card per $0080 bytes. Writing to the cdrom returns 0.
+-------------------------------------------------------------
+close             Close an open file.
+B0 call $36
+in:      a0       File handle
+out:     v0       File handle if success, -1 if failed.
+-------------------------------------------------------------
+cd                Change the current directory on target device.
+B0 call $40
+in:      a0       Pointer to new directory path
+out:     v0       1 if success, 0 if failed.
+
+Changes the current directory on target system.
+-------------------------------------------------------------
+firstfile         Finds the first file to match the name.
+B0 call $42
+in:      a0       Pointer to the file name.
+         a1       Pointer to direntry structure.
+out:     v0       0 if unsuccessful, else same as a1.
+
+Searches for the first file to match the name in the string
+pointed to by a0. Wildcards (?, *) may be used. Start the name
+with the device you want to address. (ie. pcdrv:) Different
+drives can be accessed as normally by their drive names (a:, c:)
+if path is omitted after the device, the current directory will
+be used.
+
+A direntry structure looks like this:
+
+$00 - $13     db  Filename, terminated with 0.
+$14           dw  File attribute
+$18           dw  File size
+$1c           dw  Pointer to next direntry
+$20 - $27     db  Reserved by system
+-------------------------------------------------------------
+nextfile          Searches for the next file to match the name.
+B0 call $43
+in:      a0       Pointer to direntry structure
+out:     v0       0 if unsuccessful, else same as a0.
+
+Uses the settings of a previous firstfile command.
+-------------------------------------------------------------
+rename            Rename a file on target device.
+B0 call $44
+in:      a0       Pointer to old file name
+         a1       Pointer to new file name
+out:     v0       1 if successful, 0 if failed.
+-------------------------------------------------------------
+delete            Delete a file on target device.
+B0 call $45
+in:      a0       Pointer to file name
+out:     v0       1 if successful, 0 if failed.
+-------------------------------------------------------------
+
+Event Classes
+
+The upper byte of each event type is a descriptor byte, which
+identifies the type of event to kernel routines.
+
+Descriptors:
+$ff     Thread
+$f0     Hardware
+$f1     Event
+$f2     Root counter
+$f3     User event
+$f4     BIOS
+
+Hardware events:
+$f0000001    VBLANK
+$f0000002    GPU
+$f0000003    CDROM Decoder
+$f0000004    DMA controller
+$f0000005    RTC0
+$f0000006    RTC1
+$f0000007    RTC2
+$f0000008    Controller
+$f0000009    SPU
+$f000000a    PIO
+$f000000b    SIO
+$f0000010    Exception
+$f0000011    memory card
+$f0000012    memory card
+$f0000013    memory card
+
+Root counter events:
+$f2000000    counter 0 (pixel clock)
+$f2000001    counter 1 (horizontal retrace)
+$f2000002    counter 2 (one-eighth of system clock)
+$f2000003    counter 3 (vertical retrace)
+
+Bios events:
+$f4000001    memory card
+$f4000002    libmath
+
+Event Specs:
+$0001    counter becomes zero
+$0002    interrupted
+$0004    end of i/o
+$0008    file was closed
+$0010    command acknowledged
+$0020    command completed
+$0040    data ready
+$0080    data end
+$0100    time out
+$0200    unknown command
+$0400    end of read buffer
+$0800    end of write buffer
+$1000    general interrupt
+$2000    new device
+$4000    system call instruction
+$8000    error happened
+$8001    previous write error happened
+$0301    domain error in libmath
+$0302    range error in libmath
+
+Event modes:
+$1000    Handle on interrupt
+$2000    Do not handle on interrupt.
+
+--------------------------------------------------------------------------
+Root Counters
+--------------------------------------------------------------------------
+There are 4 root counters.
+
+Counter  Base address      Synced to
+0        $1f801100         pixelclock
+1        $1f801110         horizontal retrace
+2        $1f801120         1/8 system clock
+3                          vertical retrace
+
+Each has three registers, one with the current value, one with the counter
+mode, and one with a target value.
+
+-------------------------------------------------------------
+$11x0 Count  r
+bit |31                  16|15                   0|
+desc|Garbage               |Count                 |
+     
+Count    Current count value, 0-$ffff
+
+Upper word seems to contain only garbage.
+-------------------------------------------------------------
+$11x4 Mode   rw
+bit |31      10|9  |8  |7 |6  |5 |4  |3  | 2  1| 0|
+desc|Garbage   |Div|Clc|  |Iq2|  |Iq1|Tar|     |En|
+
+En       0        Counter running
+         1        Counter stopped (only counter 2)
+Tar      0        Count to $ffff
+         1        Count to value in target register
+Iq1               Set both for IRQ on target reached.
+Iq2
+Clc      0        System clock (it seems)
+         1        Pixel clock (counter 0)
+                  Horizontal retrace (counter 1)
+Div      0        System clock (it seems)
+         1        1/8 * System clock (counter 2)
+
+When Clc and Div of the counters are zero, they all run at the
+same speed. This speed seems to be about 8 times the normal
+speed of root counter 2, which is specified as 1/8 the system
+clock.
+
+Bits 10 to 31 seem to contain only garbage.
+-------------------------------------------------------------
+$11x8 Target rw
+bit |31                  16|15                       0|
+desc|Garbage?              |Target                    |
+
+Target   Target value, 0-$ffff
+
+Upper word seems to contain only garbage.
+-------------------------------------------------------------
+Quick step-by-step:
+
+To set up an interrupt using these counters you can do the following:
+1 - Reset the counter. (Mode = 0)
+2 - Set its target value, set mode.
+3 - Enable corresponding bit in the interrupt mask register ($1f801074)
+    bit 3 = Counter 3 (Vblank)
+    bit 4 = Counter 0 (Pixel clock)
+    bit 5 = Counter 1 (Hor retrace)
+    bit 6 = Counter 2 (1/8 system clock)
+4 - Open an event. (Openevent bios call - $b0, $08)
+    With following arguments:
+ a0-Rootcounter event descriptor or'd with the counter number.
+    ($f2000000 - counter 0, $f2000001 - counter 1,$f2000002 - counter 2,
+     $f2000003 - counter 3)
+ a1-Spec = $0002 - interrupt event.
+ a2-Mode = Interrupt handling ($1000)
+ a3-Pointer to your routine to be executed.
+    The return value in V0 is the event identifier.
+
+5 - Enable the event, with the corresponding bioscall ($b0,$0c) with
+    the identifier as argument.
+
+6 - Make sure interrupts are enabled. (Bit 0 and bit 10 of the COP0 status
+    register must be set.)
+
+Your handler just has to restore the registers it uses, and it should
+terminate with a normal jr ra.
+
+To turn off the interrupt, first call disable event ($b0, $0d) and then
+close it using the Close event call ($b0,$09) both with the event number
+as argument.
+
+--------------------------------------------------------------------------
+DMA
+--------------------------------------------------------------------------
+
+-------------------------------------------------------------
+DPCR     Dma control register       $1f8010f0
+|1f 1c|1b 18|17 14|13 10|0f 0c|0b 08|07 04|03 00|
+|     |Dma6 |Dma5 |Dma4 |Dma3 |Dma2 |Dma1 |Dma0 |
+
+Each channel has a 4 bit control block allocated in this
+register.
+Bit 3:   1= Dma Enabled
+    2:   ?
+    1:   ?
+    0:   ?
+
+Bit 3 must be set for a channel to operate.
+-------------------------------------------------------------
+DICR     Dma interrupt register     $1f8010f4
+
+-------------------------------------------------------------
+The DMA channel registers are located starting at $1f801080. The
+base address for each channel is:
+$1f801080 DMA channel 0  MDECin
+$1f801090 DMA channel 1  MDECout
+$1f8010a0 DMA channel 2  GPU (lists + image data)
+$1f8010b0 DMA channel 3  CDrom
+$1f8010c0 DMA channel 4  SPU
+$1f8010d0 DMA channel 5  PIO
+$1f8010e0 DMA channel 6  OTC (reverse clear OT)
+
+-------------------------------------------------------------
+D_MADR           DMA base address.          $1f8010x0
+bit |1f                              00|
+desc|madr                              |
+
+madr     pointer to the address the DMA will start reading
+         from/writing to
+-------------------------------------------------------------
+D_BCR            DMA block control          $1f8010x4
+bit |1f                   10|0f      00|
+desc|ba                     |bs        |
+
+ba       Amount of blocks
+bs       Blocksize (words)
+
+The channel will transfer ba blocks of bs words. Take care
+not to set the size larger than the buffer of the corresponding
+unit can hold. (GPU & SPU both have a $10 word buffer). A
+larger blocksize, means a faster transfer.
+-------------------------------------------------------------
+D_CHCR           DMA channel control        $1f8010x8
+bit |1f-19|18|17-0c|0b|0a|09|08|07 01|00|
+desc|    0|Tr|    0| 0|Li|Co| 0|    0|Dr|
+
+Tr       0        No DMA transfer busy.
+         1        Start DMA transfer/DMA transfer busy.
+Li       1        Transfer linked list. (GPU only)
+Co       1        Transfer continuous stream of data.
+Dr       0        direction to memory
+         1        direction from memory
+-------------------------------------------------------------
+
+--------------------------------------------------------------------------
+doomed@c64.org <- corrections/additions     latest update -> psx.rules.org
+--------------------------------------------------------------------------
+16/may/1999       Initial version.
+19/may/1999       Added Breakpoint info. <Herozero>
+ 3/jun/1999       Root counters, some stuff on events and DMA added.
+
+(thanx to ppl in <>)
+--------------------------------------------------------------------------
+thanx & hello to the usual.
+
+
diff --git a/ntani.txt b/ntani.txt
@@ -0,0 +1,4 @@
+ηθελα να σου πω οτι οτι ειπα δεν ηταν αληθεια και δεν το εννουσα
+αλιμονο αν κατι τετοιο δεν το λαχταρουσα
+συγγνωμη μονο μπορω να πω και σε παρακαλω πιστεψε με μετανοω
+σε περιμενω την παρασκευη σαν τρελος να σε ειδω   
diff --git a/src/bios.c b/src/bios.c
@@ -7,12 +7,12 @@ BIOS*
 BIOS_new(const char* path)
 {
 	BIOS *b;
-    long pos;
-    FILE* f;
+	long pos;
+	FILE* f;
 
 	f = fopen(path, "rb");
 	if (f == NULL) 
-    {
+	{
 		perror("ERROR");
 		exit(EXIT_FAILURE);
 	}
@@ -22,12 +22,12 @@ BIOS_new(const char* path)
 	pos = ftell(f);
 	/* If not 512KB then exit */
 	if (pos != 512*1024) 
-    {
+	{
 		fprintf(stderr, "INVALID BIOS_SIZE\n"); exit(1);
 	}
 
 	fseek(f, 0, SEEK_SET);
-	
+
 	b = (BIOS*)malloc(sizeof(BIOS));
 	b->data = (unsigned char*)malloc(sizeof(unsigned char)*pos);
 	fread(b->data, 1, pos, f);
diff --git a/src/cdrom.c b/src/cdrom.c
@@ -1,10 +1,14 @@
 /* CDRom Drive */
-#include "cdrom.h"
-#include "util.h"
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 
+#include "cdrom.h"
+#include "util.h"
+#include "interconnect.h"
+
+extern Interconnect* inter;
+
 cdrom*
 cdrom_new(void)
 {
@@ -14,13 +18,13 @@ cdrom_new(void)
 }
 
 u8
-cdrom_fifo_is_empty(fifo fifo)
+cdrom_fifo_empty(fifo fifo)
 {
 	return fifo.write_idx == fifo.read_idx;
 }
 
 u8
-cdrom_fifo_is_full(fifo fifo)
+cdrom_fifo_full(fifo fifo)
 {
 	return fifo.write_idx == (fifo.read_idx ^ 0x10);
 }
@@ -34,46 +38,45 @@ cdrom_store8(cdrom* cd, u32 off, u8 val)
 void 
 cdrom_write(cdrom* cd, u32 offset, u8 val)
 {
-	//printf("**THE OFFSET IS %08X**\n", offset);
-	//printf("**THE VAL IS %d**\n", val);
-  u8 idx;
+	printf("CDROM_WRITE: THE OFFSET IS %08X\n", offset);
+	printf("CDROM_WRITE: THE VAL IS %d\n", val);
 
+	u8 idx;
 
 	idx = cd->status & STATUS_INDEX_MASK;
 
 	switch (offset) {
 	case 0:
-    cd->status = (cd->status & (~3)) | (val & 3);
+		cd->status = val & 3;
+		break;
+	case 1:
+		fprintf(stderr, "offset 1\n");
+		fprintf(stderr, "cd->status: %08X\n", cd->status);
+
+		cdrom_exec_cmd(cd, val);
+		break;
+	case 2:
+		cdrom_irq_write_mask(cd, val);
+		break;
+	case 3:
+		switch (idx) {
+		case 1:
+			cdrom_irq_ack(cd, val & 0x1f);
+
+			if ((val & 0x40) != 0)
+				memset(&cd->host_params, 0, sizeof(fifo));
+
+			break;
+		default:
+			fprintf(stderr, "cdrom_write: Unimplemented write on offset %08X and index %08X\n", offset, idx);
+			exit(EXIT_FAILURE);
+		}
 		break;
-  case 1:
-    fprintf(stderr, "offset 1\n");
-    break;
-  case 2:
-    fprintf(stderr, "offset 2\n");
-    break;
-  case 3:
-    switch (idx) {
-    case 1:
-      //self.irq_ack(shared, val & 0x1f);
-
-      //if val & 0x40 != 0 {
-      //  self.host_params.clear();
-      //}
-
-      //if val & 0xa0 != 0 {
-      //  panic!("Unhandled CDROM 3.1: {:02x}", val);
-      //}
-      break;
-    default:
-      fprintf(stderr, "cdrom_write: Unimplemented write on offset %08X and index %08X\n", offset, idx);
-      exit(EXIT_FAILURE);
-    }
-    break;
 	default:
 		break;
 	}
 
-  return;
+	return;
 }
 
 u8 
@@ -83,26 +86,66 @@ cdrom_load(cdrom* cd, u32 offset)
 
 	switch (offset) {
 	case 0:
-		return cd->status;
+		return cdrom_status(cd);
 	default:
 		break;
 	}
+
 	return 0;
 }
 
 void
-cdrom_command(cdrom* cd, u8 com)
+cdrom_exec_cmd(cdrom* cd, u8 cmd)
 {
 	// u32 ret;
 
-	switch ((cdrom_cmd)com) 
+	switch ((cdrom_cmd)cmd) 
 	{
 	case CDROM_CMD_GETSTAT: 
-		cd->status = (cd->status & STATUS_INDEX_MASK);
+		cd->status = cdrom_status(cd);
+		break;
+	case CDROM_CMD_TEST:
+		fprintf(stderr, "Not implemented TEST COMMAND");
 		break;
 	default: 
+		fprintf(stderr, "ERR: Unimplemented command -> %02X\n", cmd);
 		break;
 	}
 
 	return;
 }
+
+void
+cdrom_irq_ack(cdrom* cd, u8 val)
+{
+	// TODO
+	cd->irq_flags &= val;
+}
+
+void
+cdrom_irq_write_mask(cdrom* cd, u8 val)
+{
+	if ((val & 0x18) != 0) 
+		fprintf(stderr, "WARNING: Unhandled IRQ Mask: %02X\n", val);
+
+	cd->irq_mask = val & 0x1f;
+}
+
+u8
+cdrom_status(cdrom* cd)
+{
+	u8 S = cd->status;
+
+	S |= 0 << 2;
+	
+	S |= cdrom_fifo_empty(cd->host_params) << 3;
+	S |= !cdrom_fifo_full(cd->host_params) << 4;
+	S |= !cdrom_fifo_full(cd->host_response) << 5;
+
+	S |= (cd->rx_index < cd->rx_len) << 6;
+
+	// BLOCKING
+	S |= 0 << 7;/* cd-rom->sub_cpu.busy() << 7; */
+
+	return S;
+}
diff --git a/src/cdrom.h b/src/cdrom.h
@@ -1,5 +1,4 @@
 #pragma once
-
 #include "types.h"
 
 enum {
@@ -20,7 +19,7 @@ enum {
  cdrom_audio_channels = 2,
 };
 
-typedef struct fifo {
+typedef struct {
     // Data buffer
 	u8 buffer[16];
     // Write pointer (4bits + carry)
@@ -30,9 +29,11 @@ typedef struct fifo {
 } fifo;
 
 typedef enum {
-	CDROM_CMD_SYNC = 0X00,
-	CDROM_CMD_GETSTAT = 0X01,
-	CDROM_CMD_INIT = 0XA,
+	CDROM_CMD_SYNC = 0x00,
+	CDROM_CMD_GETSTAT = 0x01,
+	CDROM_CMD_SETLOC = 0x02,
+	CDROM_CMD_TEST = 0x19,
+	CDROM_CMD_INIT = 0xA,
 } cdrom_cmd;
 
 #define STATUS_INDEX_MASK    0x03
@@ -43,16 +44,32 @@ typedef enum {
 #define STATUS_DRQSTS_MASK   0x40
 #define STATUS_BUSYSTS_MASK  0x80
 
+typedef enum {
+	CDROM_IRQ_NOINTR,		/* No interrupt */
+	CDROM_IRQ_DATAREADY,	/* Data Read */
+	CDROM_IRQ_ACKNOWLEGE,	/* Command Complete */
+	CDROM_IRQ_COMPLETE,		/* Acknowledge */
+	CDROM_IRQ_DATAEND,		/* End of data detected */
+	CDROM_IRQ_DISKERROR	/* Error detected */
+} cdrom_irq_type;
+
+/* 4 memory-mapped registers. 
+   First of 'em has an index that dictates how the others behave.
+*/
 typedef struct cdrom {
 	u8 status; /* Status Register */
-    /// Command parameter FIFO
+
+	/* Command parameter FIFO */
 	fifo host_params;
 	fifo host_response;
 
-	u8 command;
+	cdrom_cmd command;
 	u8 irq_flags;
 	u8 irq_mask;
 
+	u8 rx_index;
+	u8 rx_len;
+
 	u8 sb[2340];
 	u32 SB_in;
 } cdrom;
@@ -68,9 +85,12 @@ u8 cdrom_load(cdrom*, u32);
 void cdrom_load8(cdrom*, u8);
 void cdrom_load16(cdrom*, u16);
 void cdrom_load32(cdrom*, u32);
+u8	 cdrom_status(cdrom*);
 
-u8 	 cdrom_fifo_is_empty(fifo);
-u8 	 cdrom_fifo_is_full(fifo);
+u8 	 cdrom_fifo_empty(fifo);
+u8 	 cdrom_fifo_full(fifo);
 u8 	 cdrom_fifo_push(void);
 
-void cdrom_command(cdrom*, u8);
+void cdrom_exec_cmd(cdrom*, u8);
+void cdrom_irq_ack(cdrom*, u8);
+void cdrom_irq_write_mask(cdrom*, u8);
diff --git a/src/interconnect.c b/src/interconnect.c
@@ -18,6 +18,7 @@ new_interconnect(void) {
   inter->dma = DMA_new();
   inter->gpu = GPU_new();
   inter->cdrom = cdrom_new();
+  inter->irq = irq_new();
   return inter;
 }
 
@@ -54,12 +55,11 @@ INTER_load8(Interconnect* inter, u32 addr)
     return 0;
   }
 
-//  contains = UTIL_contains(CDROM_START, CDROM_SIZE, abs_addr, &offset);
-//  if (contains)
-//  {
-//	cdrom_load(inter->cdrom, offset);
-//    return 0;
-//  }
+  contains = UTIL_contains(CDROM_START, CDROM_SIZE, abs_addr, &offset);
+  if (contains)
+  {
+	  return cdrom_load(inter->cdrom, offset);
+  }
 
   fprintf(stderr, "Unhandled Load8 At Address %08X\n", addr);
   exit(EXIT_FAILURE);
diff --git a/src/interconnect.h b/src/interconnect.h
@@ -5,6 +5,7 @@
 #include "gpu.h"
 #include "types.h"
 #include "cdrom.h"
+#include "irq.h"
 
 struct Interconnect {
 	BIOS*	 bios;
@@ -12,6 +13,7 @@ struct Interconnect {
 	DMA*	 dma;
 	GPU*	 gpu;
 	cdrom*	 cdrom;
+	irq		 irq;
 };
 
 typedef struct Interconnect Interconnect;
diff --git a/src/irq.c b/src/irq.c
@@ -7,7 +7,7 @@ irq_new(void)
 {
 	irq i;
 	i.status = 0;
-	i.mask = 0;
+	i.mask = 99;
 	return i;
 }
 
@@ -18,7 +18,10 @@ irq_write(irq* i, u32 a,  u32 v)
 
 	//printf("[IRQ] Write: 0x%08x 0x%08x --- PAD TEMP\n", A, V);
 
-	if(a & 4) i->mask = v; else i->status &= v;
+	if(a & 4) 
+		i->mask = v; 
+	else 
+		i->status &= v;
 
 	return;
 }
@@ -28,7 +31,7 @@ irq_load(irq* i, u32 a)
 {
 	u32 ret = 0;
 
-	if(a & 4)
+	if (a & 4)
 		ret = i->mask;
 	else
 		ret = i->status;
diff --git a/src/main.c b/src/main.c
@@ -1,8 +1,5 @@
 #include <stdlib.h>
 #include <SDL2/SDL.h>
-#include <lua.h>
-#include <lualib.h>
-#include <lauxlib.h>
 
 #include "cpu.h"
 #include "interconnect.h"
@@ -10,46 +7,64 @@
 #include "mem.h"
 #include "gpu.h"
 #include "sr.h"
+#include "MiniFB.h"
 
 SDL_Event ev;
 
+Interconnect *inter;
+
+int WINDOW_STATE;
+
+void 
+keyboard(struct mfb_window *window, mfb_key key, mfb_key_mod mod, bool isPressed) 
+{
+	if (key == KB_KEY_ESCAPE) 
+		exit(EXIT_FAILURE);
+	if (key == KB_KEY_A)
+		fprintf(stderr, "The value of OFFSET is: %08X", inter->cdrom->status & 3);
+}
+
 int
 main(int argc, char **argv)
 {
     int c;
     REN *ren;
     CPU *cpu;
-    Interconnect *inter;
 	//lua_State *L = luaL_newstate();
 	//luaL_openlibs(L);
 
-    SDL_Init(SDL_INIT_VIDEO);
+    //SDL_Init(SDL_INIT_VIDEO);
+    //SDL_SetRenderDrawColor(ren->renderer, 0xff, 0xff, 0xff, 0xff);
+    //SDL_RenderClear(ren->renderer);
+    //SDL_RenderPresent(ren->renderer);
+
     inter = new_interconnect();
     cpu = new_cpu(inter);
     ren = inter->gpu->ren;
-    SDL_SetRenderDrawColor(ren->renderer, 0xff, 0xff, 0xff, 0xff);
-    SDL_RenderClear(ren->renderer);
-    SDL_RenderPresent(ren->renderer);
+
+	mfb_set_keyboard_callback(inter->gpu->ren->window, keyboard);
 
     while(1) {
+    //while(mfb_wait_sync(inter->gpu->ren->window)) {
         /* Because it's too slow to run events every instr */
         for (c = 0; c < 1e5; c++) CPU_run_next_instruction(cpu);
 
-        while(SDL_PollEvent(&ev) != 0) {
-            switch(ev.type) {
-                case SDL_QUIT:
-                SDL_Quit();
-                exit(1);
-                case SDL_KEYDOWN:
-                if (ev.key.keysym.sym == SDLK_q) {
-                    SDL_Quit();
-                    exit(1);
-                } else if (ev.key.keysym.sym == SDLK_a) {
-					fprintf(stderr, "The data %d\n;", inter->cdrom->host_params.buffer[0]);
-                    break;
-                }
-            }
-        }
+		if (WINDOW_STATE < 0)
+			break;
+		
+
+        //while(SDL_PollEvent(&ev) != 0) {
+        //    switch(ev.type) {
+        //        case SDL_QUIT:
+		//			SDL_Quit();
+		//			exit(1);
+        //        case SDL_KEYDOWN:
+        //        if (ev.key.keysym.sym == SDLK_q) {
+        //            SDL_Quit();
+        //            exit(1);
+		//		}
+        //    }
+        //}
     }
 
     free(inter->bios->data);
@@ -64,7 +79,7 @@ main(int argc, char **argv)
 
 	//lua_close(L);
 
-    SDL_Quit();
+    //SDL_Quit();
 
     return 0;
 }
diff --git a/src/sr.c b/src/sr.c
@@ -9,70 +9,72 @@
 #include "sr.h"
 #include "defs.h"
 #include "util.h"
+#include "MiniFB.h"
 
 ivec2
 POSITION_from_gp0(u32 val)
 {
-    ivec2 pos;
-    pos.x = (i16)val;
-    pos.y = (i16)(val >> 16);
+	ivec2 pos;
+	pos.x = (i16)val;
+	pos.y = (i16)(val >> 16);
 
-    return pos;
+	return pos;
 }
 
 C
 COLOR_from_gp0(u32 val)
 {
-    C c;
-    c.r = (u8)val;
-    c.g = (u8)(val >> 8);
-    c.b = (u8)(val >> 16);
-    return c;
+	C c;
+	c.r = (u8)val;
+	c.g = (u8)(val >> 8);
+	c.b = (u8)(val >> 16);
+	return c;
 }
 
 void
 FB_flip_vert(u32 *data)
 {
-  u64 bytes_per_line; u32 *line; i32 half, j;
-
-  bytes_per_line = W;
-  line = (u32 *)malloc(bytes_per_line * sizeof(u32));
-  half = H>>1;
-
-  for (j=0; j<half; j++) {
-    u64 l1 = j*bytes_per_line;
-    u64 l2 = (H-1-j)*bytes_per_line;
-    memmove((void *)line,      (void *)(data+l1), bytes_per_line* sizeof(u32));
-    memmove((void *)(data+l1), (void *)(data+l2), bytes_per_line* sizeof(u32));
-    memmove((void *)(data+l2), (void *)line,      bytes_per_line* sizeof(u32));
-  }
-  free(line);
+	u64 bytes_per_line; u32 *line; i32 half, j;
+
+	bytes_per_line = W;
+	line = (u32 *)malloc(bytes_per_line * sizeof(u32));
+	half = H>>1;
+
+	for (j=0; j<half; j++) {
+		u64 l1 = j*bytes_per_line;
+		u64 l2 = (H-1-j)*bytes_per_line;
+		memmove((void *)line,      (void *)(data+l1), bytes_per_line* sizeof(u32));
+		memmove((void *)(data+l1), (void *)(data+l2), bytes_per_line* sizeof(u32));
+		memmove((void *)(data+l2), (void *)line,      bytes_per_line* sizeof(u32));
+	}
+	free(line);
 }
 
 C
 C_new(u32 b)
 {
-  C c;
-  c.r = (u8)(b & 0xff);
-  c.g = (u8)((b >> 8) & 0xff);
-  c.b = (u8)((b >> 16) & 0xff);
-  return c;
+	C c;
+	c.r = (u8)(b & 0xff);
+	c.g = (u8)((b >> 8) & 0xff);
+	c.b = (u8)((b >> 16) & 0xff);
+	return c;
 }
 
-void REN_FB_set(REN* ren, i32 x, i32 y, u8 r, u8 g, u8 b) {
+void 
+REN_FB_set(REN* ren, i32 x, i32 y, u8 r, u8 g, u8 b) {
 
-  u32 *fb;
+	u32 *fb;
 
-  if (!ren->fb || x < 0 || y < 0 || x >= W || y >= H) return;
+	if (!ren->fb || x < 0 || y < 0 || x >= W || y >= H) return;
 
-  // Clamp color values
-  r = r > 255 ? 255 : r;
-  g = g > 255 ? 255 : g;
-  b = b > 255 ? 255 : b;
+	// Clamp color values
+	r = r > 255 ? 255 : r;
+	g = g > 255 ? 255 : g;
+	b = b > 255 ? 255 : b;
 
-  // Direct write instead of memcpy
-  fb = ren->fb + (x + y * W);
-  *fb = r | (g << 8) | (b << 16);
+	// Direct write instead of memcpy
+	fb = ren->fb + (x + y * W);
+	*fb = r | (g << 8) | (b << 16);
 }
 
 //C*
@@ -87,9 +89,10 @@ REN_new(void)
 {
 	REN* ren;
 	ren = (REN*)malloc(sizeof(REN));
-	ren->window = SDL_CreateWindow("Ultimecia", 400 , 300, WIN_W, WIN_H, SDL_WINDOW_HIDDEN);
-	ren->renderer = SDL_CreateRenderer(ren->window, -1, 0);
-	ren->tex = SDL_CreateTexture(ren->renderer, SDL_PIXELFORMAT_RGB888, SDL_TEXTUREACCESS_STREAMING, W, H);
+	ren->window = mfb_open_ex("my display", 800, 600, WF_RESIZABLE | WF_ALWAYS_ON_TOP);
+	//ren->window = SDL_CreateWindow("Ultimecia", 400 , 300, WIN_W, WIN_H, SDL_WINDOW_HIDDEN);
+	//ren->renderer = SDL_CreateRenderer(ren->window, -1, 0);
+	//ren->tex = SDL_CreateTexture(ren->renderer, SDL_PIXELFORMAT_RGB888, SDL_TEXTUREACCESS_STREAMING, W, H);
 	ren->verts = (ivec2*)malloc(sizeof(ivec2) * 10000);  // Single allocation with larger size
 	ren->colors = (C*)malloc(sizeof(C) * 10000);         // Single allocation with larger size
 	ren->fb = (u32*)malloc(W*H*sizeof(u32));
@@ -161,7 +164,7 @@ REN_triangle(REN* ren, ivec2 verts[3], C colors[3])
 
 	for (i32 y = verts[0].y; y < verts[1].y; y++) {
 		draw_scanline(ren, y, xL >> 16, (C){(u8)(rL >> 16), (u8)(gL >> 16), (u8)(bL >> 16)},
-							  xR >> 16, (C){(u8)(rR >> 16), (u8)(gR >> 16), (u8)(bR >> 16)});
+				xR >> 16, (C){(u8)(rR >> 16), (u8)(gR >> 16), (u8)(bR >> 16)});
 		xL += dx01; rL += dr01; gL += dg01; bL += db01;
 		xR += dx02; rR += dr02; gR += dg02; bR += db02;
 	}
@@ -170,7 +173,7 @@ REN_triangle(REN* ren, ivec2 verts[3], C colors[3])
 	xL = verts[1].x << 16, rL = colors[1].r << 16, gL = colors[1].g << 16, bL = colors[1].b << 16;
 	for (i32 y = verts[1].y; y < verts[2].y; y++) {
 		draw_scanline(ren, y, xL >> 16, (C){(u8)(rL >> 16), (u8)(gL >> 16), (u8)(bL >> 16)},
-							  xR >> 16, (C){(u8)(rR >> 16), (u8)(gR >> 16), (u8)(bR >> 16)});
+				xR >> 16, (C){(u8)(rR >> 16), (u8)(gR >> 16), (u8)(bR >> 16)});
 		xL += dx12; rL += dr12; gL += dg12; bL += db12;
 		xR += dx02; rR += dr02; gR += dg02; bR += db02;
 	}
@@ -179,60 +182,61 @@ REN_triangle(REN* ren, ivec2 verts[3], C colors[3])
 void
 REN_push_triangle(REN* ren, ivec2 verts[3], C colors[3])
 {
-  u8 i;
+	u8 i;
 
-  REN_flush(ren);
+	REN_flush(ren);
 
-  for (i = 0; i < 3; i++) {
-    ren->verts[ren->nvertices] = verts[i];
-    ren->colors[ren->nvertices] = colors[i];
-    ren->nvertices++;
-  }
+	for (i = 0; i < 3; i++) {
+		ren->verts[ren->nvertices] = verts[i];
+		ren->colors[ren->nvertices] = colors[i];
+		ren->nvertices++;
+	}
 }
 
 void
 REN_push_quad(REN* ren, ivec2 verts[4], C colors[4])
 {
-  u8 i;
-
-  REN_flush(ren);
-
-  // First triangle: vertices 0,1,2
-  for (i = 0; i < 3; i++) {
-    ren->verts[ren->nvertices] = verts[i];
-    ren->colors[ren->nvertices] = colors[i];
-    ren->nvertices++;
-  }
-  for (i = 1; i < 4; i++) {
-    ren->verts[ren->nvertices] = verts[i];
-    ren->colors[ren->nvertices] = colors[i];
-    ren->nvertices++;
-  }
+	u8 i;
+
+	REN_flush(ren);
+
+	// First triangle: vertices 0,1,2
+	for (i = 0; i < 3; i++) {
+		ren->verts[ren->nvertices] = verts[i];
+		ren->colors[ren->nvertices] = colors[i];
+		ren->nvertices++;
+	}
+	for (i = 1; i < 4; i++) {
+		ren->verts[ren->nvertices] = verts[i];
+		ren->colors[ren->nvertices] = colors[i];
+		ren->nvertices++;
+	}
 }
 
 void
 REN_flush(REN* ren) {
-  u32 i;
-  for (i = 0; i < ren->nvertices; i += 3)
-      REN_triangle(ren, ren->verts + i, ren->colors + i);
-  ren->nvertices = 0;  // Reset buffer for next frame
+	u32 i;
+	for (i = 0; i < ren->nvertices; i += 3)
+		REN_triangle(ren, ren->verts + i, ren->colors + i);
+	ren->nvertices = 0;  // Reset buffer for next frame
 }
 
 void
 REN_draw(REN* ren)
 {
-  SDL_UpdateTexture(ren->tex, NULL, ren->fb, W * sizeof(u32));
-  SDL_RenderCopy(ren->renderer, ren->tex, NULL, NULL);
+	//SDL_UpdateTexture(ren->tex, NULL, ren->fb, W * sizeof(u32));
+	//SDL_RenderCopy(ren->renderer, ren->tex, NULL, NULL);
+	mfb_update_ex(ren->window, ren->fb, W , H);
 }
 
 void
 REN_display(REN* ren)
 {
-  // Flush any remaining vertices before displaying
-  if (ren->nvertices > 0) {
-    REN_flush(ren);
-  }
+	// Flush any remaining vertices before displaying
+	if (ren->nvertices > 0) {
+		REN_flush(ren);
+	}
 
-  REN_draw(ren);
-  SDL_RenderPresent(ren->renderer);
+	REN_draw(ren);
+	//SDL_RenderPresent(ren->renderer);
 }
diff --git a/src/sr.h b/src/sr.h
@@ -21,7 +21,8 @@ typedef struct { double x, y, z; } vec3f;
 enum mop {ADD, SUB, MUL, DIV};
 
 typedef struct _RENDERER {
-    SDL_Window* window;
+	struct mfb_window* window;
+    //SDL_Window* window;
     SDL_Texture* tex;
     SDL_Renderer* renderer;
     ivec2* verts;
diff --git a/src/time.c b/src/time.c
diff --git a/src/time.h b/src/time.h
diff --git a/test.cc b/test.cc
@@ -14,7 +14,6 @@ main()
   uint8_t value = 1;
   int wow = 3;
 
-
   cout << bitset<8>(status_bits) << endl;
   cout << bitset<8>(0x1f) << endl;
   cout << bitset<8>(~0x1f) << endl;

A	lib/include/MiniFB.h	\|	107	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	lib/include/MiniFB_cpp.h	\|	186	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	lib/include/MiniFB_enums.h	\|	186	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	lib/include/MiniFB_ios.h	\|	7	+++++++
A	lib/libminifb.a	\|	0
M	makefile	\|	24	+++++++++++-------------
A	misc/cdrom_exploration.txt	\|	274	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	misc/gpu.txt	\|	1250	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	misc/gte.txt	\|	1000	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	misc/psx_documentation_project.pdf	\|	0
A	misc/spu.txt	\|	526	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	misc/system.txt	\|	865	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	ntani.txt	\|	4	++++
M	src/bios.c	\|	10	+++++-----
M	src/cdrom.c	\|	117	++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M	src/cdrom.h	\|	40	++++++++++++++++++++++++++++++----------
M	src/interconnect.c	\|	12	++++++------
M	src/interconnect.h	\|	2	++
M	src/irq.c	\|	9	++++++---
M	src/main.c	\|	63	+++++++++++++++++++++++++++++++++++++++------------------------
M	src/sr.c	\|	158	++++++++++++++++++++++++++++++++++++++++---------------------------------------
M	src/sr.h	\|	3	++-
A	src/time.c	\|	0
A	src/time.h	\|	0
M	test.cc	\|	1	-

	ultimecia A ps1 emulator in c
	Log \| Files \| Refs