ultimecia

A ps1 emulator in c
Log | Files | Refs

commit 65dcc262b854e3dad76cfcce9d8a9c041b43f888
parent f94ef545e4e4ef7002788d3bdab7e4c3c3f90cbe
Author: noone <vazkats@gmail.com>
Date:   Wed,  1 Oct 2025 19:03:29 +0300

Introduced minifb instead of SDL2 for now. (so simple to integrate..)
Progress with cdrom
Added some misc docs for preservation's sake

Diffstat:
Alib/include/MiniFB.h | 107+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/MiniFB_cpp.h | 186+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/MiniFB_enums.h | 186+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/MiniFB_ios.h | 7+++++++
Alib/libminifb.a | 0
Mmakefile | 24+++++++++++-------------
Amisc/cdrom_exploration.txt | 274+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amisc/gpu.txt | 1250+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amisc/gte.txt | 1000+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amisc/psx_documentation_project.pdf | 0
Amisc/spu.txt | 526+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amisc/system.txt | 865+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Antani.txt | 4++++
Msrc/bios.c | 10+++++-----
Msrc/cdrom.c | 117++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Msrc/cdrom.h | 40++++++++++++++++++++++++++++++----------
Msrc/interconnect.c | 12++++++------
Msrc/interconnect.h | 2++
Msrc/irq.c | 9++++++---
Msrc/main.c | 63+++++++++++++++++++++++++++++++++++++++------------------------
Msrc/sr.c | 158++++++++++++++++++++++++++++++++++++++++---------------------------------------
Msrc/sr.h | 3++-
Asrc/time.c | 0
Asrc/time.h | 0
Mtest.cc | 1-
25 files changed, 4667 insertions(+), 177 deletions(-)

diff --git a/lib/include/MiniFB.h b/lib/include/MiniFB.h @@ -0,0 +1,107 @@ +#ifndef _MINIFB_H_ +#define _MINIFB_H_ + +#include "MiniFB_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#ifndef __ANDROID__ +#define MFB_RGB(r, g, b) (((uint32_t) r) << 16) | (((uint32_t) g) << 8) | ((uint32_t) b) +#define MFB_ARGB(a, r, g, b) (((uint32_t) a) << 24) | (((uint32_t) r) << 16) | (((uint32_t) g) << 8) | ((uint32_t) b) +#else + #ifdef HOST_WORDS_BIGENDIAN + #define MFB_RGB(r, g, b) (((uint32_t) r) << 16) | (((uint32_t) g) << 8) | ((uint32_t) b) + #define MFB_ARGB(a, r, g, b) (((uint32_t) a) << 24) | (((uint32_t) r) << 16) | (((uint32_t) g) << 8) | ((uint32_t) b) + #else + #define MFB_ARGB(r, g, b) (((uint32_t) a) << 24) | (((uint32_t) b) << 16) | (((uint32_t) g) << 8) | ((uint32_t) r) + #define MFB_RGB(r, g, b) (((uint32_t) b) << 16) | (((uint32_t) g) << 8) | ((uint32_t) r) + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Create a window that is used to display the buffer sent into the mfb_update function, returns 0 if fails +struct mfb_window * mfb_open(const char *title, unsigned width, unsigned height); +struct mfb_window * mfb_open_ex(const char *title, unsigned width, unsigned height, unsigned flags); + +// Update the display +// Input buffer is assumed to be a 32-bit buffer of the size given in the open call +// Will return a negative status if something went wrong or the user want to exit +// Also updates the window events +mfb_update_state mfb_update(struct mfb_window *window, void *buffer); + +mfb_update_state mfb_update_ex(struct mfb_window *window, void *buffer, unsigned width, unsigned height); + +// Only updates the window events +mfb_update_state mfb_update_events(struct mfb_window *window); + +// Close the window +void mfb_close(struct 
mfb_window *window); + +// Set user data +void mfb_set_user_data(struct mfb_window *window, void *user_data); +void * mfb_get_user_data(struct mfb_window *window); + +// Set viewport (useful when resize) +bool mfb_set_viewport(struct mfb_window *window, unsigned offset_x, unsigned offset_y, unsigned width, unsigned height); +// Let mfb to calculate the best fit from your framebuffer original size +bool mfb_set_viewport_best_fit(struct mfb_window *window, unsigned old_width, unsigned old_height); + +// DPI +// [Deprecated]: Probably a better name will be mfb_get_monitor_scale +void mfb_get_monitor_dpi(struct mfb_window *window, float *dpi_x, float *dpi_y); +// Use this instead +void mfb_get_monitor_scale(struct mfb_window *window, float *scale_x, float *scale_y); + +// Callbacks +void mfb_set_active_callback(struct mfb_window *window, mfb_active_func callback); +void mfb_set_resize_callback(struct mfb_window *window, mfb_resize_func callback); +void mfb_set_close_callback(struct mfb_window* window, mfb_close_func callback); +void mfb_set_keyboard_callback(struct mfb_window *window, mfb_keyboard_func callback); +void mfb_set_char_input_callback(struct mfb_window *window, mfb_char_input_func callback); +void mfb_set_mouse_button_callback(struct mfb_window *window, mfb_mouse_button_func callback); +void mfb_set_mouse_move_callback(struct mfb_window *window, mfb_mouse_move_func callback); +void mfb_set_mouse_scroll_callback(struct mfb_window *window, mfb_mouse_scroll_func callback); + +// Getters +const char * mfb_get_key_name(mfb_key key); + +bool mfb_is_window_active(struct mfb_window *window); +unsigned mfb_get_window_width(struct mfb_window *window); +unsigned mfb_get_window_height(struct mfb_window *window); +int mfb_get_mouse_x(struct mfb_window *window); // Last mouse pos X +int mfb_get_mouse_y(struct mfb_window *window); // Last mouse pos Y +float mfb_get_mouse_scroll_x(struct mfb_window *window); // Mouse wheel X as a sum. When you call this function it resets. 
+float mfb_get_mouse_scroll_y(struct mfb_window *window); // Mouse wheel Y as a sum. When you call this function it resets. +const uint8_t * mfb_get_mouse_button_buffer(struct mfb_window *window); // One byte for every button. Press (1), Release 0. (up to 8 buttons) +const uint8_t * mfb_get_key_buffer(struct mfb_window *window); // One byte for every key. Press (1), Release 0. + +// FPS +void mfb_set_target_fps(uint32_t fps); +unsigned mfb_get_target_fps(void); +bool mfb_wait_sync(struct mfb_window *window); + +// Timer +struct mfb_timer * mfb_timer_create(void); +void mfb_timer_destroy(struct mfb_timer *tmr); +void mfb_timer_reset(struct mfb_timer *tmr); +double mfb_timer_now(struct mfb_timer *tmr); +double mfb_timer_delta(struct mfb_timer *tmr); +double mfb_timer_get_frequency(void); +double mfb_timer_get_resolution(void); + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef __cplusplus +} + +#if !defined(MINIFB_AVOID_CPP_HEADERS) + #include "MiniFB_cpp.h" +#endif + +#endif + +#endif diff --git a/lib/include/MiniFB_cpp.h b/lib/include/MiniFB_cpp.h @@ -0,0 +1,186 @@ +#pragma once + +#if defined(__cplusplus) + +#include <functional> +#include "MiniFB.h" + +//------------------------------------- +// To be able to distinguish these C++ functions, using std::function, from C functions, using raw function pointers, we need to reverse params order. +// +// Note that FROM the compiler point of view +// mfb_set_XXX_callback(window, &my_c_func) +// and +// mfb_set_XXX_callback(window, [](...) {}) +// have the same parameters. 
+//------------------------------------- +void mfb_set_active_callback (std::function<void(struct mfb_window *, bool)> func, struct mfb_window *window); +void mfb_set_resize_callback (std::function<void(struct mfb_window *, int, int)> func, struct mfb_window *window); +void mfb_set_close_callback (std::function<bool(struct mfb_window *)> func, struct mfb_window *window); +void mfb_set_keyboard_callback (std::function<void(struct mfb_window *, mfb_key, mfb_key_mod, bool)> func, struct mfb_window *window); +void mfb_set_char_input_callback (std::function<void(struct mfb_window *, unsigned int)> func, struct mfb_window *window); +void mfb_set_mouse_button_callback(std::function<void(struct mfb_window *, mfb_mouse_button, mfb_key_mod, bool)> func, struct mfb_window *window); +void mfb_set_mouse_move_callback (std::function<void(struct mfb_window *, int, int)> func, struct mfb_window *window); +void mfb_set_mouse_scroll_callback(std::function<void(struct mfb_window *, mfb_key_mod, float, float)> func, struct mfb_window *window); +//------------------------------------- + +//------------------------------------- +template <class T> +void mfb_set_active_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, bool)); + +template <class T> +void mfb_set_resize_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, int, int)); + +template <class T> +void mfb_set_keyboard_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_key, mfb_key_mod, bool)); + +template <class T> +void mfb_set_char_input_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, unsigned int)); + +template <class T> +void mfb_set_mouse_button_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_mouse_button, mfb_key_mod, bool)); + +template <class T> +void mfb_set_mouse_move_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, 
int, int)); + +template <class T> +void mfb_set_mouse_scroll_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_key_mod, float, float)); +//------------------------------------- + +//------------------------------------- +// To avoid clumsy hands +//------------------------------------- +class mfb_stub { + mfb_stub() : m_window(0x0) {} + + friend void mfb_set_active_callback (std::function<void(struct mfb_window *window, bool)> func, struct mfb_window *window); + friend void mfb_set_resize_callback (std::function<void(struct mfb_window *, int, int)> func, struct mfb_window *window); + friend void mfb_set_close_callback (std::function<bool(struct mfb_window *)> func, struct mfb_window *window); + friend void mfb_set_keyboard_callback (std::function<void(struct mfb_window *, mfb_key, mfb_key_mod, bool)> func, struct mfb_window *window); + friend void mfb_set_char_input_callback (std::function<void(struct mfb_window *, unsigned int)> func, struct mfb_window *window); + friend void mfb_set_mouse_button_callback(std::function<void(struct mfb_window *, mfb_mouse_button, mfb_key_mod, bool)> func, struct mfb_window *window); + friend void mfb_set_mouse_move_callback (std::function<void(struct mfb_window *, int, int)> func, struct mfb_window *window); + friend void mfb_set_mouse_scroll_callback(std::function<void(struct mfb_window *, mfb_key_mod, float, float)> func, struct mfb_window *window); + + template <class T> + friend void mfb_set_active_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, bool)); + template <class T> + friend void mfb_set_resize_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, int, int)); + template <class T> + friend void mfb_set_close_callback(struct mfb_window *window, T *obj, bool (T::*method)(struct mfb_window *)); + template <class T> + friend void mfb_set_mouse_button_callback(struct mfb_window *window, T *obj, void (T::*method)(struct 
mfb_window *, mfb_mouse_button, mfb_key_mod, bool)); + template <class T> + friend void mfb_set_keyboard_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_key, mfb_key_mod, bool)); + template <class T> + friend void mfb_set_char_input_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, unsigned int)); + template <class T> + friend void mfb_set_mouse_button_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_mouse_button, mfb_key_mod, bool)); + template <class T> + friend void mfb_set_mouse_move_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, int, int)); + template <class T> + friend void mfb_set_mouse_scroll_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *, mfb_key_mod, float, float)); + + static mfb_stub *GetInstance(struct mfb_window *window); + + static void active_stub(struct mfb_window *window, bool isActive); + static void resize_stub(struct mfb_window *window, int width, int height); + static bool close_stub(struct mfb_window *window); + static void keyboard_stub(struct mfb_window *window, mfb_key key, mfb_key_mod mod, bool isPressed); + static void char_input_stub(struct mfb_window *window, unsigned int); + static void mouse_btn_stub(struct mfb_window *window, mfb_mouse_button button, mfb_key_mod mod, bool isPressed); + static void mouse_move_stub(struct mfb_window *window, int x, int y); + static void scroll_stub(struct mfb_window *window, mfb_key_mod mod, float deltaX, float deltaY); + + struct mfb_window *m_window; + std::function<void(struct mfb_window *window, bool)> m_active; + std::function<void(struct mfb_window *window, int, int)> m_resize; + std::function<bool(struct mfb_window *window)> m_close; + std::function<void(struct mfb_window *window, mfb_key, mfb_key_mod, bool)> m_keyboard; + std::function<void(struct mfb_window *window, unsigned int)> m_char_input; + 
std::function<void(struct mfb_window *window, mfb_mouse_button, mfb_key_mod, bool)> m_mouse_btn; + std::function<void(struct mfb_window *window, int, int)> m_mouse_move; + std::function<void(struct mfb_window *window, mfb_key_mod, float, float)> m_scroll; +}; + +//------------------------------------- +template <class T> +inline void mfb_set_active_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, bool)) { + using namespace std::placeholders; + + mfb_stub *stub = mfb_stub::GetInstance(window); + stub->m_active = std::bind(method, obj, _1, _2); + mfb_set_active_callback(window, mfb_stub::active_stub); +} + +//------------------------------------- +template <class T> +inline void mfb_set_resize_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, int, int)) { + using namespace std::placeholders; + + mfb_stub *stub = mfb_stub::GetInstance(window); + stub->m_resize = std::bind(method, obj, _1, _2, _3); + mfb_set_resize_callback(window, mfb_stub::resize_stub); +} + +//------------------------------------- +template <class T> +inline void mfb_set_close_callback(struct mfb_window *window, T *obj, bool (T::*method)(struct mfb_window *window)) { + using namespace std::placeholders; + + mfb_stub *stub = mfb_stub::GetInstance(window); + stub->m_close = std::bind(method, obj, _1); + mfb_set_close_callback(window, mfb_stub::close_stub); +} + +//------------------------------------- +template <class T> +inline void mfb_set_keyboard_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, mfb_key, mfb_key_mod, bool)) { + using namespace std::placeholders; + + mfb_stub *stub = mfb_stub::GetInstance(window); + stub->m_keyboard = std::bind(method, obj, _1, _2, _3, _4); + mfb_set_keyboard_callback(window, mfb_stub::keyboard_stub); +} + +//------------------------------------- +template <class T> +inline void mfb_set_char_input_callback(struct mfb_window *window, T *obj, void 
(T::*method)(struct mfb_window *window, unsigned int)) { + using namespace std::placeholders; + + mfb_stub *stub = mfb_stub::GetInstance(window); + stub->m_char_input = std::bind(method, obj, _1, _2); + mfb_set_char_input_callback(window, mfb_stub::char_input_stub); +} + +//------------------------------------- +template <class T> +inline void mfb_set_mouse_button_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, mfb_mouse_button, mfb_key_mod, bool)) { + using namespace std::placeholders; + + mfb_stub *stub = mfb_stub::GetInstance(window); + stub->m_mouse_btn = std::bind(method, obj, _1, _2, _3, _4); + mfb_set_mouse_button_callback(window, mfb_stub::mouse_btn_stub); +} + +//------------------------------------- +template <class T> +inline void mfb_set_mouse_move_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, int, int)) { + using namespace std::placeholders; + + mfb_stub *stub = mfb_stub::GetInstance(window); + stub->m_mouse_move = std::bind(method, obj, _1, _2, _3); + mfb_set_mouse_move_callback(window, mfb_stub::mouse_move_stub); +} + +//------------------------------------- +template <class T> +inline void mfb_set_mouse_scroll_callback(struct mfb_window *window, T *obj, void (T::*method)(struct mfb_window *window, mfb_key_mod, float, float)) { + using namespace std::placeholders; + + mfb_stub *stub = mfb_stub::GetInstance(window); + stub->m_scroll = std::bind(method, obj, _1, _2, _3, _4); + mfb_set_mouse_scroll_callback(window, mfb_stub::scroll_stub); +} + +#endif diff --git a/lib/include/MiniFB_enums.h b/lib/include/MiniFB_enums.h @@ -0,0 +1,186 @@ +#pragma once + +#include <stdint.h> +#include <stdbool.h> + +// Enums +typedef enum { + STATE_OK = 0, + STATE_EXIT = -1, + STATE_INVALID_WINDOW = -2, + STATE_INVALID_BUFFER = -3, + STATE_INTERNAL_ERROR = -4, +} mfb_update_state; + +typedef enum { + MOUSE_BTN_0, // No mouse button + MOUSE_BTN_1, + MOUSE_BTN_2, + MOUSE_BTN_3, + 
MOUSE_BTN_4, + MOUSE_BTN_5, + MOUSE_BTN_6, + MOUSE_BTN_7, +} mfb_mouse_button; +#define MOUSE_LEFT MOUSE_BTN_1 +#define MOUSE_RIGHT MOUSE_BTN_2 +#define MOUSE_MIDDLE MOUSE_BTN_3 + +typedef enum { + KB_KEY_UNKNOWN = -1, + + KB_KEY_SPACE = 32, + KB_KEY_APOSTROPHE = 39, + KB_KEY_COMMA = 44, + KB_KEY_MINUS = 45, + KB_KEY_PERIOD = 46, + KB_KEY_SLASH = 47, + KB_KEY_0 = 48, + KB_KEY_1 = 49, + KB_KEY_2 = 50, + KB_KEY_3 = 51, + KB_KEY_4 = 52, + KB_KEY_5 = 53, + KB_KEY_6 = 54, + KB_KEY_7 = 55, + KB_KEY_8 = 56, + KB_KEY_9 = 57, + KB_KEY_SEMICOLON = 59, + KB_KEY_EQUAL = 61, + KB_KEY_A = 65, + KB_KEY_B = 66, + KB_KEY_C = 67, + KB_KEY_D = 68, + KB_KEY_E = 69, + KB_KEY_F = 70, + KB_KEY_G = 71, + KB_KEY_H = 72, + KB_KEY_I = 73, + KB_KEY_J = 74, + KB_KEY_K = 75, + KB_KEY_L = 76, + KB_KEY_M = 77, + KB_KEY_N = 78, + KB_KEY_O = 79, + KB_KEY_P = 80, + KB_KEY_Q = 81, + KB_KEY_R = 82, + KB_KEY_S = 83, + KB_KEY_T = 84, + KB_KEY_U = 85, + KB_KEY_V = 86, + KB_KEY_W = 87, + KB_KEY_X = 88, + KB_KEY_Y = 89, + KB_KEY_Z = 90, + KB_KEY_LEFT_BRACKET = 91, + KB_KEY_BACKSLASH = 92, + KB_KEY_RIGHT_BRACKET = 93, + KB_KEY_GRAVE_ACCENT = 96, + KB_KEY_WORLD_1 = 161, + KB_KEY_WORLD_2 = 162, + + KB_KEY_ESCAPE = 256, + KB_KEY_ENTER = 257, + KB_KEY_TAB = 258, + KB_KEY_BACKSPACE = 259, + KB_KEY_INSERT = 260, + KB_KEY_DELETE = 261, + KB_KEY_RIGHT = 262, + KB_KEY_LEFT = 263, + KB_KEY_DOWN = 264, + KB_KEY_UP = 265, + KB_KEY_PAGE_UP = 266, + KB_KEY_PAGE_DOWN = 267, + KB_KEY_HOME = 268, + KB_KEY_END = 269, + KB_KEY_CAPS_LOCK = 280, + KB_KEY_SCROLL_LOCK = 281, + KB_KEY_NUM_LOCK = 282, + KB_KEY_PRINT_SCREEN = 283, + KB_KEY_PAUSE = 284, + KB_KEY_F1 = 290, + KB_KEY_F2 = 291, + KB_KEY_F3 = 292, + KB_KEY_F4 = 293, + KB_KEY_F5 = 294, + KB_KEY_F6 = 295, + KB_KEY_F7 = 296, + KB_KEY_F8 = 297, + KB_KEY_F9 = 298, + KB_KEY_F10 = 299, + KB_KEY_F11 = 300, + KB_KEY_F12 = 301, + KB_KEY_F13 = 302, + KB_KEY_F14 = 303, + KB_KEY_F15 = 304, + KB_KEY_F16 = 305, + KB_KEY_F17 = 306, + KB_KEY_F18 = 307, + KB_KEY_F19 = 308, + KB_KEY_F20 = 
309, + KB_KEY_F21 = 310, + KB_KEY_F22 = 311, + KB_KEY_F23 = 312, + KB_KEY_F24 = 313, + KB_KEY_F25 = 314, + KB_KEY_KP_0 = 320, + KB_KEY_KP_1 = 321, + KB_KEY_KP_2 = 322, + KB_KEY_KP_3 = 323, + KB_KEY_KP_4 = 324, + KB_KEY_KP_5 = 325, + KB_KEY_KP_6 = 326, + KB_KEY_KP_7 = 327, + KB_KEY_KP_8 = 328, + KB_KEY_KP_9 = 329, + KB_KEY_KP_DECIMAL = 330, + KB_KEY_KP_DIVIDE = 331, + KB_KEY_KP_MULTIPLY = 332, + KB_KEY_KP_SUBTRACT = 333, + KB_KEY_KP_ADD = 334, + KB_KEY_KP_ENTER = 335, + KB_KEY_KP_EQUAL = 336, + KB_KEY_LEFT_SHIFT = 340, + KB_KEY_LEFT_CONTROL = 341, + KB_KEY_LEFT_ALT = 342, + KB_KEY_LEFT_SUPER = 343, + KB_KEY_RIGHT_SHIFT = 344, + KB_KEY_RIGHT_CONTROL = 345, + KB_KEY_RIGHT_ALT = 346, + KB_KEY_RIGHT_SUPER = 347, + KB_KEY_MENU = 348 +} mfb_key; +#define KB_KEY_LAST KB_KEY_MENU + +typedef enum { + KB_MOD_SHIFT = 0x0001, + KB_MOD_CONTROL = 0x0002, + KB_MOD_ALT = 0x0004, + KB_MOD_SUPER = 0x0008, + KB_MOD_CAPS_LOCK = 0x0010, + KB_MOD_NUM_LOCK = 0x0020 +} mfb_key_mod; + +typedef enum { + WF_RESIZABLE = 0x01, + WF_FULLSCREEN = 0x02, + WF_FULLSCREEN_DESKTOP = 0x04, + WF_BORDERLESS = 0x08, + WF_ALWAYS_ON_TOP = 0x10, +} mfb_window_flags; + +// Opaque pointer +struct mfb_window; +struct mfb_timer; + +// Event callbacks +typedef void(*mfb_active_func)(struct mfb_window *window, bool isActive); +typedef void(*mfb_resize_func)(struct mfb_window *window, int width, int height); +typedef bool(*mfb_close_func)(struct mfb_window* window); +typedef void(*mfb_keyboard_func)(struct mfb_window *window, mfb_key key, mfb_key_mod mod, bool isPressed); +typedef void(*mfb_char_input_func)(struct mfb_window *window, unsigned int code); +typedef void(*mfb_mouse_button_func)(struct mfb_window *window, mfb_mouse_button button, mfb_key_mod mod, bool isPressed); +typedef void(*mfb_mouse_move_func)(struct mfb_window *window, int x, int y); +typedef void(*mfb_mouse_scroll_func)(struct mfb_window *window, mfb_key_mod mod, float deltaX, float deltaY); + diff --git a/lib/include/MiniFB_ios.h 
b/lib/include/MiniFB_ios.h @@ -0,0 +1,7 @@ +#pragma once + +#include "MiniFB_enums.h" + +void user_implemented_init(struct mfb_window *window); + +void user_implemented_update(struct mfb_window *window); diff --git a/lib/libminifb.a b/lib/libminifb.a Binary files differ. diff --git a/makefile b/makefile @@ -1,28 +1,26 @@ -#Makefile for Ultimecia PSX Emulator +# Ultimecia PSX Emulator Makefile -CC := cc -CFLAGS := -Wall -Wpedantic -std=c99 -g -O3 -I/opt/homebrew/include/ -I/opt/homebrew/include/lua5.4 -LDFLAGS := -L/opt/homebrew/lib/ -LIBS := -lSDL2 -llua +CC := clang +CFLAGS := -Wall -Wpedantic -g -Ilib/include $(shell /opt/homebrew/bin/pkg-config --cflags sdl2 lua) +LDFLAGS := $(shell /opt/homebrew/bin/pkg-config --libs sdl2 lua) -framework Cocoa -framework Metal -framework MetalKit +LIBS := lib/libminifb.a SRC := $(wildcard src/*.c) OBJ := $(SRC:.c=.o) BIN := bin/ultimecia -JOBS := 10 all: $(BIN) $(BIN): $(OBJ) - @mkdir -p bin - $(CC) $(OBJ) -o $@ $(LDFLAGS) $(LIBS) + @mkdir -p bin + $(CC) $(OBJ) -o $@ $(LDFLAGS) $(LIBS) -# Pattern rule for compiling .c to .o +# Compile .c to .o %.o: %.c - $(CC) $(CFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) -c $< -o $@ -# Clean clean: - rm -f src/*.o - rm -f $(BIN) + rm -f src/*.o + rm -f $(BIN) .PHONY: all clean diff --git a/misc/cdrom_exploration.txt b/misc/cdrom_exploration.txt @@ -0,0 +1,274 @@ +-------------------------------------------------------------------------- +Quick CDrom explanation... 2000/doomed + +There's a *LOT* missing here, and the other half might be incorrect, so +i won't take any responsibility for strange stuff happening. It should +give you some pointers in the right direction for your own CD explorations +though. More might follow at some later time.. 
+-------------------------------------------------------------------------- +CDREG0 = $1f801800 +CDREG1 = $1f801801 +CDREG2 = $1f801802 +CDREG3 = $1f801803 +-------------------------------------------------------------------------- +CDREG0 write : 0 - to send a command + 1 - to get the result + read : I/O status? + bit 0- 0 REG1 command send + - 1 REG1 data read + bit 1- 0 data transfer finished + 1 data transfer ready/in progress + bit 7- 1 command being processed. + +CDREG1 write : command + read : results + +CDREG2 write : send arguments + write : 7 = flush arg buffer? + +CDREG3 write : 7 = flush irq + read : hi nibble: ? + low nibble: interrupt status +-------------------------------------------------------------------------- +Modes for SetMode: +M_Speed bit 7 0: normal speed 1: double speed +M_Strsnd bit 6 0: ADPCM off 1: ADPCM on +M_Size bit 5 0: 2048 byte 1: 2340 byte +M_Size2 bit 4 0: - 1: 2328 byte +M_SF bit 3 0: Channel off 1: Channel on +M_Report bit 2 0: Report off 1: Report on +M_AutoPause bit 1 0: AutoPause off 1: AutoPause on +M_CDDA bit 0 0: CD-DA off 1: CD-DA on + +These modes can be set using the setmode command. +-------------------------------------------------------------------------- +Status bits: +Play bit 7 playing CD-DA +Seek bit 6 seeking +Read bit 5 reading data sectors +ShellOpen bit 4 once shell open +SeekError bit 3 seek error detected +Standby bit 2 spindle motor rotating +Error bit 1 command error detected + +These are the bit values for the status byte received from CD commands. +-------------------------------------------------------------------------- +Interrupt values: +NoIntr $00 No interrupt +DataReady $01 Data Ready +Acknowledge $02 Command Complete +Complete $03 Acknowledge +DataEnd $04 End of Data Detected +DiskError $05 Error Detected + +These are returned in the low nibble of CDREG3. First write a 1 to CDREG0 +before reading CDREG3. When a command is completed it returns 3. 
+To acknowledge an irq value after you've handled it, write a 1 to CDREG0 +then a 7 to both CDREG2 and CDREG3. Another interrupt may be queued, so +you should check CDREG3 again if 0 or if there's another interrupt to +be handled. +-------------------------------------------------------------------------- +Sync $00 - status +Nop $01 - status +Setloc $02 min,sec,sector status +Play $03 B - status +Forward $04 B - status +Backward $05 B - status +ReadN $06 B - status +Standby $07 B - status +Stop $08 B - status +Pause $09 B - status +Init $0a - status +Mute $0b - status +Demute $0c - status +Setfilter $0d file,channel status +Setmode $0e mode status +Getparam $0f - status,mode,file?,chan?,?,? +GetlocL $10 - min,sec,sector,mode,file,channel +GetlocP $11 - track,index,min,sec,frame,amin, + asec,aframe +GetTN $13 - status,first,total (BCD) +GetTD $14 track(BCD) status,min,sec (BCD) +SeekL $15 B * status +SeekP $16 B * status +Test $19 # depends on parameter +ID $1A B - success,flag1,flag2,00 + 4 letters of ID (SCEx) +ReadS $1B B - status +Reset $1C - status +ReadTOC $1E B? - status + +* These commands' targets are set using Setloc. +# Command 19 is really a portal to another set of commands. + +B means blocking. These commands return an immediate result saying the +command was started, but you need to wait for an IRQ in order to get +real results. + +Command descriptions: +00 Sync: Command does not succeed until all other commands complete. + This can be used for synchronization - hence the name. +01 Nop: Does nothing; use this if you just want the status. +02 Setloc: This command, with its parameters, sets the target for + commands with a * for their parameter list. +03 Play: Plays audio sectors from the last point seeked. This is + almost identical to CdlReadS, believe it or not. The main + difference is that this does not trigger a completed read + IRQ. CdlPlay may be used on data sectors. 
However, all + sectors from data tracks are treated as 00, so no sound is + played. As CdlPlay is reading, the audio data appears in + the sector buffer, but is not reliable. Game Shark + "enhancement CDs" for the 2.x and 3.x versions used this + to get around the PSX copy protection. +04 Forward: Seek to next track ? +05 Backward: Seek to beginning of current track, or previous track if + early in current track (like a CD player's back button) +06 ReadN: Read with retry. Each sector causes an IRQ (type 1) if + ModeRept is on (I think). ReadN and ReadS cause errors if + you're trying to read a non-PSX CD or audio CD without a + mod chip. +07 Standby: CD-ROM aborts all reads and playing, but continues + spinning. CD-ROM does not attempt to keep its place. +08 Stop: Stops motor. Official way to restart is 0A, but almost + any command will restart it. +09 Pause: Like Standby, except the point is to maintain the current + location within reasonable error. +0A Init: Multiple effects at once. Setmode = 00, Standby, abort + all commands. +0B Mute: Turn off CDDA stream to SPU. +0C Demute: Turn on CDDA streaming to SPU. +0D Setfilter: Automatic ADPCM (CD-ROM XA) filter ignores sectors except + those which have the same channel and file (parameters) + in their subheader area. This is the mechanism used to + select which of multiple songs in a single XA to play. + Setfilter does not affect actual reading (sector reads + still occur for all sectors). +0E Setmode: Sets parameters such as read mode and spin speed. See + chart above the command list. +0F Getparam: ??? returns status, mode, file, channel, ?, ? +10 GetlocL: Retrieves first 6 (8?) bytes of last read sector (header) + This is used to know where the sector came from, but is + generally pointless in 2340 byte read mode. All results + are in BCD ($12 is considered track twelve, not eighteen) + Command may execute concurrently with a read or play + (GetlocL returns results immediately). 
+11 GetlocP: Retrieves 8 of 12 bytes of sub-Q data for the last-read + sector. Same purpose as GetlocL, but more powerful, and + works while playing audio. All results are in BCD. + track: track number ($AA for lead-out area) + index: index number (INDEX lines in CUE sheets) + min: minute number within track + sec: second number within track + frame: sector number within "sec" (0 to 74) + amin: minute number on entire disk + asec: second number on entire disk + aframe: sector number within "asec" (0 to 74) +13 GetTN: Get first track number and number of tracks in the TOC. +14 GetTD: Gets start of specified track (does it return sector??) +15 SeekL: Seek to Setloc's location in data mode (can only seek to + data sectors, but is accurate to the sector) +16 SeekP: Seek to Setloc's location in audio mode (can seek to + any sector, but is only accurate to the second) +19 Test: This function has many subcommands that are completely + different. +1A ID: Returns copy protection status. StatError for invalid + data CD, StatStandby for valid PSX CD or audio CD. The + following bits I'm unsure about, but I think the 3rd + byte has $80 bit for "CD denied" and $10 bit for + "import". $80 = copy, $90 = denied import, $10 = + accepted import (Yaroze only). The 5th through 8th + bytes are the SCEx ASCII string from the CD. +1B ReadS: Read without automatic retry. +1C Reset: Same as opening and closing the drive door. +1E ReadTOC: Reread the Table of Contents without reset. + +----------------------------------------------------------------------- +-------------------------------------------------------------------------- +To send a command: + +- First send any arguments by writing 0 to CDREG0, then all arguments + sequentially to CDREG2 + +- Then write 0 to CDREG0, and the command to CDREG1. + +To wait for a command to complete: + +- Wait until a CDrom irq occurs (bit 3 of the interrupt regs) The cause + of the cdrom irq is in the low nibble of CDREG3. 
This is usually 3 + on a successful completion. Failure to complete the command will result + in a 5. If you don't wish to use irq's you can just check for the + low nibble of cdreg3 to become something other than 0, but make sure + it doesn't get cleared in any irq setup by the bios or some such. + +To Get the results + +- Write a 1 to CDREG0, then read CDREG0. If bit 5 is set, read a return + value from CDREG1, then read CDREG0 again; repeat until bit 5 goes low. + +To Clear the irq + +- After command completion the irq cause should be cleared, do this by + writing a 1 to CDREG0 then 7 to CDREG2 and CDREG3. My guess is that + the write to CDREG2 clears the arguments previously set from some + buffer. + Note that irq's are queued, and if you clear the current, another may + come up directly.. +-------------------------------------------------------------------------- +To init the CD: + +-Flush all irq's +-CDREG0=0 +-CDREG3=0 +-Com_Delay=4901 ($1f801020) +-Send 2 NOP's +-Command $0a, no args. (<- what's this??) +-Demute +-------------------------------------------------------------------------- +To set up the cd for audio playback, some weird init stuff needs to be +done: + +CDREG0=2 +CDREG2=$80 +CDREG3=0 +CDREG0=3 +CDREG1=$80 +CDREG2=0 +CDREG3=$20 + +Also don't forget to init the SPU. (CDvol and CD enable especially) +-------------------------------------------------------------------------- +You should not send some commands while the CD is seeking. (ie. status +returns with bit 6 set.) Thing is that the status only gets updated after +a new command. I haven't tested this for other commands, but for the +play command ($03) you can just keep repeating the command and checking +the status returned by that, for bit 6 to go low (and bit 7 to go high in +this case) If you don't and try to do a getloc directly after the play +command reports it's done, the cd will stop. 
(I guess the cd can't +get it's current location while it's seeking, so the logic stops the seek +to get an exact fix, but never restarts..) + + +----------------------------------------------------------------------- +19 subcommands. +----------------------------------------------------------------------- + +For one reason or another, there is a counter that counts the number of +SCEx strings received by the CD-ROM controller. + +Be aware that the results for these commands can exceed 8 bytes. + +04: Read SCEx counter (returned in 1st byte?) +05: Reset SCEx counter. This also sets 1A's SCEx response to + 00 00 00 00, but doesn't appear to force a protection failure. +20: Returns an ASCII string specifying where the CD-ROM firmware is + intended to be used ("for Japan", "for U/C"). +22: Returns a chip number inside the PSX in use. +23: Returns another chip number. +24: Returns yet another chip number. Same as 22's on some PSXs. + +-------------------------------------------------------------------------- +3/nov/1999 Initial version +3/feb/2000 Update. Big thanks to Barubary, who rewrote a large part. +-------------------------------------------------------------------------- +psx.padua.org www.padua.org doomed@c64.org +-------------------------------------------------------------------------- diff --git a/misc/gpu.txt b/misc/gpu.txt @@ -0,0 +1,1250 @@ +=========================================================================== +GPU information. +=========================================================================== +About this document. +--------------------------------------------------------------------------- +This document is a collection of all info on the GPU i could find and my +own notes. Most of this is the result of experiment, so not all info might +be correct. This document is most probably not complete, and not all +capabilities and quirks of the GPU are documented. 
No responsibility is +taken for anything that might occur using the information in this document. + +The K-communications text and the one by Nagra/Blackbag are the basis of +this document. + +Notations and conventions +When the format of data is given it's shown as a bitwise representation +like this: + +pixel| | +bit |0f|0e 0d 0c 0b 0a|09 08 07 06 05|04 03 02 01 00| +desc.|S |Blue |Green |Red | + +The "pixel" row shows how large the data is in the frame buffer. Each mark +on this line denotes the size of the data in frame buffer pixels, as that +is the minimum size that can be addressed. +The bit row shows which bits of the data are used, and separators are used +to show where the different elements of the data stop and start. MSB is on +the left, LSB is on the right. Stuff like |0f-08| means bit $0f to bit $08. +The desc. row shows the description of the different elements. With +separators where the element starts and ends. + +-------------------------------------------------------------------------- +The Graphics Processing Unit (GPU) - overview. +-------------------------------------------------------------------------- +The GPU is the unit responsible for the graphical output of the PSX. It +handles display and drawing of all graphics. It has the control over a 1MB +frame buffer and contains a 2Kb texture cache. It has a command and +data port. It has a 64 byte command FIFO buffer, which can hold up to +3 commands and is connected to a DMA channel for transfer of image data and +linked command lists and a DMA channel for reverse clearing an OT. + +--------------------------------------------------------------------------- +The Frame Buffer. +--------------------------------------------------------------------------- +The frame buffer is the memory which stores all graphic data which the GPU +can access and manipulate, while drawing and displaying an image. The +memory is under the GPU and cannot be accessed by the CPU directly. 
It is +operated solely by the GPU. The frame buffer has a size of 1 MB and is +treated as a space of 1024 pixels wide and 512 pixels high. Each "pixel" +has the size of one word (16 bit). It is not treated linearly like usual +memory, but is accessed through coordinates, with an upperleft corner of +(0,0) and a lower right corner of (1023,511). + +When data is displayed from the frame buffer, a rectangular area is read +from the specified coordinate within this memory. The size of this area can +be chosen from several hardware defined types. Note that these hardware +sizes are only valid when the X and Y stop/start registers are at their +default values. This display area can be displayed in two color formats, +being 15bit direct and 24bit direct. The data format of one pixel is as +follows: + +15bitDirect display. + +pixel| | +bit |0f|0e 0d 0c 0b 0a|09 08 07 06 05|04 03 02 01 00| +desc.|M |Blue |Green |Red | + +This means each color has a value of 0-31. The MSB of a pixel (M) is used +to mask the pixel. + +24bit Direct Display. + +The GPU can also be set to 24bit mode, in which case 3 bytes form one +pixel, 1 byte for each color. Data in this mode is arranged as follows: + +pixel|0 |1 |2 | +Bit |F-8|7-0|F-8|7-0|F-8|7-0| +desc.|G0 |R0 |R1 |B0 |B1 |G1 | + +Thus 2 display pixels are encoded in 3 frame buffer pixels. They are +displayed as follows: [R0,G0,B0] [R1,G1,B1] + +--------------------------------------------------------------------------- +Primitives. +--------------------------------------------------------------------------- +A basic firgure which the GPU can draw is called a primitive, and it can +draw the following: + +* Polygon + The GPU can draw 3 point and 4 point polygons. Each point of the polygon + specifies a point in the frame buffer. The polygon can be gouroud shaded. + The correct order of vertices for 4 point polygons is as follows: + + 1--2 Note: A 4 point polygon is processed internally as two 3 point + | | polygons. 
+ 3--4 Note: When drawing a polygon the GPU will not draw the right + most and bottom edge. So a (0,0)-(32,32) rectangle will actually + be drawn as (0,0)-(31,31). Make sure adjoining polygons have the same + coordinates if you want them to touch eachother!. Haven't checked how this + works with 3 point polygons. + +* Polygon with texture +A primitive of this type is the same as above, except that a texture is +applied. Each vertex of the polygon maps to a point on a texture page in +the frame buffer. The polygon can be gouroud shaded. + +Note: Because a 4 point polygon is processed internally as two 3 point + polygons, texture mapping is also done independently for both halfs. + This has some annoying consequences. + +* Rectangle +A rectangle is defined by the location of the top left corner and its width +and height. Width and height can be either free, 8*8 or 16*16. It's drawn +much faster than a polygon, but gouroud shading is not possible. + +* Sprite +A sprite is a textured rectangle, defined as a rectangle with coordinates +on a texture page. Like the rectangle is drawn much faster than the polygon +equivalent. No gouroud shading possible. + +Note: Even though the primitive is called a sprite, it has nothing in + common with the traditional sprite, other than that it's a rectangular +piece of graphics. Unlike the psx sprite, the traditional sprite is NOT +drawn to the bitmap, but gets sent to the screen instead of the actual +graphics data at that location at display time. + +* Line +A line is a straight line between 2 specified points. The line can be +gouroud shaded. A special form is the polyline, for which an arbitrary +number of points can be specified. + +* Dot +The dot primitive draws one pixel at the specified coordinate and in the +specified color. It is actually a special form of rectangle, with a size +of 1*1. 
+ +--------------------------------------------------------------------------- +Texture +--------------------------------------------------------------------------- +A texture is an image put on a polygon or sprite. It is necessary to +prepare the data beforehand in the frame buffer. This image is called a +texture pattern. The texture pattern is located on a texture page which +has a standard size and is located somewhere in the frame buffer, see +below. The data of a texture can be stored in 3 different modes: + +* 15bitDirect mode. + +bit |0f|0e 0d 0c 0b 0a|09 08 07 06 05|04 03 02 01 00| +desc.|S |Blue |Green |Red | + +This means each color has a value of 0-31. The MSB of a pixel (S) is used +to specify if the pixel is semi transparent or not. More on that later. + + +* 8bit CLUT mode, + Each pixel is defined by 8bits and the value of the pixel is converted to + a 15bit color using the CLUT(color lookup table) much like standard vga + pictures. So in effect you have 256 colors which are in 15bit precision. + + Bit: |0F-08|07-00| + desc:|I1 |I0 | + + I0 is the index to the CLUT for the left pixel, I1 for the right. + +* 4bitCLUT mode, + Same as above except that only 16 colors can be used. Data is arranged as + follows: + + Bit |F-C|B-8|7-4|3-0| + desc. |I3 |I2 |I1 |I0 | + 0 is drawn to the left + + +* Texture Pages + +Texture pages have a unit size of 256*256 pixels, regardless of colormode. +This means that in the frame buffer they will be 64 pixels wide for 4bit +CLUT, 128 pixels wide for 8bit CLUT and 256 pixels wide for 15bit direct. +The pixels are addressed with coordinates relative to the location of the +texture page, not the framebuffer. So the topleft texture coordinate on +a texture page is (0,0) and the bottom right one is (255,255) + The pages can be located in the frame buffer on X multiples of 64 and Y +multiples of 256. More than one texture page can be set up, but each +primitive can only contain texture from one page. 
+ +* Texture Windows +The area within a texture window is repeated throughout the texture +page. The data is not actually stored all over the texture page but +the GPU reads the repeated patterns as if they were there. The X and Y +and H and W must be multiples of 8. + +* CLUT (Color Lookup Table) +The clut is the table where the colors are stored for the image data in +the CLUT modes. The pixels of those images are used as indexes to this +table. The clut is arranged in the frame buffer as a 256x1 image for the +8bit clut mode, and a 16x1 image for the 4bit clut mode. Each pixel is a 16 +bit value, the first 15 bits used for a 15 bit color, and the 16th used for +semitransparency. The clut data can be arranged in the frame buffer at X +multiples of 16 (X=0,16,32,48,etc) and anywhere in the Y range of 0-511. +More than one clut can be prepared but only one can be used for each +primitive. + +* Texture Caching + +If polygons with texture are displayed, the GPU needs to read these from +the frame buffer. This slows down the drawing process, and as a result lowers +the number of polygons that can be drawn in a given timespan. To speed up +this process the GPU is equipped with a texture cache, so a given piece +of texture needs not to be read multiple times in succession. +The texture cache size depends on the color mode used for the textures. +In 4 bit CLUT mode it has a size of 64x64, in 8 bit CLUT it's 32x64 and in +15bitDirect is 32x32. A general speed up can be achieved by setting up +textures according to these sizes. For further speed gain a more precise +knowledge of how the cache works is necessary. + +- Cache blocks + +The texture page is divided into non-overlapping cache blocks, each of a +unit size according to color mode. These cache blocks are tiled within +the texture page. + ++-----+-----+-----+-- +|cache| | | +|block| | +| 0| 1 | 2 .. ++-----+-----+-- +| | | + +.. 
+ +- Cache entries + +Each cache block is divided into 256 cache entries, which are numbered +sequentially, and are 8 bytes wide. So a cache entry holds 16 4bit clut +pixels, 8 8bit clut pixels, or 4 15bitdirect pixels. + +4bit and 8bit clut: 15bitdirect: ++----+----+----+----+ +----+----+----+----+----+----+----+----+ +| 0| 1| 2| 3| | 0| 1| 2| 3| 4| 5| 6| 7| ++----+----+----+----+ +----+----+----+----+----+----+----+----+ +| 4| 5| 6| 7| | 8| 9| a| b| c| d| e| f| ++----+----+----+----+ +----+----+----+----+----+----+----+----+ +| 8| 9| .. | 10| 11| .. ++----+----+-- +----+----+-- +| c| ..| | 18| ..| ++----+-- +----+-- +| .. | .. + + +The cache can hold only one cache entry by the same number, so if f.e. a +piece of texture spans multiple cache blocks and it has data on entry 9 of +block 1, but also on entry 9 of block 2, these cannot be in the cache at +once. + + +--------------------------------------------------------------------------- +Rendering options. +--------------------------------------------------------------------------- +There are 3 modes which affect the way the GPU renders the primitives to +the frame buffer. + +* Semi Transparency +When semi transparency is set for a pixel, the GPU first reads the pixel it +wants to write to, and then calculates the color it will write from the 2 +pixels according to the semitransparency mode selected. Processing speed is +lower in this mode because additional reading and calculating are +necessary. There are 4 semitransparency modes in the GPU. + +B= the pixel read from the image in the frame buffer, F = the +halftransparent pixel + +* 0.5 x B + 0.5 x F +* 1.0 x B + 1.0 x F +* 1.0 x B - 1.0 x F +* 1.0 x B +0.25 x F + +A new semi transparency mode can be set for each primitive. For primitives +without texture semi transparency can be selected. For primitives with +texture semi transparency is stored in the MSB of each pixel, so some pixels +can be set to STP while others can be drawn opaque. 
For the CLUT modes the STP bit +is obtained from the CLUT. So if a color index points to a color in the +CLUT with the MSB set, it will be drawn semi transparent. + +When the color is black(BGR=0), STP is processed differently from when it's not +black (BGR<>0). The table below shows the differences: + + transparency processing (bit 1 of command packet) +BGR STP off on +0,0,0 0 Transparent Transparent +0,0,0 1 Non-transparent Non-Transparent +x,x,x 0 Non-Transparent Non-Transparent +x,x,x 1 Non-Transparent Transparent + +* Shading +The GPU has a shading function, which will scale the color of a primitive +to a specified brightness. There are 2 shading modes: Flat shading, and +gouraud shading. Flat shading is the mode in which one brightness value is +specified for the entire primitive. In Gouraud shading mode, a different +brightness value can be given for each vertex of a primitive, and the +brightness between these points is automatically interpolated. + +* Mask + +The mask function will prevent the GPU from writing to specific pixels when +drawing in the framebuffer. This means that when the gpu is drawing a +primitive to a masked area, it will first read the pixel at the coordinate +it wants to write to, check if its masking bit is set, and if so refrain +from writing to that particular pixel. The masking bit is the MSB of the +pixel, just like the STP bit. +To set this masking bit, the GPU provides a mask out mode, which will set +the MSB of any pixel it writes. If both mask out and mask evaluation are +on, the GPU will not draw to pixels with set MSB's, and will draw pixels +with set MSB's to the others, these in turn becoming masked pixels. + +--------------------------------------------------------------------------- +Drawing Environment +--------------------------------------------------------------------------- +The drawing environment specifies all global parameters the GPU needs for +drawing primitives. + +* Drawing offset. 
+ This locates the top left corner of the drawing area. Coordinates of + primitives originate from this point. So if the drawing offset is (0,240) + and a vertex of a polygon is located at (16,20) it will be drawn to the + frame buffer at (0+16,240+20). + +* Drawing clip area + This specifies the maximum range the GPU draws primitives to. So in effect + it specifies the top left and bottom right corner of the drawing area. + +* Dither enable + When dither is enabled the GPU will dither areas during shading. It will + process internally in 24 bit and dither the colors when converting back to + 15bit. When it is off, the lower 3 bits of each color simply get + discarded. + +* Draw to display enable. + This will enable/disable any drawing to the area that is currently + displayed. + +* Mask enable + When turned on any pixel drawn to the framebuffer by the GPU will have a + set masking bit. (= set MSB) + +* Mask judgement enable + Specifies if the mask data from the frame buffer is evaluated at the time + of drawing. + +--------------------------------------------------------------------------- +Display Environment. +--------------------------------------------------------------------------- +This contains all information about the display, and the area displayed. + +* Display area in frame buffer + This specifies the resolution of the display. The size can be set + as follows: + + Width: 256,320,384,512 or 640 pixels + Height: 240 or 480 pixels + + These sizes are only an indication on how many pixels will be displayed + using a default start/end. These settings only specify the resolution of + the display. + +* Display start/end. + Specifies where the display area is positioned on the screen, and how + much data gets sent to the screen. The screen sizes of the display area + are valid only if the horizontal/vertical start/end values are default. By + changing these you can get bigger/smaller display screens. 
On most TV's + there is some black around the edge, which can be utilised by setting the + start of the screen earlier and the end later. The size of the pixels is + NOT changed with these settings, the GPU simply sends more data to the + screen. Some monitors/TVs have a smaller display area and the extended + size might not be visible on those sets.(Mine is capable of about 330 + pixels horizontal, and 272 vertical in 320*240 mode) + + +* Interlace enable + + When enabled the GPU will display the even and odd lines of the display + area alternately. It is necessary to set this when using 480 lines as the + number of scan lines on a TV screen are not sufficient to display 480 + lines. + +* 15bit/24bit direct display + Switches between 15bit/24bit display mode. + +* Video mode + Selects which video mode to use, which are either PAL or NTSC. + +-------------------------------------------------------------------------- +Communication and OT's. +-------------------------------------------------------------------------- +All data regarding drawing and drawing environment are sent as packets to +the GPU. Each packet tells the GPU how and where to draw one primitive, or +it sets one of the drawing environment parameters. The display environment +is set up through single word commands using the control port of the GPU. + +Packets can be forwarded word by word through the data port of the GPU, or +more efficiently for large numbers of packets through DMA. A special DMA +mode was created for this so large numbers of packets can be sent and +managed easily. In this mode a list of packets is sent, where each entry in +the list contains a header which is one word containing the address of the +next entry and the size of the packet and the packet itself. A result of +this is that the packets do not need to be stored sequentially. This makes +it possible to easily control the order in which packets get processed. 
The +GPU processes the packets it gets in the order they are offered. So the +first entry in the list also gets drawn first. To insert a packet into the +middle of the list simply find the packet after which you want it to be +processed, replace the address in that packet with the address of the new +packet, and let that point to the address you replaced. + +To aid you in finding a location in the list the Ordering Table was +invented. At first this is basically a linked list with entries of packet +size 0, so it's a list of only listentryheaders, where each entry points to +to the next entry. Then as primitives are generated by your program you can +then add them to the table at a certain index. Just read the address in the +table entry and replace it with the address of the new packet and store the +address from the table in the packet. When all packets are generated and +you want to draw, just pass the address of the first listentry to the DMA +and the packets will get drawn in the order you entered the packets to the +table. Packets entered at a higher table index will get drawn after those +entered at a lower table index. Packets entered at the same index will get +drawn in the order they were entered, the last one first. + +In 3d drawing it's most common that you want the primitives with the highest +Z value to be drawn first, so it would be nice if the table would be drawn +the other way around, so the Z value can be used as index. This is a simple +thing, just make a table of which each entry points to the previous entry, +and start the DMA with the address of the last table entry. To assist you +in making such a table, a special DMA channel is available which creates +it for you. + +-------------------------------------------------------------------------- +GPU operation +-------------------------------------------------------------------------- +* GPU control registers. 
+There are 2 32 bit io ports for the GPU, which are: + +$1f801810 GPU Data +$1f801814 GPU control/Status + +The data register is used to exchange data with the GPU. +The control/status register, gives the status of the GPU when read, and +sets the control bits when written to. + +* Control/Status Register $1f801814 + +Status (Read) +----------------------------------------------------------------------------- +|1f |1e 1d|1c |1b |1a |19 18|17 |16 |15 |14 |13 |12 11 |10 | +|lcf|dma |com|img|busy| ? ?|den|isinter|isrgb24|Video|Height|Width0|Width1| +----------------------------------------------------------------------------- + + W0 W1 +Width: 00 0 256 pixels + 01 0 320 + 10 0 512 + 11 0 640 + 00 1 384 +Height: 0 240 pixels + 1 480 +Video: 0 NTSC + 1 PAL +isrgb24: 0 15 bit direct mode + 1 24 bit direct mode +isinter: 0 Interlace off + 1 Interlace on +den: 0 Display enabled + 1 Display disabled +busy: 0 GPU is Busy (ie. drawing primitives) + 1 GPU is Idle +img: 0 Not Ready to send image (packet $c0) + 1 Ready +com: 0 Not Ready to receive commands + 1 Ready +dma: 00 DMA off, communication through GP0 + 01 + 10 DMA CPU -> GPU + 11 DMA GPU -> CPU + + +lcf: 0 Drawing even lines in interlace mode + 1 Drawing uneven lines in interlace mode +---------------------------------------------------- +|0f 0e 0d|0c|0b|0a |09 |08 07|06 05|04|03 02 01 00| +| ? ? ?|me|md|dfe |dtd|tp |abr |ty|tx | +---------------------------------------------------- + +tx: 0 0 Texture page X = tx*64 + 1 64 + 2 128 + 3 192 + 4 ... +ty 0 0 Texture page Y + 1 256 +abr %00 0.5xB+0.5 xF Semi transparent state + %01 1.0xB+1.0 xF + %10 1.0xB-1.0 xF + %11 1.0xB+0.25xF +tp %00 4bit CLUT Texture page color mode + %01 8bit CLUT + %10 15bit +dtd 0 Dither off + 1 Dither on +dfe 0 Draw to display area prohibited + 1 Draw to display area allowed +md 0 off + 1 on Apply mask bit to drawn pixels. +me 0 off + 1 on No drawing to pixels with set mask bit. 
+ +Control (Write) +-------------------------------------------------------------------------- +A control command is composed of one word as follows: + +bit 1f-18 17-0 + command parameter. + +The composition of the parameter is different for each command. + +-------------------------------------------------------------------------- +*Reset GPU +command $00 +parameter $000000 +Description Resets the GPU. Also seems to turn off screen. + (sets status to $14802000) +-------------------------------------------------------------------------- +*Reset Command Buffer +command $01 +parameter $000000 +Description Resets the command buffer. + +-------------------------------------------------------------------------- +*Reset IRQ +command $02 +parameter $000000 +Description Resets the IRQ. No idea of what this means. + +-------------------------------------------------------------------------- +*Display Enable +command $03 +parameter $000000 Display enable + $000001 Display disable +Description Turns on/off display. Note that a turned off + screen still gives the flicker of NTSC on a + pal screen if NTSC mode is selected.. +-------------------------------------------------------------------------- +*DMA setup. +command $04 +parameter $000000 DMA disabled + $000001 DMA ? + $000002 DMA CPU to GPU + $000003 DMA GPU to CPU +Description Sets dma direction. K-comm also mentions something + about parameter $01, but i wasn't able to translate. +-------------------------------------------------------------------------- +*Start of display area +command $05 +parameter bit $00-$09 X (0-1023) + bit $0A-$12 Y (0-511) + = (Y<<10) + X +description Locates the top left corner of the display area. +-------------------------------------------------------------------------- +*Horizontal Display range +command $06 +parameter bit $00-$0b X1 ($1f4-$CDA) + bit $0c-$17 X2 + = X1+(X2<<12) +description Specifies the horizontal range within which the + display area is displayed. 
The display is relative + to the display start, so X coordinate 0 will be at + the value in X1. The display end is not relative to + the display start. The number of pixels that get sent + to the screen in 320 mode are (X2-X1)/8. How many + actually are visible depends on your TV/monitor. + (normally $260-$c56) +-------------------------------------------------------------------------- +*Vertical Display range +command $07 +parameter bit $00-$09 Y1 + bit $0a-$14 Y2 + = Y1+Y2<<10 +description Specifies the vertical range within which the + display area is displayed. The display is relative + to the display start, so Y coordinate 0 will be at + the value in Y1. The display end is not relative to + the display start. The number of pixels that get sent + to the display are Y2-Y1, in 240 mode. + (Not sure about the default values, should be + something like NTSC $010-$100, PAL $023-$123) +-------------------------------------------------------------------------- +*Display mode +command $08 +parameter bit $00-$01 Width 0 + bit $02 Height + bit $03 Videomode See above + bit $04 Isrgb24 + bit $05 Isinter + bit $06 Width1 + bit $07 Reverseflag + +description Sets the display mode. +-------------------------------------------------------------------------- +*GPU Info +command $10 +parameter $000000 + $000001 + $000002 + $000003 Draw area top left + $000004 Draw area bottom right + $000005 Draw offset + $000006 + $000007 GPU Type, should return 2 for a standard GPU. + +description Returns requested info. Read result from GP0. + 0,1 seem to return draw area top left also + 6 seems to return draw offset too. + +-------------------------------------------------------------------------- +*Some other commands i do not know the function of: + +*????? +command $20 +parameter ??????? +description i've seen it used with value $000504 + what it does????? + +*????? +command $09 +parameter $000001 ?? +description I've seen it used with value $000001 + what it does????? 
+ +-------------------------------------------------------------------------- +Command Packets, Data Register. +-------------------------------------------------------------------------- +Primitive command packets use an 8 bit command value which is present in +all packets. They contain a 3 bit type block and a 5 bit option block of +which the meaning of the bits depends on the type. Layout is as follows: + +Type: +000 GPU command +001 Polygon primitive +010 Line primitive +011 Sprite primitive +100 Transfer command +111 Environment command + +Configuration of the option blocks for the primitives is as follows: + +Polygon: +| 7 6 5 | 4 | 3 | 2 | 1 | 0 | +| 0 0 1 |IIP|3/4|Tme|Abe|Tge| + +Line: +| 7 6 5 | 4 | 3 | 2 | 1 | 0 | +| 0 1 0 |IIP|Pll| 0 |Abe| 0 | + +Sprite: +| 7 6 5 | 4 3 | 2 | 1 | 0 | +| 0 1 1 | Size |Tme|Abe| 0 | + + +IIP 0 Flat Shading + 1 Gouraud Shading +3/4 0 3 vertex polygon + 1 4 vertex polygon +Tme 0 Texture mapping off + 1 on +Abe 0 Semi transparency off + 1 on +Tge 0 Brightness calculation at time of texture mapping on + 1 off. (draw texture as is) +Size 00 Free size (Specified by W/H) + 01 1 x 1 + 10 8 x 8 + 11 16 x 16 +Pll 0 Single line (2 vertices) + 1 Polyline (n vertices) + +* Color information +Color information is forwarded as 24 bit data. It is parsed to +15 bit by the GPU. + +Layout as follows: + +17-10 $0f-$08 $07-$00 +Blue Green Red + +* Shading information. +For textured primitive shading data is forwarded by this packet. +Layout is the same as for color data, the RGB values controlling +the brightness of the individual colors ($00-$7f). A value of $80 in a +color will take the former value as data. 
+ +*Texture Page information +The Data is 16 bit wide, layout is as follows: + +|F E D C B A 9|8 7|6 5|4 |3 2 1 0| +|0 |tp |abr|ty|tx | + +tx 0-f X*64 texture page x coord +ty 0 0 texture page y coord + 1 256 +abr 0 0.5xB+0.5 xF Semi transparency mode + 1 1.0xB+1.0 xF + 2 1.0xB-1.0 xF + 3 1.0xB+0.25xF +tp 0 4bit CLUT + 1 8bit CLUT + 2 15bit direct + +CLUT-ID +Specifies the location of the CLUT data. Data is 16bits. + +F-6 Y coordinate 0-511 +5-0 X coordinate X/16 + +-------------------------------------------------------------------------- +abbreviations in packet list +-------------------------------------------------------------------------- +BGR Color/Shading info see above. +xn,yn 16 bit values of X and Y in frame buffer. +un,vn 8 bit values of X and Y in texture page +tpage texture page information packet, see above +clut clut ID, see above. + +-------------------------------------------------------------------------- +Packet list. +-------------------------------------------------------------------------- +The packets sent to the GPU are processed as a group of data, +each one word wide. The data must be written to the GPU data register +($1f801810) sequentially. Once all data has been received, the GPU +starts operation. 
+ +Overview of packet commands: + +Primitive drawing packets + $20 monochrome 3 point polygon + $24 textured 3 point polygon + $28 monchrome 4 point polygon + $2c textured 4 point polygon + $30 gradated 3 point polygon + $34 gradated textured 3 point polygon + $38 gradated 4 point polygon + $3c gradated textured 4 point polygon + $40 monochrome line + $48 monochrome polyline + $50 gradated line + $58 gradated line polyline + $60 rectangle + $64 sprite + $68 dot + $70 8*8 rectangle + $74 8*8 sprite + $78 16*16 rectangle + $7c 16*16 sprite +GPU command & Transfer packets + $01 clear cache + $02 frame buffer rectangle draw + $80 move image in frame buffer + $a0 send image to frame buffer + $c0 copy image from frame buffer +Draw mode/environment setting packets + $e1 draw mode setting + $e2 texture window setting + $e3 set drawing area top left + $e4 set drawing area bottom right + $e5 drawing offset + $e6 mask setting + +-------------------------------------------------------------------------- +Packet Descriptions +-------------------------------------------------------------------------- +Primitive Packets +-------------------------------------------------------------------------- +$20 monochrome 3 point polygon + + |1f-18|17-10|0f-08|07-00| +1|$20 |BGR |command+color +2|y0 |x0 |vertexes +3|y1 |x1 | +4|y2 |x2 | +-------------------------------------------------------------------------- +$24 textured 3 point polygon + |1f-18|17-10|0f-08|07-00| +1|$24 |BGR |command+color +2|y0 |x0 |vertex 0 +3|clut |v0 |u0 |clutid+ texture coords vertext 0 +4|y1 |x1 | +5|tpage |v1 |u1 | +6|y2 |x2 | +7| |v2 |u2 | +-------------------------------------------------------------------------- +$28 monchrome 4 point polygon + |1f-18|17-10|0f-08|07-00| +1|$28 |BGR |command+color +2|y0 |x0 |vertexes +3|y1 |x1 | +4|y2 |x2 | +5|y3 |x3 | +-------------------------------------------------------------------------- +$2c textured 4 point polygon + |1f-18|17-10|0f-08|07-00| +1|$2c |BGR |command+color 
+2|y0 |x0 |vertex 0 +3|clut |v0 |u0 |clutid+ texture coords vertext 0 +4|y1 |x1 | +5|tpage |v1 |u1 | +6|y2 |x2 | +7| |v2 |u2 | +8|y3 |x3 | +9| |v3 |u3 | +-------------------------------------------------------------------------- +$30 graduation 3 point polygon + |1f-18|17-10|0f-08|07-00| +1|$30 |BGR0 |command+color +2|y0 |x0 |vertexes +3| |BGR1 | +4|y1 |x1 | +5| |BGR2 | +6|y2 |x2 | +-------------------------------------------------------------------------- +$34 shaded textured 3 point polygon + |1f-18|17-10|0f-08|07-00| +1|$34 |BGR0 |command+color +2|y0 |x0 |vertex 0 +3|clut |v0 |u0 |clutid+ texture coords vertex 0 +4| |BGR1 | +5|y1 |x1 | +6|tpage |v1 |u1 | +7| |BGR2 | +8|y2 |x2 | +9| |v2 |u2 | +-------------------------------------------------------------------------- +$38 gradated 4 point polygon + |1f-18|17-10|0f-08|07-00| +1|$38 |BGR0 |command+color +2|y0 |x0 |vertexes +3| |BGR1 | +4|y1 |x1 | +5| |BGR2 | +6|y2 |x2 | +7| |BGR3 | +8|y3 |x3 | +-------------------------------------------------------------------------- +$3c shaded textured 4 point polygon + |1f-18|17-10|0f-08|07-00| +1|$3c |BGR0 |command+color +2|y0 |x0 |vertex 0 +3|clut |v0 |u0 |clutid+ texture coords vertex 0 +4| |BGR1 | +5|y1 |x1 | +6|tpage |v1 |u1 |texture page location +7| |BGR2 | +8|y2 |x2 | +9| |v2 |u2 | +a| |BGR3 | +b|y3 |x3 | +c| |v3 |u3 | +-------------------------------------------------------------------------- +$40 monochrome line + |1f-18|17-10|0f-08|07-00| +1|$40 |BGR |command+color +2|y0 |x0 |vertex 0 +3|y1 |x1 |vertex 1 +-------------------------------------------------------------------------- +$48 single color polyline + |1f-18|17-10|0f-08|07-00| +1|$48 |BGR |command+color +2|y0 |x0 |vertex 0 +3|y1 |x1 |vertex 1 +4|y2 |x2 |vertex 2 + +.|yn |xn |vertex n +.|$55555555 Temination code. + +Any number of points can be entered, end with termination code. 
+-------------------------------------------------------------------------- +$50 gradated line + |1f-18|17-10|0f-08|07-00| +1|$50 |BGR0 |command+color +2|y0 |x0 | +3| |BGR1 | +4|y1 |x1 | +-------------------------------------------------------------------------- +$58 gradated line polyline + |1f-18|17-10|0f-08|07-00| +1|$58 |BGR0 |command+color +2|y0 |x0 | +3| |BGR1 | +4|y1 |x1 | +5| |BGR2 | +6|y2 |x2 | + +.| |BGRn | +.|yn |xn | +.|$55555555 Temination code. +Any number of points can be entered, end with termination code. +-------------------------------------------------------------------------- +$60 rectangle + |1f-18|17-10|0f-08|07-00| +1|$60 |BGR |command+color +2|y |x | +3|h |w | +-------------------------------------------------------------------------- +$64 sprite + |1f-18|17-10|0f-08|07-00| +1|$64 |BGR |command+color +2|y |x | +3|clut |v |u |clut location, texture page y,x +4|h |w | +-------------------------------------------------------------------------- +$68 dot + |1f-18|17-10|0f-08|07-00| +1|$68 |BGR |command+color +2|y |x | +-------------------------------------------------------------------------- +$70 8*8 rectangle + |1f-18|17-10|0f-08|07-00| +1|$70 |BGR |command+color +2|y |x | +-------------------------------------------------------------------------- +$74 8*8 sprite + |1f-18|17-10|0f-08|07-00| +1|$74 |BGR |command+color +2|y |x | +3|clut |v |u |clut location, texture page y,x +-------------------------------------------------------------------------- +$78 16*16 rectangle + |1f-18|17-10|0f-08|07-00| +1|$78 |BGR |command+color +2|y |x | +-------------------------------------------------------------------------- +$7c 16*16 sprite + |1f-18|17-10|0f-08|07-00| +1|$7c |BGR |command+color +2|y |x | +3|clut |v |u |clut location, texture page y,x +-------------------------------------------------------------------------- +GPU command & Transfer packets +-------------------------------------------------------------------------- +$01 clear cache + 
|1f-18|17-10|0f-08|07-00| +1|$01 |0 |clear cache. + +Seems to be the same as the GP1 command. +-------------------------------------------------------------------------- +$02 frame buffer rectangle draw + |1f-18|17-10|0f-08|07-00| +1|$02 |BGR |command+color +2|Y |X |Topleft corner +3|H |W |Width & Height +Fills the area in the frame buffer with the value in RGB. This command +will draw without regard to drawing environment settings. Coordinates are +absolute frame buffer coordinates. Max width is $3ff, max height is $1ff. +-------------------------------------------------------------------------- +$80 move image in frame buffer + |1f-18|17-10|0f-08|07-00| +1|$80 | 0|command +2|sY |sX |Source coord. +3|dY |dX |Destination coord. +4|H |W |Height+Width of transfer +Copies data within the frame buffer +-------------------------------------------------------------------------- +$01 $a0 send image to frame buffer + |1f-18|17-10|0f-08|07-00| + |$01 | |Reset command buffer (write to GP1 or GP0) +1|$A0 | | +2|Y |X |Destination coord. +3|H |W |Height+Width of transfer +4|pix1 |pix0 |image data +5.. +?|pixn |pixn-1 | +Transfers data from main memory to frame buffer +If the number of pixels to be sent is odd, an extra pixel should be +sent. (32 bits per packet) +--------------------------------------------------------------------------- +$01 $c0 copy image from frame buffer + |1f-18|17-10|0f-08|07-00| + |$01 | |Reset command buffer (write to GP1 or GP0) +1|$C0 | | +2|Y |X |Source coord. +3|H |W |Height+Width of transfer +4|pix1 |pix0 |image data (read from data port) +5.. +?|pixn |pixn-1 | +Transfers data from frame buffer to main memory. Wait for bit 27 +of the status register to be set before reading the image data.
+When the number of pixels is odd, an extra pixel is read at the +end. (because one packet is 32 bits) +-------------------------------------------------------------------------- +Draw mode/environment setting packets +-------------------------------------------------------------------------- +Some of these settings can also be set by primitive packets; in any +case it is the last packet of either kind that the GPU received +that is active. So if a primitive sets tpage info, it will +overwrite the existing data, even if it was sent by an $e? packet. +-------------------------------------------------------------------------- +$e1 draw mode setting + |1f-18|17-0b|0a |09 |08 07|06 05|04|03 02 01 00| +1|$e1 | |dfe|dtd|tp |abr |ty|tx | command +values + +see above for explanations + +It seems that bit $0b-$0d of the status reg can also be passed with this +command on some GPU's other than type 2. (ie. Command $10000007 doesn't +return 2) +-------------------------------------------------------------------------- +$e2 texture window setting + + |1F-18|17-14|13-0F|0E-0A|09-05|04-00| +1|$E2 |twy |twx |twh |tww | command + value + +twx Texture window X, (twx*8) +twy Texture window Y, (twy*8) +tww Texture window width, 256-(tww*8) +twh Texture window height, 256-(twh*8) +-------------------------------------------------------------------------- +$e3 set drawing area top left + |1f-18|17-14|13-0a|09-00| +1|$e3 | |Y |X | +sets the drawing area topleft corner. X&Y are absolute frame +buffer coords. +-------------------------------------------------------------------------- +$e4 set drawing area bottom right + |1f-18|17-14|13-0a|09-00| +1|$e4 | |Y |X | +sets the drawing area bottom right. X&Y are absolute frame +buffer coords. +-------------------------------------------------------------------------- +$e5 drawing offset + |1f-18|17-14|14-0b|0a-00| +1|$e5 | |OffsY|OffsX| +(offset Y = y << 11) +sets the drawing area offset within the drawing area. X&Y are +offsets in the frame buffer.
+-------------------------------------------------------------------------- +$e6 mask setting + |1f-18|17-02|01 |00 | +1|$e6 | |Mask2|Mask1| + +Mask1 Set mask bit while drawing. 1 = on +Mask2 Do not draw to mask areas. 1= on + +While mask1 is on, the GPU will set the MSB of all pixels it draws. +While mask2 is on, the GPU will not write to pixels with set MSB's + +-------------------------------------------------------------------------- +DMA +-------------------------------------------------------------------------- +The GPU has two DMA channels allocated to it. DMA channel 2 is used to send +linked packet lists to the GPU and to transfer image data to and from the +frame buffer. DMA channel 6 sets up an empty linked list, of which each +entry points to the previous (ie. reverse clear an OT.) +-------------------------------------------------------------------------- +D2_MADR DMA base address. $1f8010a0 +bit |1f 00| +desc|madr | + +madr pointer to the address the DMA will start reading from/writing to +-------------------------------------------------------------------------- +D2_BCR DMA block control $1f8010a4 +bit |1f 10|0f 00| +desc|ba |bs | + +ba Amount of blocks +bs Blocksize (words) + +Sets up the DMA blocks. Once started the DMA will send ba blocks of bs +words. Don't set a blocksize larger than $10 words, as the command buffer +of the GPU is 64 bytes. +-------------------------------------------------------------------------- +D2_CHCR DMA channel control $1f8010a8 +bit |1f-19|18|17-0c|0b|0a|09|08|07 01|00| +desc| 0|Tr| 0| 0|Li|Co| 0| 0|Dr| + +Tr 0 No DMA transfer busy. + 1 Start DMA transfer/DMA transfer busy. +Li 1 Transfer linked list. +Co 1 Transfer continuous stream of data. +Dr 0 direction to memory + 1 direction to GPU + +This configures the DMA channel. The DMA starts when bit $18 is set. DMA +is finished as soon as bit $18 is cleared again.
To send or receive data +to/from VRAM send the appropriate GPU packets first ($a0/$c0) +-------------------------------------------------------------------------- +D6_MADR DMA base address. $1f8010e0 +bit |1f 00| +desc|madr | + +madr Last table entry. +-------------------------------------------------------------------------- +D6_BCR DMA block control $1f8010e4 +bit |1f 00| +desc|bc | + +bc Number of list entries. +-------------------------------------------------------------------------- +D6_CHCR DMA channel control $1f8010e8 +bit |1f-1d|1c|1b-19|18|17-02|01|00| +desc| 0|OT| 0|Tr| 0|Ot| 0| + +Tr 0 No DMA transfer busy. + 1 Start DMA transfer/DMA transfer busy. +Ot 1 Set to do an OT clear. + +When this register is set to $11000002, the DMA channel will create an +empty linked list of D6_BCR entries ending at the address in D6_MADR. Each +entry has a size of 0, and points to the previous. The first entry is +the list terminator ($00ffffff). +So if D6_MADR = $80100010, D6_BCR=$00000004, and the DMA is kicked this +will result in a list looking like this: +$80100000 $00ffffff +$80100004 $00100000 +$80100008 $00100004 +$8010000c $00100008 +$80100010 $0010000c +-------------------------------------------------------------------------- +DPCR Dma control register $1f8010f0 +|1f 1c|1b 18|17 14|13 10|0f 0c|0b 08|07 04|03 00| +| |Dma6 |Dma5 |Dma4 |Dma3 |Dma2 |Dma1 |Dma0 | + +Each channel has a 4 bit control block allocated in this +register. +Bit 3: 1= Dma Enabled + 2: ? + 1: ? + 0: ? + +Bit 3 must be set for a channel to operate. + +-------------------------------------------------------------------------- +Common GPU functions, step by step. +-------------------------------------------------------------------------- +* Initializing the GPU. + +First thing to do when using the GPU is to initialize it. To do that take +the following steps: + +1 - Reset the GPU (GP1 command $00). This turns off the display as well. +2 - Set horizontal and vertical start/end. (GP1 command $06, $07) +3 - Set display mode.
(GP1 command $08) +4 - Set display offset. (GP1 command $05) +5 - Set draw mode. (GP0 command $e1) +6 - Set draw area. (GP0 command $e3, $e4) +7 - Set draw offset. (GP0 command $e5) +8 - Enable display. + +* Sending a linked list. + +The normal way to send large numbers of primitives is by using a linked +list dma transfer. This list is built up of entries of which each points to +the next. One entry looks like this: + + dw $nnYYYYYY ; nn = the number of words in the list entry + ; YYYYYY = address of next list entry & $00ffffff + +1 dw .. ; here goes the primitive. +2 dw .. ; +. dw .. ; +nn-1 dw .. ; +nn dw .. ; + +The last entry in the list should have $ffffff as pointer, which is the +terminator. As soon as this value is found DMA is ended. If the entry +size is set to 0, no data will be transferred to the GPU and the next +entry is processed. + +To send the list do this: +1 - Wait for the GPU to be ready to receive commands. (bit $1c == 1) +2 - Enable DMA channel 2 +3 - Set GPU to DMA cpu->gpu mode. ($04000002) +4 - Set D2_MADR to the start of the list +5 - Set D2_BCR to zero. +6 - Set D2_CHCR to link mode, mem->GPU and dma enable. ($01000401) + +* Uploading Image data through DMA. + +To upload an image to VRAM take the following steps: + +1 - Wait for the GPU to be idle and DMA to finish. Enable DMA channel 2 + if necessary. +2 - Send the 'Send image to VRAM' primitive. (You can send this through + dma if you want. Use the linked list method described above) +3 - Set DMA to CPU->GPU ($04000002) (if you didn't do so already in the + previous step) +4 - Set D2_MADR to the start of the image data +5 - Set D2_BCR with : bits 31-16 = Number of words to send (H*W /2) + bits 15- 0 = Block size of 1 word. ($01) + if H*W is odd, add 1. (Pixels are 2 bytes, send + an extra blank pixel in case of an odd amount) +6 - Set D2_CHCR to continuous mode, mem -> GPU and dma enable.
($01000201) + +Note that H, W, X and Y are always in frame buffer pixels, even if you send +image data in other formats. +You can use bigger block sizes if you need more speed. If the number of +words to be sent is not a multiple of the blocksize, you'll have to send +the remainder seperately, because the GPU only accepts an extra halfword +if the number of pixels is odd. (ie. of the last word sent, only the low +half word is used.) Also take care not to use blocksizes bigger than $10, as +the buffer of the GPU is only 64 bytes (=$10 words). + +* Waiting to send commands + +You can send new commands as soon as DMA has ceased and the GPU is ready. +1 - Wait for bit $18 to become 0 in D2_CHCR +2 - Wait for bit $1c to become 1 in GP1. + +* Vsync + +Step by step for a VSYNC counter coming up (not)soon. + +Meanwhile you can init the pad driver and as soon as you want to +check for VSYNC, fill the return buffer with 0 and wait for it to change. +The pad driver checks the pads every VSYNC. Check the greentro source for +an example. + +-------------------------------------------------------------------------- +Missing info. +-------------------------------------------------------------------------- +There's still a lot yet uncovered, so if you have/know anything that's not +in here please mail it to me. Things i'm looking for particularly are +info on the differences between the various versions and revisions of the +GPU, and something about drawing speeds and other timing. + +-------------------------------------------------------------------------- +History: +-------------------------------------------------------------------------- +23/apr/1999 First public release. +28/apr/1999 Some bugfixes and rewrites. + Info on texture pages corrected. <Silpheed> + 8/may/1999 Detailed packet composition. +20/may/1999 DMA & Step by steps added. +25/jun/1999 More DMA, OT and lists. +30/aug/1999 Correction. 
($03) +-------------------------------------------------------------------------- +Maintained by doomed/padua. Any errors, additions -> <doomed@c64.org> +-------------------------------------------------------------------------- +--== http://psx.rules.org/ ==-- +--== http://www.padua.org/ ==-- +-------------------------------------------------------------------------- +Thanx & Hello to: +Silpheed Groepaz Brainwalker & Hitmen, Antiloop Middy Danzig & Napalm, +K-Communications, Blackbag, TDJ Sander & Focus, Burglar LCF & SCS*TRC, +Deekay & Crest, Graham NO-XS & Oxyron, MrAlpha Fungus & F4CG, Zealot & +Wrath Design, Shape, Naphalm Jazzcat & Onslaught, Reyn Ouwehand, WHW & WOW, +all active people on PSX and C64, #psxdev, #c-64. +-------------------------------------------------------------------------- diff --git a/misc/gte.txt b/misc/gte.txt @@ -0,0 +1,999 @@ +========================================================================== +GTE.txt - Documentation & Explanation. +========================================================================== + +Disclaimer. +-------------------------------------------------------------------------- +This document is a collection of all info on the GTE i could find and my +own notes. Most of this is the result of experiment, so not all info might +be correct. This document is most probably not complete, and not all +capabilities and quirks of the GTE are documented. No responsibility is +taken for anything that might occur using the information in this document. + +-------------------------------------------------------------------------- +Introduction. +-------------------------------------------------------------------------- +The Geometry Transformation Engine (GTE) is the heart of all 3d +calculations on the psx. The GTE has specialised functions for perspective +transformations, light sourcing and the like, and is much faster than the +CPU on these operations. 
It is mounted as the second coprocessor and as +such has no physical address in the memory of the psx. All control is done +through special instructions. + +-------------------------------------------------------------------------- +Basic mathematics +-------------------------------------------------------------------------- +The GTE is basicly an engine for vector mathematics, so some knowledge +of that area is vital for correct usage of the GTE. I will not delve to +deeply in that area, as that's beyond the scope of this text, but i'll +introduce some concepts. + +The basic representation of a point(vertex) in 3d space is through a vector +of the sort [X,Y,Z]. In GTE operation there's basicly two kinds of these, +vectors of variable length and vectors of a unit length of 1.0, called +normal vectors. The first is used to decribe a locations and translations +in 3d space, the second to describe a direction. + +Rotation of vertices is performed by multiplying the vector of the vertex +with a rotation matrix. The rotation matrix is a 3x3 matrix consisting of +3 normal vectors which are orthogonal to each other. (It's actually the +matrix which describes the coordinate system in which the vertex is located +in relation to the unit coordinate system. See a maths book for more +details.) This matrix is derived from rotation angles as follows: + +(s? = sin(?), c? = cos(?)) + +Rotation angle A Rotation angle B Rotation angle C +about X axis: about Y axis: about Z axis: + +| 1 0 0| | cB 0 sB| | cC -sC 0| +| 0 cA -sA| | 0 1 0| | sC cC 0| +| 0 sA cA| |-sB 0 cB| | 0 0 1| + +Rotation about multiple axis can be done by multiplying these matrices +with eachother. Note that the order in which this multiplication is done +*IS* important. The GTE has no sine or cosine functions, so the calculation +of these must be done by the CPU. + +Translation is the simple addition of two vectors, relocating the vertex +within its current coordinate system. 
Needless to say the order in which +translation and rotation occur for a vector is important. + +-------------------------------------------------------------------------- +Brief Function descriptions +-------------------------------------------------------------------------- +RTPS/RTPT Rotate, translate and perpective transformation. + +These two functions perform the final 3d calculations on one or three +vertices at once. The points are first multiplied with a rotation matrix(R), +and after that translated(TR). Finally a perspective transformation is +applied, which results in 2d screen coordinates. It also returns an +interpolation value to be used with the various depth cueing instructions. +-------------------------------------------------------------------------- +MVMVA Matrix & Vector multiplication and addition. + +Multiplies a vector with either the rotation matrix, the light matrix or +the color matrix and then adds the translation vector or background color +vector. +-------------------------------------------------------------------------- +DCPL Depth cue light color + +First calculates a color from a light vector(normal vector of a plane +multiplied with the light matrix and zero limited) and a provided RGB value. +Then performs depth cueing by interpolating between the far color vector and +the newfound color. +-------------------------------------------------------------------------- +DPCS/DPCT Depth cue single/triple + +Performs depth cueing by interpolating between a color and the far color +vector on one or three colors. +-------------------------------------------------------------------------- +INTPL Interpolation + +Interpolates between a vector and the far color vector. +-------------------------------------------------------------------------- +SQR Square + +Calculates the square of a vector. 
+-------------------------------------------------------------------------- +NCS/NCT Normal Color + +Calculates a color from the normal of a point or plane and the light +sources and colors. The basic color of the plane or point the normal +refers to is assumed to be white. +-------------------------------------------------------------------------- +NCDS/NCDT Normal Color Depth Cue. + +Same as NCS/NCT but also performs depth cueing (like DPCS/DPCT) +-------------------------------------------------------------------------- +NCCS/NCCT + +Same NCS/NCT, but the base color of the plane or point is taken into +account. +-------------------------------------------------------------------------- +CDP + +A color is calculated from a light vector (base color is assumed to be +white) and depth cueing is performed (like DPCS). +-------------------------------------------------------------------------- +CC + +A color is calculated from a light vector and a base color. +-------------------------------------------------------------------------- +NCLIP + +Calculates the outer product of three 2d points.(ie. 3 vertices which +define a plane after projection.) + +The 3 vertices should be stored clockwise according to the visual point: + + Z+ + / + /____ X+ + | + | + Y+ + +If this is so, the result of this function will be negative if we are +facing the backside of the plane. +-------------------------------------------------------------------------- +AVSZ3/AVSZ4 + +Adds 3 or 4 z values together and multplies them by a fixed point value. +This value is normally chosen so that this function returns the average +of the z values (usually further divided by 2 or 4 for easy adding to the +OT) +-------------------------------------------------------------------------- +OP + +Calculates the outer product of 2 vectors. +-------------------------------------------------------------------------- +GPF + +Multiplies 2 vectors. Also returns the result as 24bit rgb value. 
+-------------------------------------------------------------------------- +GPL + +Multiplies a vector with a scalar and adds the result to another vector. +Also returns the result as 24bit rgb value. + +-------------------------------------------------------------------------- +GTE Operation. +-------------------------------------------------------------------------- +Instructions. +-------------------------------------------------------------------------- +The CPU has six special load and store instructions for the GTE registers, +and an instruction to issue commands to the coprocessor. + +rt CPU register 0-31 +gd GTE data register 0-31 +gc GTE control register 0-31 +imm 16 bit immediate value +base CPU register 0-31 +imm(base) address pointed to by base + imm. +b25 25 bit wide data field. + +LWC2 gd, imm(base) stores value at imm(base) in gte data register gd. +SWC2 gd, imm(base) stores gte data register at imm(base). +MTC2 rt, gd stores register rt in GTE data register gd. +MFC2 rt, gd stores GTE data register gd in register rt. +CTC2 rt, gc stores register rt in GTE control register gc. +CFC2 rt, gc stores GTE control register in register rt. + +COP2 b25 Issues a GTE command. + +Gte load and store instructions have a delay of 2 instructions, for any +gte commands or operations accessing that register. + +-------------------------------------------------------------------------- +Registers. +-------------------------------------------------------------------------- +The GTE has 32 data registers, and 32 control registers,each 32 bits wide. +The following list describes their common use and format. Note in some +functions format is different from the one that's given here. The numbers +in the format fields are the signed, integer and fractional parts of the +field. So 1,3,12 means signed(1 bit), 3 bits integral part, 12 bits +fractional part. + +Control registers: +No. 
Name |31-24 23-16|15-08 07-00| Description + 0 R11R12 |R12 1, 3,12|R11 1, 3,12| Rotation matrix elements 11, 12 + 1 R13R21 |R21 1, 3,12|R13 1, 3,12| Rotation matrix elements 13, 21 + 2 R22R23 |R23 1, 3,12|R22 1, 3,12| Rotation matrix elements 22, 23 + 3 R31R32 |R32 1, 3,12|R31 1, 3,12| Rotation matrix elements 31, 32 + 4 R33 | 0|R33 1, 3,12| Rotation matrix element 33 + 5 TRX |TRX 1,31, 0 | Translation vector X + 6 TRY |TRY 1,31, 0 | Translation vector Y + 7 TRZ |TRZ 1,31, 0 | Translation vector Z + 8 L11L12 |L12 1, 3,12|L11 1, 3,12| Light source matrix elements 11, 12 + 9 L13L21 |L21 1, 3,12|L13 1, 3,12| Light source matrix elements 13, 21 +10 L22L23 |L23 1, 3,12|L22 1, 3,12| Light source matrix elements 22, 23 +11 L31L32 |L32 1, 3,12|L31 1, 3,12| Light source matrix elements 31, 32 +12 L33 | 0|L33 1, 3,12| Light source matrix element 33 +13 RBK |RBK 1,19,12 | Background color red component. +14 GBK |GBK 1,19,12 | Background color green component. +15 BBK |BBK 1,19,12 | Background color blue component. +16 LR1LR2 |LR2 1, 3,12|LR1 1, 3,12| Light color matrix source 1&2 red comp. +17 LR3LG1 |LG1 1, 3,12|LR3 1, 3,12| Light color matrix source 3 red, 1 green +18 LG2LG3 |LG3 1, 3,12|LG2 1, 3,12| Light color matrix source 2&3 green comp. +19 LB1LB2 |LB2 1, 3,12|LB1 1, 3,12| Light color matrix source 1&2 blue comp. +20 LB3 | 0|LB3 1, 3,12| Light color matrix source 3 blue component. +21 RFC |RFC 1,27, 4 | Far color red component. +22 GFC |GFC 1,27, 4 | Far color green component. +23 BFC |BFC 1,27, 4 | Far color blue component. +24 OFX |OFX 1,15,16 | Screen offset X +25 OFY |OFY 1,15,16 | Screen offset Y +26 H | 0|H 0,16, 0| Projection plane distance. +27 DQA | 0|DQA 1, 7, 8| Depth cueing parameter A.(coefficient.) +28 DQB | 0|DQB 1, 7,24| Depth cueing parameter B.(offset.) +29 ZSF3 | 0|ZSF3 1,3,12| Z3 average scale factor (normally 1/3) +30 ZSF4 | 0|ZSF4 1,3,12| Z4 average scale factor (normally 1/4) +31 FLAG |See gte functions | Returns any calculation errors.
+ +Data registers: +No. Name rw|31-24 23-16|15-08 07-00| Description + 0 VXY0 rw|VY0 |VX0 | Vector 0 X and Y. 1,3,12 or 1,15,0 + 1 VZ0 rw| 0|VZ0 | Vector 0 Z. + 2 VXY1 rw|VY1 |VX1 | Vector 1 X and Y. 1,3,12 or 1,15,0 + 3 VZ1 rw| 0|VZ1 | Vector 1 Z. + 4 VXY2 rw|VY2 |VX2 | Vector 2 X and Y. 1,3,12 or 1,15,0 + 5 VZ2 rw| 0|VZ2 | Vector 2 Z. + 6 RGB rw|Code |B |G |R | Rgb value. Code is passed, but not used in calc. + 7 OTZ r | |OTZ 0,15, 0| Z Average value. + 8 IR0 rw|Sign |IR0 1, 3,12| Intermediate value 0. *1 + 9 IR1 rw|Sign |IR1 1, 3,12| Intermediate value 1. *1 +10 IR2 rw|Sign |IR2 1, 3,12| Intermediate value 2. *1 +11 IR3 rw|Sign |IR3 1, 3,12| Intermediate value 3. *1 +12 SXY0 rw|SY0 1,15, 0|SX0 1,15, 0| Screen XY coordinate fifo. *2 +13 SXY1 rw|SY1 1,15, 0|SX1 1,15, 0| +14 SXY2 rw|SY2 1,15, 0|SX2 1,15, 0| +15 SXYP rw|SYP 1,15, 0|SXP 1,15, 0| +16 SZ0 rw| 0|SZ0 0,16, 0| Screen Z fifo. *2 +17 SZ1 rw| 0|SZ1 0,16, 0| +18 SZ2 rw| 0|SZ2 0,16, 0| +19 SZ3 rw| 0|SZ3 0,16, 0| +20 RGB0 rw|CD0 |B0 |G0 |R0 | Characteristic color fifo. *2 +21 RGB1 rw|CD1 |B1 |G1 |R1 | +22 RGB2 rw|CD2 |B2 |G2 |R2 | CD2 is the bit pattern of currently executed function +23 (RES1) | | Prohibited +24 MAC0 rw|MAC0 1,31,0 | Sum of products value 0 +25 MAC1 rw|MAC1 1,31,0 | Sum of products value 1 +26 MAC2 rw|MAC2 1,31,0 | Sum of products value 2 +27 MAC3 rw|MAC3 1,31,0 | Sum of products value 3 +28 IRGB w| |IB |IG |IR | *3 +29 ORGB r | |OB |OG |OR | *4 +30 LZCS w|LZCS 1,31,0 | Leading zero count source data.*5 +31 LZCR r |LZCR 0,6,0 | Leading zero count result.*5 + +*1) The specified format is the format which GTE functions output to these + registers. The input format is mostly (1,19,12) + +*2) The SXYx, SZx and RGBx are first in first out registers (fifo). The last + calculation result is stored in the last register, and previous results + are stored in previous registers.
So for example when a new SXY value + is obtained the following happens: + SXY0 = SXY1 + SXY1 = SXY2 + SXY2 = SXYP + SXYP = result. + +*3) IRGB: + |31 15|14-10| 9- 5| 4- 0| + | 0|IR |IG |IB | + When writing a value to IRGB the following happens: + IR1 = IR format converted to (1,11,4) + IR2 = IG format converted to (1,11,4) + IR3 = IB format converted to (1,11,4) + +*4) ORGB: + |31 15|14-10| 9- 5| 4- 0| + | 0|OR |OG |OB | + When reading a value from ORGB the following is returned: + OR = (IR1>>7) &$1f + OG = (IR2>>7) &$1f + OB = (IR3>>7) &$1f +*5) Reading LZCR returns the leading 0 count of LZCS if LZCS is positive + and the leading 1 count of LZCS if LZCS is negative. + +-------------------------------------------------------------------------- +Programming Considerations. +-------------------------------------------------------------------------- +Before use the GTE must be turned on. The GTE has bit 30 allocated to it in +the status register of the system control coprocessor (cop0). Before any +GTE instruction is used, this bit must be set. + +GTE instructions and functions should not be used in +- Delay slots of jumps and branches +- Event handlers or interrupts. + +If an instruction that reads a GTE register or a GTE command is executed +before the current GTE command is finished, the cpu will hold until the +instruction has finished. The number of cycles each GTE instruction takes +is in the command list. + +-------------------------------------------------------------------------- +Function Operation. +-------------------------------------------------------------------------- +This part describes the actual calculations performed by the various GTE +functions. The first line contains the name of the function, the number +of cycles it takes and a brief description. The second line lists any fields that +may be set in the opcode and in the third line is the actual opcode. See +the end of the list for the fields and their descriptions.
Then follows a +list of all registers which are needed in the calculation under the 'in', +and a list of registers which modified under the 'out' with a brief +description and the format of the data. Next follows the calculation which +is performed after initiating the function. The format field left is the +size in which the data is stored, the format field on the right contains +the format in which the calculation is performed. At certain points in the +calculation checks and limitations are done and their results stored in the +flag register, see the table below. They are identified with the code from +the second column of the table directly followed by square brackets +enclosing the part of the calculation on which the check is performed. The +additional Lm_ identifier means the value is limited to the bottom or +ceiling of the check if it exceeds the boundary. + +bit description +31 Checksum. +30 A1 Result larger than 43 bits and positive +29 A2 Result larger than 43 bits and positive +28 A3 Result larger than 43 bits and positive +27 A1 Result larger than 43 bits and negative +26 A2 Result larger than 43 bits and negative +25 A3 Result larger than 43 bits and negative +24 B1 Value negative(lm=1) or larger than 15 bits(lm=0) +23 B2 Value negative(lm=1) or larger than 15 bits(lm=0) +22 B3 Value negative(lm=1) or larger than 15 bits(lm=0) +21 C1 Value negative or larger than 8 bits. +20 C2 Value negative or larger than 8 bits. +19 C3 Value negative or larger than 8 bits. +18 D Value negative or larger than 16 bits. +17 E Divide overflow. (quotient > 2.0) +16 F Result larger than 31 bits and positive. +15 F Result larger than 31 bits and negative. +14 G1 Value larger than 10 bits. +13 G2 Value larger than 10 bits. +12 H Value negative or larger than 12 bits. + + +-------------------------------------------------------------------------- +RTPS 15 Perspective transformation +Fields: none +Opcode: cop2 $0180001 + +In: V0 Vector to transform. 
[1,15,0] + R Rotation matrix [1,3,12] + TR Translation vector [1,31,0] + H View plane distance [0,16,0] + DQA Depth que interpolation values. [1,7,8] + DQB [1,7,8] + OFX Screen offset values. [1,15,16] + OFY [1,15,16] +Out: SXY fifo Screen XY coordinates.(short) [1,15,0] + SZ fifo Screen Z coordinate.(short) [0,16,0] + IR0 Interpolation value for depth queing. [1,3,12] + IR1 Screen X (short) [1,15,0] + IR2 Screen Y (short) [1,15,0] + IR3 Screen Z (short) [1,15,0] + MAC1 Screen X (long) [1,31,0] + MAC2 Screen Y (long) [1,31,0] + MAC3 Screen Z (long) [1,31,0] + +Calculation: +[1,31,0] MAC1=A1[TRX + R11*VX0 + R12*VY0 + R13*VZ0] [1,31,12] +[1,31,0] MAC2=A2[TRY + R21*VX0 + R22*VY0 + R23*VZ0] [1,31,12] +[1,31,0] MAC3=A3[TRZ + R31*VX0 + R32*VY0 + R33*VZ0] [1,31,12] +[1,15,0] IR1= Lm_B1[MAC1] [1,31,0] +[1,15,0] IR2= Lm_B2[MAC2] [1,31,0] +[1,15,0] IR3= Lm_B3[MAC3] [1,31,0] + SZ0<-SZ1<-SZ2<-SZ3 +[0,16,0] SZ3= Lm_D(MAC3) [1,31,0] + SX0<-SX1<-SX2, SY0<-SY1<-SY2 +[1,15,0] SX2= Lm_G1[F[OFX + IR1*(H/SZ)]] [1,27,16] +[1,15,0] SY2= Lm_G2[F[OFY + IR2*(H/SZ)]] [1,27,16] +[1,31,0] MAC0= F[DQB + DQA * (H/SZ)] [1,19,24] +[1,15,0] IR0= Lm_H[MAC0] [1,31,0] + +Notes: +Z values are limited downwards at 0.5 * H. For smaller z values you'll have +write your own routine. +-------------------------------------------------------------------------- +RTPT 23 Perspective Transformation on 3 points. +Fields none +opcode cop2 $0280030 + +in V0 Vector to transform. [1,15,0] + V1 [1,15,0] + V2 [1,15,0] + R Rotation matrix [1,3,12] + TR Translation vector [1,31,0] + H View plane distance [0,16,0] + DQA Depth que interpolation values. [1,7,8] + DQB [1,7,8] + OFX Screen offset values. [1,15,16] + OFY [1,15,16] +out SXY fifo Screen XY coordinates.(short) [1,15,0] + SZ fifo Screen Z coordinate.(short) [0,16,0] + IR0 Interpolation value for depth queing. 
[1,3,12] + IR1 Screen X (short) [1,15,0] + IR2 Screen Y (short) [1,15,0] + IR3 Screen Z (short) [1,15,0] + MAC1 Screen X (long) [1,31,0] + MAC2 Screen Y (long) [1,31,0] + MAC3 Screen Z (long) [1,31,0] + +Calculation: Same as RTPS, but repeats for V1 and V2. +-------------------------------------------------------------------------- +MVMVA 8 Multiply vector by matrix and vector addition. +Fields: sf,mx,v,cv,lm +Opcode: cop2 $0400012 + +in: V0/V1/V2/IR Vector v0, v1, v2 or [IR1,IR2,IR3] + R/LLM/LCM Rotation, light or color matrix. [1,3,12] + TR/BK Translation or background color vector. +out: [IR1,IR2,IR3] Short vector + [MAC1,MAC2,MAC3] Long vector + +Calculation: +MX = matrix specified by mx +V = vector specified by v +CV = vector specified by cv + + + MAC1=A1[CV1 + MX11*V1 + MX12*V2 + MX13*V3] + MAC2=A2[CV2 + MX21*V1 + MX22*V2 + MX23*V3] + MAC3=A3[CV3 + MX31*V1 + MX32*V2 + MX33*V3] + IR1=Lm_B1[MAC1] + IR2=Lm_B2[MAC2] + IR3=Lm_B3[MAC3] + +Notes: +The cv field allows selection of the far color vector, but this vector +is not added correctly by the GTE. +-------------------------------------------------------------------------- +DCPL 8 Depth Cue Color light +Fields: none +Opcode: cop2 $0680029 +In: RGB Primary color. R,G,B,CODE [0,8,0] + IR0 interpolation value. [1,3,12] + [IR1,IR2,IR3] Local color vector. [1,3,12] + CODE Code value from RGB. CODE [0,8,0] + FC Far color. 
[1,27,4] +Out: RGBn RGB fifo Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculation: +[1,27,4] MAC1=A1[R*IR1 + IR0*(Lm_B1[RFC - R * IR1])] [1,27,16] +[1,27,4] MAC2=A2[G*IR2 + IR0*(Lm_B1[GFC - G * IR2])] [1,27,16] +[1,27,4] MAC3=A3[B*IR3 + IR0*(Lm_B1[BFC - B * IR3])] [1,27,16] +[1,11,4] IR1=Lm_B1[MAC1] [1,27,4] +[1,11,4] IR2=Lm_B2[MAC2] [1,27,4] +[1,11,4] IR3=Lm_B3[MAC3] [1,27,4] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] +-------------------------------------------------------------------------- +DPCS 8 Depth Cueing. +Fields: none +Opcode: cop2 $0780010 + +In: IR0 Interpolation value [1,3,12] + RGB Color R,G,B,CODE [0,8,0] + FC Far color RFC,GFC,BFC [1,27,4] +Out: RGBn RGB fifo Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculations: +[1,27,4] MAC1=A1[(R + IR0*(Lm_B1[RFC - R])] [1,27,16][lm=0] +[1,27,4] MAC2=A2[(G + IR0*(Lm_B1[GFC - G])] [1,27,16][lm=0] +[1,27,4] MAC3=A3[(B + IR0*(Lm_B1[BFC - B])] [1,27,16][lm=0] +[1,11,4] IR1=Lm_B1[MAC1] [1,27,4][lm=0] +[1,11,4] IR2=Lm_B2[MAC2] [1,27,4][lm=0] +[1,11,4] IR3=Lm_B3[MAC3] [1,27,4][lm=0] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] +-------------------------------------------------------------------------- +INTPL 8 Interpolation of a vector and far color vector. +Fields: none +Opcode: cop2 $0980011 + +In: [IR1,IR2,IR3] Vector [1,3,12] + IR0 Interpolation value [1,3,12] + CODE Code value from RGB. 
CODE [0,8,0] + FC Far color RFC,GFC,BFC [1,27,4] +Out: RGBn RGB fifo Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculations: +[1,27,4] MAC1=A1[IR1 + IR0*(Lm_B1[RFC - IR1])] [1,27,16] +[1,27,4] MAC2=A2[IR2 + IR0*(Lm_B1[GFC - IR2])] [1,27,16] +[1,27,4] MAC3=A3[IR3 + IR0*(Lm_B1[BFC - IR3])] [1,27,16] +[1,11,4] IR1=Lm_B1[MAC1] [1,27,4] +[1,11,4] IR2=Lm_B2[MAC2] [1,27,4] +[1,11,4] IR3=Lm_B3[MAC3] [1,27,4] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] +-------------------------------------------------------------------------- +SQR 5 Square vector. +Fields: sf +Opcode: cop2 $0a00428 + sf=0 sf=1 +in: [IR1,IR2,IR3] vector [1,15,0][1,3,12] +out: [IR1,IR2,IR3] vector^2 [1,15,0][1,3,12] + [MAC1,MAC2,MAC3] vector^2 [1,31,0][1,19,12] + +Calculation: (left format sf=0, right format sf=1) + +[1,31,0][1,19,12] MAC1=A1[IR1*IR1] [1,43,0][1,31,12] +[1,31,0][1,19,12] MAC2=A2[IR2*IR2] [1,43,0][1,31,12] +[1,31,0][1,19,12] MAC3=A3[IR3*IR3] [1,43,0][1,31,12] +[1,15,0][1,3,12] IR1=Lm_B1[MAC1] [1,31,0][1,19,12][lm=1] +[1,15,0][1,3,12] IR2=Lm_B2[MAC2] [1,31,0][1,19,12][lm=1] +[1,15,0][1,3,12] IR3=Lm_B3[MAC3] [1,31,0][1,19,12][lm=1] +-------------------------------------------------------------------------- +NCS 14 Normal color +Fields: none +Opcode: cop2 $0C8041E + +In: V0 Normal vector [1,3,12] + BK Background color RBK,GBK,BBK [1,19,12] + CODE Code value from RGB. CODE [0,8,0] + LCM Color matrix [1,3,12] + LLM Light matrix [1,3,12] +Out: RGBn RGB fifo. 
Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +[1,19,12] MAC1=A1[L11*VX0 + L12*VY0 + L13*VZ0] [1,19,24] +[1,19,12] MAC2=A2[L21*VX0 + L22*VY0 + L23*VZ0] [1,19,24] +[1,19,12] MAC3=A3[L31*VX0 + L32*VY0 + L33*VZ0] [1,19,24] +[1,3,12] IR1= Lm_B1[MAC1] [1,19,12][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,19,12][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,19,12][lm=1] +[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3] [1,19,24] +[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3] [1,19,24] +[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3] [1,19,24] +[1,3,12] IR1= Lm_B1[MAC1] [1,19,12][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,19,12][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,19,12][lm=1] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] +-------------------------------------------------------------------------- +NCT 30 Normal color +Fields: none +Opcode: cop2 $0D80420 + +In: V0,V1,V2 Normal vector [1,3,12] + BK Background color RBK,GBK,BBK [1,19,12] + CODE Code value from RGB. CODE [0,8,0] + LCM Color matrix [1,3,12] + LLM Light matrix [1,3,12] +Out: RGBn RGB fifo. Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculation: Same as NCS, but repeated for V1 and V2. +-------------------------------------------------------------------------- +NCDS 19 Normal color depth cue single vector +Fields: none +Opcode: cop2 $0e80413 +In: V0 Normal vector [1,3,12] + BK Background color RBK,GBK,BBK [1,19,12] + RGB Primary color R,G,B,CODE [0,8,0] + LLM Light matrix [1,3,12] + LCM Color matrix [1,3,12] + IR0 Interpolation value [1,3,12] +Out: RGBn RGB fifo. 
Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculation: +[1,19,12] MAC1=A1[L11*VX0 + L12*VY0 + L13*VZ0] [1,19,24] +[1,19,12] MAC2=A2[L21*VX0 + L22*VY0 + L23*VZ0] [1,19,24] +[1,19,12] MAC3=A3[L31*VX0 + L32*VY0 + L33*VZ0] [1,19,24] +[1,3,12] IR1= Lm_B1[MAC1] [1,19,12][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,19,12][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,19,12][lm=1] +[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3] [1,19,24] +[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3] [1,19,24] +[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3] [1,19,24] +[1,3,12] IR1= Lm_B1[MAC1] [1,19,12][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,19,12][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,19,12][lm=1] +[1,27,4] MAC1=A1[R*IR1 + IR0*(Lm_B1[RFC-R*IR1])] [1,27,16][lm=0] +[1,27,4] MAC2=A2[G*IR2 + IR0*(Lm_B2[GFC-G*IR2])] [1,27,16][lm=0] +[1,27,4] MAC3=A3[B*IR3 + IR0*(Lm_B3[BFC-B*IR3])] [1,27,16][lm=0] +[1,3,12] IR1= Lm_B1[MAC1] [1,27,4][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,27,4][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,27,4][lm=1] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] +-------------------------------------------------------------------------- +NCDT 44 Normal color depth cue triple vectors +Fields: none +Opcode: cop2 $0f80416 +In: V0 Normal vector [1,3,12] + V1 Normal vector [1,3,12] + V2 Normal vector [1,3,12] + BK Background color RBK,GBK,BBK [1,19,12] + FC Far color RFC,GFC,BFC [1,27,4] + RGB Primary color R,G,B,CODE [0,8,0] + LLM Light matrix [1,3,12] + LCM Color matrix [1,3,12] + IR0 Interpolation value [1,3,12] +Out: RGBn RGB fifo. Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculation: +Same as NCDS but repeats for v1 and v2. +-------------------------------------------------------------------------- +DPCT 17 Depth Cueing.
+Fields: none +Opcode: cop2 $0F8002A + +In: IR0 Interpolation value [1,3,12] + RGB0,RGB1,RGB2 Colors in RGB fifo. Rn,Gn,Bn,CDn [0,8,0] + FC Far color RFC,GFC,BFC [1,27,4] +Out: RGBn RGB fifo Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculations: +[1,27,4] MAC1=A1[R0+ IR0*(Lm_B1[RFC - R0])] [1,27,16][lm=0] +[1,27,4] MAC2=A2[G0+ IR0*(Lm_B1[GFC - G0])] [1,27,16][lm=0] +[1,27,4] MAC3=A3[B0+ IR0*(Lm_B1[BFC - B0])] [1,27,16][lm=0] +[1,11,4] IR1=Lm_B1[MAC1] [1,27,4][lm=0] +[1,11,4] IR2=Lm_B2[MAC2] [1,27,4][lm=0] +[1,11,4] IR3=Lm_B3[MAC3] [1,27,4][lm=0] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] + +Performs this calculation 3 times, so all three RGB values have been +replaced by the depth cued RGB values. + +-------------------------------------------------------------------------- +NCCS 17 Normal Color Color single vector +Fields: none +Opcode: cop2 $108041B + +In: V0 Normal vector [1,3,12] + BK Background color RBK,GBK,BBK [1,19,12] + RGB Primary color R,G,B,CODE [0,8,0] + LLM Light matrix [1,3,12] + LCM Color matrix [1,3,12] +Out: RGBn RGB fifo. 
Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculation: + +[1,19,12] MAC1=A1[L11*VX0 + L12*VY0 + L13*VZ0] [1,19,24] +[1,19,12] MAC2=A2[L21*VX0 + L22*VY0 + L23*VZ0] [1,19,24] +[1,19,12] MAC3=A3[L31*VX0 + L32*VY0 + L33*VZ0] [1,19,24] +[1,3,12] IR1= Lm_B1[MAC1] [1,19,12][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,19,12][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,19,12][lm=1] +[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3] [1,19,24] +[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3] [1,19,24] +[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3] [1,19,24] +[1,3,12] IR1= Lm_B1[MAC1] [1,19,12][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,19,12][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,19,12][lm=1] +[1,27,4] MAC1=A1[R*IR1] [1,27,16] +[1,27,4] MAC2=A2[G*IR2] [1,27,16] +[1,27,4] MAC3=A3[B*IR3] [1,27,16] +[1,3,12] IR1= Lm_B1[MAC1] [1,27,4][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,27,4][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,27,4][lm=1] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] +-------------------------------------------------------------------------- +NCCT 39 Normal Color Color triple vector +Fields: none +Opcode: cop2 $118043F + +In: V0 Normal vector 1 [1,3,12] + V1 Normal vector 2 [1,3,12] + V2 Normal vector 3 [1,3,12] + BK Background color RBK,GBK,BBK [1,19,12] + RGB Primary color R,G,B,CODE [0,8,0] + LLM Light matrix [1,3,12] + LCM Color matrix [1,3,12] +Out: RGBn RGB fifo. Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculation: +Same as NCCS but repeats for v1 and v2. 
+-------------------------------------------------------------------------- +CDP 13 Color Depth Que +Fields: none +Opcode: cop2 $1280414 + +In: [IR1,IR2,IR3] Vector [1,3,12] + RGB Primary color R,G,B,CODE [0,8,0] + IR0 Interpolation value [1,3,12] + BK Background color RBK,GBK,BBK [1,19,12] + LCM Color matrix [1,3,12] + FC Far color RFC,GFC,BFC [1,27,4] +Out: RGBn RGB fifo Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculation: +[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3] [1,19,24] +[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3] [1,19,24] +[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3] [1,19,24] +[1,3,12] IR1= Lm_B1[MAC1] [1,19,12][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,19,12][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,19,12][lm=1] +[1,27,4] MAC1=A1[R*IR1 + IR0*(Lm_B1[RFC-R*IR1])] [1,27,16][lm=0] +[1,27,4] MAC2=A2[G*IR2 + IR0*(Lm_B2[GFC-G*IR2])] [1,27,16][lm=0] +[1,27,4] MAC3=A3[B*IR3 + IR0*(Lm_B3[BFC-B*IR3])] [1,27,16][lm=0] +[1,3,12] IR1= Lm_B1[MAC1] [1,27,4][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,27,4][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,27,4][lm=1] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] +-------------------------------------------------------------------------- +CC 11 Color Color. +Fields: none +Opcode: cop2 $138041C +In: [IR1,IR2,IR3] Vector [1,3,12] + BK Background color RBK,GBK,BBK [1,19,12] + RGB Primary color R,G,B,CODE [0,8,0] + LCM Color matrix [1,3,12] +Out: RGBn RGB fifo. 
Rn,Gn,Bn,CDn [0,8,0] + [IR1,IR2,IR3] Color vector [1,11,4] + [MAC1,MAC2,MAC3] Color vector [1,27,4] + +Calculations: +[1,19,12] MAC1=A1[RBK + LR1*IR1 + LR2*IR2 + LR3*IR3] [1,19,24] +[1,19,12] MAC2=A2[GBK + LG1*IR1 + LG2*IR2 + LG3*IR3] [1,19,24] +[1,19,12] MAC3=A3[BBK + LB1*IR1 + LB2*IR2 + LB3*IR3] [1,19,24] +[1,3,12] IR1= Lm_B1[MAC1] [1,19,12][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,19,12][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,19,12][lm=1] +[1,27,4] MAC1=A1[R*IR1] [1,27,16] +[1,27,4] MAC2=A2[G*IR2] [1,27,16] +[1,27,4] MAC3=A3[B*IR3] [1,27,16] +[1,3,12] IR1= Lm_B1[MAC1] [1,27,4][lm=1] +[1,3,12] IR2= Lm_B2[MAC2] [1,27,4][lm=1] +[1,3,12] IR3= Lm_B3[MAC3] [1,27,4][lm=1] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] [1,27,4] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] [1,27,4] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] [1,27,4] +-------------------------------------------------------------------------- +NCLIP 8 Normal clipping +Fields: +Opcode: cop2 $1400006 + +in: SXY0,SXY1,SXY2 Screen coordinates [1,15,0] +out: MAC0 Outerproduct of SXY1 and SXY2 with [1,31,0] + SXY0 as origin. + +Calculation: +[1,31,0] MAC0 = F[SX0*SY1+SX1*SY2+SX2*SY0-SX0*SY2-SX1*SY0-SX2*SY1] [1,43,0] +-------------------------------------------------------------------------- +AVSZ3 5 Average of three Z values +fields: +Opcode: cop2 $158002D + +in: SZ1, SZ2, SZ3 Z-Values [0,16,0] + ZSF3 Divider [1,3,12] +out: OTZ Average. [0,16,0] + MAC0 Average. [1,31,0] + +Calculation: +[1,31,0] MAC0=F[ZSF3*SZ1 + ZSF3*SZ2 + ZSF3*SZ3] [1,31,12] +[0,16,0] OTZ=Lm_D[MAC0] [1,31,0] +-------------------------------------------------------------------------- +AVSZ4 6 Average of four Z values +Fields: +Opcode: cop2 $168002E + +in: SZ1,SZ2,SZ3,SZ4 Z-Values [0,16,0] + ZSF4 Divider [1,3,12] +out: OTZ Average. [0,16,0] + MAC0 Average. 
[1,31,0] + +Calculation: +[1,31,0] MAC0=F[ZSF4*SZ0 + ZSF4*SZ1 + ZSF4*SZ2 + ZSF4*SZ3] [1,31,12] +[0,16,0] OTZ=Lm_D[MAC0] [1,31,0] +-------------------------------------------------------------------------- +OP 6 Outer product of 2 vectors +Fields: sf +Opcode: cop2 $170000C + +in: [R11R12,R22R23,R33] vector 1 + [IR1,IR2,IR3] vector 2 +out: [IR1,IR2,IR3] outer product + [MAC1,MAC2,MAC3] outer product + +Calculation: (D1=R11R12,D2=R22R23,D3=R33) + + MAC1=A1[D2*IR3 - D3*IR2] + MAC2=A2[D3*IR1 - D1*IR3] + MAC3=A3[D1*IR2 - D2*IR1] + IR1=Lm_B1[MAC1] + IR2=Lm_B2[MAC2] + IR3=Lm_B3[MAC3] + +-------------------------------------------------------------------------- +GPF 5 General purpose interpolation +Fields: sf +Opcode: cop2 $190003D + +in: IR0 scaling factor + CODE code field of RGB + [IR1,IR2,IR3] vector +out: [IR1,IR2,IR3] vector + [MAC1,MAC2,MAC3] vector + RGB2 RGB fifo. + +Calculation: + + MAC1=A1[IR0 * IR1] + MAC2=A2[IR0 * IR2] + MAC3=A3[IR0 * IR3] + IR1=Lm_B1[MAC1] + IR2=Lm_B2[MAC2] + IR3=Lm_B3[MAC3] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] +-------------------------------------------------------------------------- +GPL 5 General purpose interpolation +Fields: sf +Opcode: cop2 $1A0003E + +in: IR0 scaling factor + CODE code field of RGB + [IR1,IR2,IR3] vector + [MAC1,MAC2,MAC3] vector +out: [IR1,IR2,IR3] vector + [MAC1,MAC2,MAC3] vector + RGB2 RGB fifo. + +Calculation: + + MAC1=A1[MAC1 + IR0 * IR1] + MAC2=A2[MAC2 + IR0 * IR2] + MAC3=A3[MAC3 + IR0 * IR3] + IR1=Lm_B1[MAC1] + IR2=Lm_B2[MAC2] + IR3=Lm_B3[MAC3] +[0,8,0] Cd0<-Cd1<-Cd2<- CODE +[0,8,0] R0<-R1<-R2<- Lm_C1[MAC1] +[0,8,0] G0<-G1<-G2<- Lm_C2[MAC2] +[0,8,0] B0<-B1<-B2<- Lm_C3[MAC3] +-------------------------------------------------------------------------- +Field descriptions.
+ +bit |24 23 22 21 20|19|18 17|16 15|14 13|12 11|10| +desc| |sf|mx |v |cv | |lm| + +bit |09 08 07 06 05 04 03 02 01 00| +desc| | + +sf 0 Normal calculation. + 1 Calculations on data shifted 12 bits to the left in the IR regs. +(Not entirely sure about what really happens.) + +mx 0 Multiply with rotation matrix + 1 Multiply with light matrix + 2 Multiply with color matrix + 3 - + +v 0 V0 source vector (short) + 1 V1 source vector (short) + 2 V2 source vector (short) + 3 IR source vector (long) + +cv 0 Add translation vector (TR) + 1 Add back color vector (BK) + 2 Bugged. Should add far color vector. (FC) + 3 Add no vector + +lm 0 No negative limit. + 1 Limit negative results to 0. + +-------------------------------------------------------------------------- +A list of common MVMVA instructions: + +rtv0 cop2 $0486012 v0 * rotmatrix +rtv1 cop2 $048E012 v1 * rotmatrix +rtv2 cop2 $0496012 v2 * rotmatrix + +rtir12 cop2 $049E012 ir * rotmatrix +rtir0 cop2 $041E012 ir * rotmatrix. + +rtv0tr cop2 $0480012 v0 * rotmatrix + tr vector +rtv1tr cop2 $0488012 v1 * rotmatrix + tr vector +rtv2tr cop2 $0490012 v2 * rotmatrix + tr vector +rtirtr cop2 $0498012 ir * rotmatrix + tr vector + +rtv0bk cop2 $0482012 v0 * rotmatrix + bk vector +rtv1bk cop2 $048A012 v1 * rotmatrix + bk vector +rtv2bk cop2 $0492012 v2 * rotmatrix + bk vector +rtirbk cop2 $049A012 ir * rotmatrix + bk vector + +ll cop2 $04A6412 v0 * light matrix. Lower limit result to 0. 
+ +llv0 cop2 $04A6012 v0 * light matrix +llv1 cop2 $04AE012 v1 * light matrix +llv2 cop2 $04B6012 v2 * light matrix +llir cop2 $04BE012 ir * light matrix + +llv0tr cop2 $04A0012 v0 * light matrix + tr vector +llv1tr cop2 $04A8012 v1 * light matrix + tr vector +llv2tr cop2 $04B0012 v2 * light matrix + tr vector +llirtr cop2 $04B8012 ir * light matrix + tr vector + +llv0bk cop2 $04A2012 v0 * light matrix + bk vector +llv1bk cop2 $04AA012 v1 * light matrix + bk vector +llv2bk cop2 $04B2012 v2 * light matrix + bk vector +llirbk cop2 $04BA012 ir * light matrix + bk vector + +lc cop2 $04DA412 + +lcv0 cop2 $04C6012 v0 * color matrix +lcv1 cop2 $04CE012 v1 * color matrix +lcv2 cop2 $04D6012 v2 * color matrix +lcir cop2 $04DE012 ir * color matrix + +lcv0tr cop2 $04C0012 v0 * color matrix + tr vector +lcv1tr cop2 $04C8012 v1 * color matrix + tr vector +lcv2tr cop2 $04D0012 v2 * color matrix + tr vector +lcirtr cop2 $04D8012 ir * color matrix + tr vector + +lcv0bk cop2 $04C2012 v0 * color matrix + bk vector +lcv1bk cop2 $04CA012 v1 * color matrix + bk vector +lcv2bk cop2 $04D2012 v2 * color matrix + bk vector +lcirbk cop2 $04DA012 ir * color matrix + bk vector + +Other instructions: + +sqr12 cop2 $0A80428 square of ir (1,19,12) +sqr0 cop2 $0A00428 (1,31, 0) + +op12 cop2 $178000C outer product (1,19,12) +op0 cop2 $170000C (1,31, 0) + +gpf12 cop2 $198003D general purpose interpolation (1,19,12) +gpf0 cop2 $190003D (1,31, 0) + +gpl12 cop2 $1A8003E general purpose interpolation (1,19,12) +gpl0 cop2 $1A0003E (1,31, 0) + +-------------------------------------------------------------------------- +doomed@c64.org <- corrections/additions latest update -> psx.rules.org +-------------------------------------------------------------------------- + 8/jun/1999 Initial version. +23/aug/1999 Almost completely rewritten. + 2/feb/2000 Small fix. +-------------------------------------------------------------------------- +If you miss anything that was in the previous version please mail me. 
+-------------------------------------------------------------------------- + + +\ No newline at end of file diff --git a/misc/psx_documentation_project.pdf b/misc/psx_documentation_project.pdf Binary files differ. diff --git a/misc/spu.txt b/misc/spu.txt @@ -0,0 +1,526 @@ +========================================================================== +SPU - Sound Processing Unit. Information & Documentation. +========================================================================== + +Disclaimer. +-------------------------------------------------------------------------- +This document is a collection of all info on the SPU i could find and my +own notes. Most of this is the result of experiment, so not all info might +be correct. This document is most probably not complete, and not all +capabilities and quirks of the SPU are documented. No responsibility is +taken for anything that might occur using the information in this document. + + +Introduction. +-------------------------------------------------------------------------- +The SPU is the unit responsible for all aural capabilities of the psx. It +handles 24 voices, has a 512kb sound buffer, has ADSR envelope filters for +each voice and lots of other features. + + +Notations and conventions +When the format of data is given it's shown as a bitwise representation +like this: + + +bit |0f|0e 0d 0c 0b 0a|09 08 07 06 05|04 03 02 01 00| +desc.| | + +The bit row shows which bits of the data are used, and separators are used +to show where the different elements of the data stop and start. MSB is on +the left, LSB is on the right. Stuff like |0f-08| means bit $0f to bit $08. +The desc. row shows the description of the different elements. With +separators where the element starts and ends. + + + +-------------------------------------------------------------------------- +The Sound Buffer +-------------------------------------------------------------------------- + +The SPU has control over a 512kb sound buffer. 
Data is stored compressed +into blocks of 16 bytes. Each block contains 14 packed sample bytes and two +header bytes, one for the packing and one for sample end and looping +information. One such block is decoded into 28 sample bytes (= 14 16bit +samples). + +In the first 4 kb of the buffer the SPU stores the decoded data of CD audio +after volume processing and the sound data of voice 1 and voice 3 after +envelope processing. The decoded data is stored as 16 bit signed values, +one sample per clock (44.1 khz). + +Following this first 4kb are 8 bytes reserved by the system. The memory +beyond that is free to store samples, up to the reverb work area if the +effect processor is used. The size of this work area depends on which +type of effect is being processed. More on that later. + +Memory layout: +$00000-$003ff CD audio left +$00400-$007ff CD audio right +$00800-$00bff Voice 1 +$00c00-$00fff Voice 3 +$01000-$01007 System area. +$01008-$xxxxx Sound data area. +$0xxxx-$7ffff Reverb work area. + +-------------------------------------------------------------------------- +Voices. +-------------------------------------------------------------------------- +The SPU has 24 hardware voices. These voices can be used to reproduce sample +data, noise or can be used as frequency modulator on the next voice. +Each voice has its own programmable ADSR envelope filter. The main volume +can be programmed independently for left and right output. + +The ADSR envelope filter works as follows: +Ar = Attack rate, which specifies the speed at which the volume increases + from zero to its maximum value, as soon as the note on is given. The + slope can be set to linear or exponential. +Dr = Decay rate specifies the speed at which the volume decreases to the + sustain level. Decay is always decreasing exponentially. +Sl = Sustain level, base level from which sustain starts. +Sr = Sustain rate is the rate at which the volume of the sustained note + increases or decreases.
This can be either linear or exponential. +Rr = Release rate is the rate at which the volume of the note decreases + as soon as the note off is given. + + lvl | + ^ | /\Dr __ + Sl _| _ / _ \__--- \ + | / ---__ \ Rr + | /Ar Sr \ \ + | / \\ + |/___________________\________ + ->time + +The overall volume can also be set to sweep up or down linearly or +exponentially from its current value. This can be done separately +for left and right. + + +-------------------------------------------------------------------------- +SPU Operation +-------------------------------------------------------------------------- + +The SPU occupies the area $1f801c00-$1f801dff. All registers are 16 bit +wide. + +============================================================= +$1f801c00- Voice data area. For each voice there are 8 16 bit +$1f801d7f registers structured like this: + +(xx = $c0 + voice number) +------------------------------------------------------------- +$1f801xx0 Volume Left +$1f801xx2 Volume Right + +Volume mode: +bit |0f|0e|0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.| 0| S| VV | + +VV $0000-$3fff Voice volume. +S 0 Phase Normal + 1 Inverted + +Sweep mode: +bit |0f|0e|0d|0c|0b 0a 09 08 07|06 05 04 03 02 01 00| +desc.| 1|Sl|Dr|Ph| |VV | + +VV $0000-$007f Voice volume. +Sl 0 Linear slope + 1 Exponential slope +Dr 0 Increase + 1 Decrease +Ph 0 Normal phase + 1 Inverted phase + +In sweep mode, the current volume increases to its maximum value, +or decreases to its minimum value, according to mode. Choose +phase equal to the phase of the current volume. +------------------------------------------------------------- +$1f801xx4 Pitch +bit |0f 0e|0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.| |Pt | + +Pt $0000-$3fff Specifies pitch.
+ +Any value can be set, table shows only octaves: +$0200 - 3 octaves +$0400 - 2 +$0800 - 1 +$1000 sample pitch +$2000 + 1 +$3fff + 2 +------------------------------------------------------------- +$1f801xx6 Startaddress of Sound +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|Addr | + +Addr Startaddress of sound in Sound buffer /8 +------------------------------------------------------------- +$1f801xx8 Attack/Decay/Sustain level +bit |0f|0e 0d 0c 0b 0a 09 08|07 06 05 04|03 02 01 00| +desc.|Am| Ar |Dr |Sl | + +Am 0 Attack mode Linear + 1 Exponential + +Ar 0-7f attack rate +Dr 0-f decay rate +Sl 0-f sustain level +------------------------------------------------------------- +$1f801xxa Sustain rate, Release Rate. +bit |0f|0e|0d|0c 0b 0a 09 08 07 06|05|04 03 02 01 00| +desc.|Sm|Sd| 0| Sr |Rm|Rr | + +Sm 0 sustain rate mode linear + 1 exponential +Sd 0 sustain rate mode increase + 1 decrease +Sr 0-7f Sustain Rate +Rm 0 Linear decrease + 1 Exponential decrease +Rr 0-1f Release Rate + +Note: decay mode is always Expontial decrease, and thus cannot +be set. +------------------------------------------------------------- +$1f801xxc Current ADSR volume +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|ADSRvol | + +ADSRvol Returns the current envelope volume when + read. +------------------------------------------------------------- +$1f801xxe Repeat address. +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|Ra | + +Ra $0000-$ffff Address sample loops to at end. + +Note: Setting this register only has effect after the voice +has started (ie. KeyON), else the loop address gets reset +by the sample. +============================================================= +$1f801d80 Mainvolume left +$1f801d82 Mainvolume right +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.| | + +Sets Main volume, these work the same as the channel volume +registers. See those for details. 
+------------------------------------------------------------- +$1f801d84 Reverberation depth left +$1f801d86 Reverberation depth right +bit |0f|0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|P |Rvd | + +Rvd $0000-$7fff Sets the wet volume for the effect. +P 0 Normal phase + 1 Inverted phase +============================================================= +Following registers have a common layout: + +first register: +bit |0f|0e|0d|0c|0b|0a|09|08|07|06|05|04|03|02|01|00| +desc.|cf|ce|cd|cc|cb|ca|c9|c8|c7|c6|c5|c4|c3|c2|c1|c0| + +second register: +bit |0f 08|07 |06 |05 |04 |03 |02 |01 | 00| +desc.| 0|c17|c16|c15|c14|c13|c12|c11|c10| + +c0-c17 0 Mode for channel c?? off + 1 Mode for channel c?? on +------------------------------------------------------------- +$1f801d88 Voice ON (0-15) +$1f801d8a Voice ON (16-23) + +Sets the current voice to key on. (ie. start ads) +------------------------------------------------------------- +$1f801d8c Voice OFF (0-15) +$1f801d8e Voice OFF (16-23) + +Sets the current voice to key off.(ie. release) +------------------------------------------------------------- +$1f801d90 Channel FM (pitch lfo) mode (0-15) +$1f801d92 Channel FM (pitch lfo) mode (16-23) + +Sets the channel frequency modulation. Uses the previous channel +as modulator. +------------------------------------------------------------- +$1f801d94 Channel Noise mode (0-15) +$1f801d96 Channel Noise mode (16-23) + +Sets the channel to noise. +------------------------------------------------------------- +$1f801d98 Channel Reverb mode (0-15) +$1f801d9a Channel Reverb mode (16-23) + +Sets reverb for the channel. As soon as the sample ends, the +reverb for that channel is turned off. +------------------------------------------------------------- +$1f801d9c Channel ON/OFF (0-15) ? +$1f801d9e Channel ON/OFF (16-23) ? + +Returns wether the channel is mute or not. ? 
+============================================================= +$1f801da2 Reverb work area start +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|Revwa | + +Revwa $0000-$ffff Reverb work area start in sound buffer /8 +------------------------------------------------------------- +$1f801da4 Sound buffer IRQ address. +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|IRQa | + +IRQa $0000-$ffff IRQ address in sound buffer /8 +?? +------------------------------------------------------------- +$1f801da6 Sound buffer address +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|Sba | + +SBA $0000-$ffff Address in sound buffer divided by eight. + Next transfer to this address. +------------------------------------------------------------- +$1f801da8 SPU data +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.| | + +Data forwarding reg, for non DMA transfer. +------------------------------------------------------------- +$1f801daa SPU control sp0 +bit |0f|0e|0d 0c 0b 0a 09 08|07|06 |05 04|03|02|01|00| +desc.|En|Mu|Noise |Rv|Irq|DMA |Er|Cr|Ee|Ce| + +En 0 SPU off + 1 SPU on +Mu 0 Mute SPU + 1 Unmute SPU +Noise Noise clock frequency +Rv 0 Reverb Disabled + 1 Reverb Enabled +Irq 0 Irq disabled + 1 Irq enabled +DMA 00 + 01 Non DMA write? (transfer through data reg) + 10 DMA Write + 11 DMA Read +Er 0 Reverb for external off + 1 Reverb for external on +Cr 0 Reverb for CD off + 1 Reverb for CD on +Ee 0 External audio off + 1 External audio on +Ce 0 CD audio off + 1 CD audio on +------------------------------------------------------------- +$1f801dac SPU status +bit |0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.| | + +Don't know what this is for, but in SPU init routines this +register get loaded with $4. 
+------------------------------------------------------------- +$1f801dae SPU status +bit |0f 0e 0d 0c|0b|0a|09 08 07 06 05 04 03 02 01 00| +desc.| |Dh|Rd| | + +Dh 0 Decoding in first half of buffer + 1 Decoding in second half of buffer +Rd 0 Spu ready to transfer + 1 Spu not ready + +Some of bits 9-0 are also ready/not ready states. More on +that later. Functions that wait for the SPU to be ready, +wait for bits a-0 to become 0. +------------------------------------------------------------- +$1f801db0 CD volume left +$1f801db2 CD volume right +bit |0f|0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|P |CDvol | + +CDvol $0000-$7fff Set volume of CD input. +P 0 Normal phase. + 1 Inverted phase. +------------------------------------------------------------- +$1f801db4 Extern volume left +$1f801db6 Extern volume right +bit |0f|0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00| +desc.|P |Exvol | + +Exvol $0000-$7fff Set volume of External input. +P 0 Normal phase. + 1 Inverted phase. +------------------------------------------------------------- +1dc0-1dff Reverb configuration area +$1f801dc0 +$1f801dc2 +$1f801dc4 Lowpass Filter Frequency. 7fff = max value= no filtering +$1f801dc6 Effect volume 0 - $7fff, bit 15 = phase. +$1f801dc8 +$1f801dca +$1f801dcc +$1f801dce Feedback +$1f801dd0 +$1f801dd2 +$1f801dd4 Delaytime(see below) +$1f801dd6 Delaytime(see below) +$1f801dd8 Delaytime(see below) +$1f801dda +$1f801ddc +$1f801dde +$1f801de0 Delaytime(see below) +$1f801de2 +$1f801de4 +$1f801de6 +$1f801de8 +$1f801dea +$1f801dec +$1f801dee +$1f801df0 +$1f801df2 +$1f801df4 Delaytime +$1f801df6 Delaytime +$1f801df8 +$1f801dfa +$1f801dfc +$1f801dfe + +-------------------------------------------------------------------------- +Reverb +-------------------------------------------------------------------------- +The SPU is equipped with an effect processor for reverb echo and delay type +of effects. 
This effect processor can do one effect at a time, and for each +voice you can specify whether it should have the effect applied or not. + +The effect is set up by initializing the registers $1dc0 to $1dfe to the +desired effect. I do not exactly know how these work, but you can use +the presets below. + +The effect processor needs a bit of sound buffer memory to perform its +calculations. The size of this depends on the effect type. For the presets +the sizes are: + +Reverb off $00000 Hall $0ade0 +Room $026c0 Space echo $0f6c0 +Studio small $01f40 Echo $18040 +Studio medium $04840 Delay $18040 +Studio large $06fe0 Half echo $03c00 + +The location at which the work area is located is set in register $1da2 +and its value is the location in the sound buffer divided by eight. Common +values are as follows: + +Reverb off $FFFE Hall $EA44 +Room $FB28 Space echo $E128 +Studio small $FC18 Echo $CFF8 +Studio medium $F6F8 Delay $CFF8 +Studio large $F204 Half echo $F880 + +For the delay and echo effects (not space echo or half echo) you can +specify the delay time, and feedback. (range 0-127) Calculations are shown +below. + +When you set up a new reverb effect, take the following steps: + +-Turn off the reverb (bit 7 in sp0) +-Set Depth to 0 +-First make delay & feedback calculations. +-Copy the preset to the effect registers +-Turn on the reverb +-Set Depth to desired value. + +Also make sure the reverb work area is cleared, else you might get +some unwanted noise. + +To use the effect on a voice, simply turn on the corresponding bit in the +channel reverb registers. Note that these get turned off automatically when +the sample for the channel ends.
+ + +------------------------------------------------------------- +Effect presets: copy these in order to $1dc0-$1dfe + +Reverb off: +$0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000 +$0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000 +$0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000 +$0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000 + +Room: +$007D, $005B, $6D80, $54B8, $BED0, $0000, $0000, $BA80 +$5800, $5300, $04D6, $0333, $03F0, $0227, $0374, $01EF +$0334, $01B5, $0000, $0000, $0000, $0000, $0000, $0000 +$0000, $0000, $01B4, $0136, $00B8, $005C, $8000, $8000 + +Studio Small: +$0033, $0025 $70F0 $4FA8 $BCE0 $4410 $C0F0 $9C00 +$5280 $4EC0 $03E4 $031B $03A4 $02AF $0372 $0266 +$031C $025D $025C $018E $022F $0135 $01D2 $00B7 +$018F $00B5 $00B4 $0080 $004C $0026 $8000 $8000 + +Studio Medium: +$00B1 $007F $70F0 $4FA8 $BCE0 $4510 $BEF0 $B4C0 +$5280 $4EC0 $0904 $076B $0824 $065F $07A2 $0616 +$076C $05ED $05EC $042E $050F $0305 $0462 $02B7 +$042F $0265 $0264 $01B2 $0100 $0080 $8000 $8000 + +Studio Large: +$00E3 $00A9 $6F60 $4FA8 $BCE0 $4510 $BEF0 $A680 +$5680 $52C0 $0DFB $0B58 $0D09 $0A3C $0BD9 $0973 +$0B59 $08DA $08D9 $05E9 $07EC $04B0 $06EF $03D2 +$05EA $031D $031C $0238 $0154 $00AA $8000 $8000 + +Hall: +$01A5 $0139 $6000 $5000 $4C00 $B800 $BC00 $C000 +$6000 $5C00 $15BA $11BB $14C2 $10BD $11BC $0DC1 +$11C0 $0DC3 $0DC0 $09C1 $0BC4 $07C1 $0A00 $06CD +$09C2 $05C1 $05C0 $041A $0274 $013A $8000 $8000 + +Space Echo: +$033D $0231 $7E00 $5000 $B400 $B000 $4C00 $B000 +$6000 $5400 $1ED6 $1A31 $1D14 $183B $1BC2 $16B2 +$1A32 $15EF $15EE $1055 $1334 $0F2D $11F6 $0C5D +$1056 $0AE1 $0AE0 $07A2 $0464 $0232 $8000 $8000 + +Echo: +$0001 $0001 $7FFF $7FFF $0000 $0000 $0000 $8100 +$0000 $0000 $1FFF $0FFF $1005 $0005 $0000 $0000 +$1005 $0005 $0000 $0000 $0000 $0000 $0000 $0000 +$0000 $0000 $1004 $1002 $0004 $0002 $8000 $8000 + +Delay: + +$0001 $0001 $7FFF $7FFF $0000 $0000 $0000 $0000 +$0000 $0000 $1FFF $0FFF $1005 $0005 $0000 $0000 +$1005 $0005 $0000 $0000 $0000 $0000 
$0000 $0000 +$0000 $0000 $1004 $1002 $0004 $0002 $8000 $8000 + +Half Echo: +$0017 $0013 $70F0 $4FA8 $BCE0 $4510 $BEF0 $8500 +$5F80 $54C0 $0371 $02AF $02E5 $01DF $02B0 $01D7 +$0358 $026A $01D6 $011E $012D $00B1 $011F $0059 +$01A0 $00E3 $0058 $0040 $0028 $0014 $8000 $8000 + +------------------------------------------------------------- +Delay time calculation: +Choose delay time in range 0-$7f. rXXXX means register $1f80XXXX. + +r1dd4 = dt*64.5 - r1dc0 +r1dd6 = dt*32.5 - r1dc2 + +r1dd8 = r1dda + dt*32.5 +r1de0 = r1de2 + dt*32.5 +r1df4 = r1df8 + dt*32.5 +r1df6 = r1dfa + dt*32.5 + +-------------------------------------------------------------------------- +doomed@c64.org <- corrections/additions latest update -> psx.rules.org +-------------------------------------------------------------------------- + 5/jun/1999 First posting. Far from completion. + +(thanx to ppl in <>) +-------------------------------------------------------------------------- +thanx & hello to the usual. + diff --git a/misc/system.txt b/misc/system.txt @@ -0,0 +1,865 @@ +-------------------------------------------------------------------------- +System Operation +-------------------------------------------------------------------------- + +Introduction +-------------------------------------------------------------------------- +This text covers the usage of the R3000, the system control coprocessor and +hardware registers, the file server and some system calls. + +-------------------------------------------------------------------------- +R3000 +-------------------------------------------------------------------------- +The heart of the psx is a MIPS R3000. The version in the PSX has two +coprocessors (cop0 - System Control Coprocessor, cop2 - GTE), one +multiplier/divider, 32 general registers, one ALU, one shifter, one +address adder, 4kb of Instruction Cache, 1 kb of Data cache and NO floating +point unit.
+ +Registers +------------------------------------------------------------- +All registers are 32 bits wide. + + 0 zero Constant, always 0 + 1 at Assembler temporary. + 2- 3 v0-v1 Subroutine return values + 4- 7 a0-a3 Subroutine arguments + 8-15 t0-t7 Temporaries, may be changed by subroutines +16-23 s0-s7 Register variables, must be saved by subs. +24-25 t8-t9 Temporaries, may be changed by subroutines +26-27 k0-k1 Reserved for the kernel +28 gp Global pointer +29 sp Stack pointer +30 fp(s8) 9th register variable, subs can use this as a frame + pointer +31 ra Return address + + - pc Program counter + - hi,lo Registers of the multiplier/divider. + +All registers behave the same; the remarks are not hardware bound, but general +good programming practice. Respect these for compatibility, especially if +you intend to use kernel routines. +Exceptions are registers 0 and 31. Zero will always return 0, regardless +of any writing attempts. Ra is used by the normal jal instruction for the +return address (it points to the second instruction after the jal). Note that +the jalr instruction can use any register for the return address, though +usually only register 31 is used. + +The PC is not really a register, and should not be seen like one. Hi, Lo +are the registers which the multiplier/divider returns its results to. +Special instructions are implemented to deal with them. + +------------------------------------------------------------- +Instructions +------------------------------------------------------------- +rt target register (cpu general register 0-31) +rs source register (cpu general register 0-31) +rd destination register (cpu general register 0-31) +base base register (cpu general register 0-31) +imm 16 bit immediate +b? immediate value of ? bits wide. +c0r Coprocessor 0 register +c2d Coprocessor 2 (GTE) data register +c2c Coprocessor 2 (GTE) control register + + +imm(base) means an address of the value in the register + the immediate + value.
+ +inst instruction name. +d number of instructions to wait before using r1 (target reg). +args format of the operand fields. +desc. description of the instruction. + + +inst d args desc. + +*Load/Store instructions + +lb 1 rt,imm(base) loads lowest byte of rt with addressed byte and + extends sign. +lbu 1 rt,imm(base) loads lowest byte of rt with addressed byte. +lh 1 rt,imm(base) loads lowest halfword of rt with addressed halfword + and extends sign. +lhu 1 rt,imm(base) loads lowest halfword of rt with addressed halfword. +lw 1 rt,imm(base) loads r1 with addressed word. +lwl 0 rt,imm(base) loads high order byte of rt with addressed byte and + then loads up to the low order word boundary into rt. +lwr 0 rt,imm(base) loads low order byte of rt with addressed byte and + then loads up to the high order word boundary into + rt. + + There's no delay for lwl and lwr, so you can use them + directly following eachother. fe. to load a word + anywhere in memory without regard to alignment: + lwl a0,$0003(t0) + lwr a0,$0000(t0) + +sb 1 rt,imm(base) stores lowest byte of rt in addressed byte. +sh 1 rt,imm(base) stores lowest halfword of rt in addressed halfword. +sw 1 rt,imm(base) stores rt in addressed word. +swl 0 rt,imm(base) unaligned store, see lwl +swr 0 rt,imm(base) unaligned store, see lwr + +lui 0 rt,imm loads rt with immediate<<$10 + +*arithmic instructions + +When an arithmic overflow occurs, rd will not be modified. + +add 0 rd,rs,rt Adds rt to rs and stores the result in rd. +addu 0 rd,rs,rt Adds rt to rs, ignores arithmic overflow and stores + result in rd. +sub 0 rd,rs,rt Substracts rt from rs and stores result in rd. +subu 0 rd,rs,rt Substracts rt from rs, ignores arithmic overflow and + stores result in rd. + +addi 0 rd,rs,imm Adds signextended immediate to rs, and stores the + result in rd. +addiu 0 rd,rs,imm Adds signextended immediate to rs, ignores arithmic + overflow and stores the result in rd. 
+ +subi 0 rd,rs,imm Substracts signextended immediate from rs and stores + the result in rd. +subiu 0 rd,rs,imm Substracts signextended immediate from rs, ignores + arithmic overflow, and stores the result in rd. + +mult rs,rt Multiplies rs with rt, and stores the 64 bit sign + extended result in hi/lo. +multu rs,rt Multiplies rs with rt, and stores the 64 bit result + in hi/lo. +div rs,rt Divides rs by rt, and stores the quotient into lo, + and the remainder into high. Results are sign + extended. +divu rs,rt Divides rs by rt, and stores the quotient into lo, + and the remainder into high. + + +*logical instructions + +and 0 rd,rs,rt Performs a bit wise AND between rs and rt, and + stores the result in rd. +or 0 rd,rs,rt Performs a bit wise OR between rs and rt, and + stores the result in rd. +xor 0 rd,rs,rt Performs a bit wise XOR between rs and rt, and + stores the result in rd. +nor 0 rd,rs,rt Performs a bit wise NOR between rs and rt, and + stores the result in rd. + +andi 0 rd,rs,imm Performs a bit wise AND between rs and unsigned + immediate and stores the result in rd. +ori 0 rd,rs,imm Performs a bit wise OR between rs and unsigned + immediate and stores the result in rd. +xori 0 rd,rs,imm Performs a bit wise XOR between rs and unsigned + immediate and stores the result in rd. + +*shifting instructions + +sllv 0 rd,rs,rt Shifts rs rt bits to the left and stores the result + in rd. +srlv 0 rd,rs,rt Shifts rs rt bits to the right and stores the result + in rd. +srav 0 rd,rs,rt Shifts the value in rs rt bits to the right, + preserving sign, and stores the value in rd. + + +sll 0 rd,rs,b5 Shifts rs b5 bits to the left and stores the result + in rd. +srl 0 rd,rs,b5 Shifts rs b5 bits to the right and stores the result + in rd. +sra 0 rd,rs,b5 Shifts rs b5 bits to the right, preserving sign and + stores the result in rd. + +*comparison instructions. 
+ +slt 0 rd,rs,rt rd=1 if rs < rt, else rd = 0 +sltu 0 rd,rs,rt rd=1 if (unsigned)rs <(unsigned)rt, else rd = 0 + +slti 0 rd,rs,imm rd=1 if rs < imm, else rd = 0 +sltiu 0 rd,rs,imm rd=1 if (unsigned)rs < (unsigned)imm, else rd = 0 + +*jumps and branches + +Note that the instruction following the branch will always be executed. + +j target jumps to target +jal target jumps to target and stores pc+8 into RA (second + instruction after the jal instruction) + +jr rd jumps to address in rd +jalr (rt,) rd jumps to address in rd and stores pc+8 into RA, or + in rt. + +beq rs,rt,imm branches to imm if rs == rt +bne rs,rt,imm branches to imm if rs != rt + +bgtz rs,imm branches to imm if rs > 0 +bltz rs,imm branches to imm if rs < 0 +blez rs,imm branches to imm if rs <= 0 +bgez rs,imm branches to imm if rs >= 0 +bltzal rs,imm branches to imm and stores pc+8 into RA if rs < 0 +bgezal rs,imm branches to imm and stores pc+8 into RA if rs >= 0 + +*system instructions + +mfhi 2 rd moves HI into rd +mflo 2 rd moves LO into rd +mthi 2 rs moves rs into HI +mtlo 2 rs moves rs into LO + +mtc0 2 rs,c0r moves rs into cop0 register c0r +mfc0 2 rd,c0r moves cop0 register c0r into rd + +mtc2 2 rs,c2d moves rs into cop2 data register c2d +mfc2 2 rd,c2d moves cop2 data register c2d into rd + +ctc2 2 rs,c2c moves rs into cop2 control register c2c +cfc2 2 rd,c2c moves cop2 control register c2c into rd + +lwc2 1 c2d,imm(base) loads cop2 data register with addressed word +swc2 1 c2d,imm(base) stores cop2 data register at addressed word + +syscall (b20) generates a system call exception +break (b20) generates a breakpoint exception + the 20-bit wide code field is not passed, but + must be read from the instruction itself if you + want to use it. + +cop2 b25 Coprocessor operation is started. b25 is + passed as parameter. + +rfe restores the interrupt enable and kernel + privilege bits. + +tlb instructions see MIPS doc.
+ +-------------------------------------------------------------------------- +Cop0 - System control coprocessor +-------------------------------------------------------------------------- + +Registers: +# Name rw Desciption. + +------------------------------------------------------------- +16 ERREG +------------------------------------------------------------- +15 PRid r COP0 type and rev level +bit |31 16|15 8|7 0| +desc| |Imp |Rev | + +Imp 3 CP0 type R3000A + 7 IDT unique (3041) use REV to determine correct + config. +Rev Revision level. +------------------------------------------------------------- +14 EPC r Return address from trap + +Contains the return address after an exception. This address is +the instruction at which the exception took place, unless BD is +set in CAUSE, when the instruction is EPC+4. +------------------------------------------------------------- +13 CAUSE r Describes the most recently recognised exception +bit |31|30|29 28|27 26 25 24 23 22 21 20 19 18 17 16| +desc|BD| 0|CE | 0| +bit |15 14 13 12 11 10 09 08|07|06 05 04 03 02|01 00| +desc|Ip | 0|Excode | 0| + +BD Is set when last exception points to the + branch instuction instead of the instruction + in the branch delay slot, where the exception + occurred. +CE Contains the coprocessor number if the exception + occurred because of a coprocessor instuction for + a coprocessor which wasn't enabled in SR. +Ip Interrupt pending field. Bit 8 and 9 are RW, and + contain the last value written to them. As long + as any of the bits are set they will cause an + interrupt if the corresponding bit is set in IM. +Excode Describes what kind of exception occured: +0 INT Interrupt +1 MOD Tlb modification +2 TLBL Tlb load +3 TLBS Tlb store +4 AdEL Address error, load/I-fetch +5 AdES Address error, store + The address errors occur when attempting to read + outside of KUseg in user mode and when the address + is misaligned. +6 IBE Bus error on Instruction fetch. +7 DBE Bus error on Data load. 
+8 Syscall Generated unconditionally by at syscall instruction +9 BP Breakpoint - break instruction. +10 RI Reserved instruction +11 CpU Coprocessor unusable +12 Ov Arithmic overflow +------------------------------------------------------------- +12 SR rw System status register +bit |31 |30 |29 |28 |27 26|25|24 23|22 |21|20|19|18|17 |16 | +desc|CU3|CU2|CU1|CU0| 0|RE| 0|BEV|TS|PE|CM|PZ|SwC|IsC| + +bit |15 14 13 12 11 10 09 08|07 06|05 |04 |03 |02 |01 |00 | +desc|Im | 0|KUo|IEo|KUp|IEp|KUc|IEc| + +CUx 0 Coprocessor x disabled + 1 Coprocessor x enabled + CU2 is for the GTE, CU1 is for the FPA, which is + not available in the PSX. +CU0 0 Cop0 in kernal mode. + 1 Cop0 in user mode. + Makes some nominally privileged instruction usable + in user mode. Normal instructions are usable regardless + of this bit's setting. +RE 0 Normal 'endianness' + 1 Reverse 'endianness' + Reverses the byte order in which data is stored in + memory. (lo-hi -> hi-lo) +BEV 0 Boot exception vectors in RAM + 1 Boot exception vectors in ROM (kseg1) +TS TLB shutdown. Gets set if a programm address simultaniously + matches 2 TLB entries. +PE Cache parity error. Does not cause exception. +CM Shows the result of the last load operation with the D-cache + isolated. It gets set if the cache really contained data + for the addressed memory location. +PZ When set cache parity bits are written as 0. +Isc 0 Do not isolate cache. + 1 Isolate cache. All load and store operations are targetted + to the Data cache, and never the main memory. +Swc 0 Normal cache mode. + 1 Swapped cache mode. I cache will act as D cache and vice + versa. Use only with Isc to access & invalidate i cache + entries +Im 8 bit interrupt mask fields. When set the corresponding + interrupts are allowed to cause an exception. +KUc 0 User mode privilege , rfe pops KUp here + 1 Kernal mode privilege +IEc 0 Interrupts enabled , rfe pops IUp here + 1 All interrupts disabled. 
+KUp KUc gets pushed here on an exception, rfe pops KUo here +IUp IUc gets pushed here on an exception, rfe pops IUo here +KUo KUp gets pushed here on an exception +IUo IUp gets pushed here on an exception +------------------------------------------------------------- +11 BPCM rw Execute breakpoint mask. + +Program counter is ANDed with this value and then compared to +the value in BPC. +------------------------------------------------------------- +10 TLBHI/PID +------------------------------------------------------------- +9 BDAM rw Data Access breakpoint mask. + +Data fetch address is ANDed with this value and then compared +to the value in BDA +------------------------------------------------------------- +8 BadVaddr r Bad Virtual Address. + +Contains the address whose reference caused an exception. Set +on any MMU type of exceptions, on references outside of kuseg +and on any misaligned reference. +------------------------------------------------------------- +7 DCIC rw Breakpoint control +|1f 1e 1d 1c|1b|1a|19|18|17|16 15 14 13 12 11 10||0f 00| +| 1 1 1 0| W| R|DA|PC| 1| 0| 0| + +W 0 + 1 Break on Write +R 0 + 1 Break on Read +DA 0 Data access breakpoint disabled + 1 Data access breakpoint enabled +PC 0 Execution breakpoint disabled + 1 Execution breakpoint enabled + +To use the Execution breakpoint, set PC. To use the Data access +breakpoint set DA and either R, W or both. Both breakpoints +can be used simultaniously. When a breakpoint occurs the PSX +jumps to $00000040. +------------------------------------------------------------- +6 PIDMASK +------------------------------------------------------------- +5 BDA rw Breakpoint on data access. + +Sets the breakpoint address for load/store operations +------------------------------------------------------------- +4 CTXT +------------------------------------------------------------- +3 BPC rw Breakpoint on execute. + +Sets the breakpoint address to break on on execute. 
+------------------------------------------------------------- +2 TLBLO +1 RAND +0 INX + +For TLB details see mips doc. + +-------------------------------------------------------------------------- +PC file server +-------------------------------------------------------------------------- +Caetla supports pcdrv: device, the SN systems device extension to access +files on the drive of the pc. This fileserver can be accessed by using the +kernel functions, with the 'pcdrv:' device name prefix to the filenames or +using the SN system calls. + +------------------------------------------------------------- +SN System calls +------------------------------------------------------------- +The following SN system calls for the fileserver are provided. +Accessed by setting the registers and using the break command +with the specified field. +------------------------------------------------------------- +PCInit Inits the fileserver. +break $0101 +------------------------------------------------------------- +PCCreat Creates a new file on PC. +break $0102 +in: a1 pointer to file name + a2 file attribute +out: v0 0 = success, -1 = failure + v1 file handle or error code if v0 is negative +------------------------------------------------------------- +PCOpen Opens a file on the PC. +break $0103 +in: a1 pointer to file name + a2 access mode 0 read only + 1 write only + 2 r/w +out: v0 0 = succes, -1 = failure + v1 file handle or error code if v0 is negative +------------------------------------------------------------- +PCClose Closes a file on the PC. +break $0104 +in: a1 file handle +out: v0 0 = succes, -1 = failure + v1 0 = succes, error code if v0 is negative +------------------------------------------------------------- +PCRead Reads from an open file on PC. +break $0105 +in: a1 file handle + a2 length in bytes + a3 pointer to store address +out: v0 0 = succes, -1 = failure + v1 number of read bytes or error code if v0 is + negative. 
+ +Note: Does not stop at eof, so if you set more bytes to read + than the filelength, the fileserver will pad with zero + bytes. If you are not sure of the the filelength obtain + the filelength by PClSeek (a2 = 0, a3 = 2, v1 will return + the length of the file, don't forget to reset the file + pointer to the start before calling PCread!) +------------------------------------------------------------- +PCWrite Writes to an open file on PC. +break $0106 +in: a1 file handle + a2 length in bytes + a3 pointer to read address +out: v0 0 = succes, -1 = failure + v1 number of written bytes or error code if v0 + is negative. +------------------------------------------------------------- +PClSeek Repositions the file pointer +break $0107 +in: a1 file handle + a2 number of bytes to move. + a3 position from 0 Beginning of file + 1 Current pointer + 2 End of file +out: v0 0 = succes, -1 = failure + v1 file pointer +------------------------------------------------------------- +Attributes are passed as is. File attributes for the pc file +system are like this: +bit | 7 6| 5| 4| 3| 2| 1| 0| +desc| 0| A| D| 0| S| H| R| + +A Archive file +D Directory +S System file +H Hidden file +R Read only file +------------------------------------------------------------- + + +-------------------------------------------------------------------------- +System calls +-------------------------------------------------------------------------- +Kernel system calls are accessed by loading the call number in t1, and +jumping to the specifeed address. +A0 call $3f means: load t1 with $3f and jump to $000000a0. + +------------------------------------------------------------- +Printf Print string to console. +A0 call $3f +in: a0 Pointer to 0 terminated string. + a1-a3 Arguments. + sp+$10 + +Prints the specified string to the console (ie. pc screen). +String can contain standard C escape sequences and conversion +characters, except the floating point types (%e, %f, %g). 
+Variables are passed in a1 to a3. More variables are passed at +sp+$10. +------------------------------------------------------------- +openevent adds an event structure to the event table. +B0 call $08 +in: a0 Event class. + a1 Event spec. + a2 Event mode. + a3 Address of function to be executed when + event occurs. +out: v0 Event descriptor, -1 if failed. + +Opens an event, should be called within a critical section. +The return value is used to identify the event to the other +even functions. +A list of event classes, specs and modes is at the end of this +section. +------------------------------------------------------------- +closeevent releases an event structure from the +B0 call $09 event table. +in: a0 Event descriptor. +out: v0 1 on success, 0 if failed. +------------------------------------------------------------- +enableevent Turns on event handling for specified event. +B0 call $0c +in: a0 Event descriptor. +out: v0 1 on success, 0 if failed. +------------------------------------------------------------- +disableevent Turns off event handling for specified event. +B0 call $0d +in: a0 Event descriptor. +out: v0 1 on success, 0 if failed. +------------------------------------------------------------- +open Opens a file for IO. +B0 call $32 +in: a0 File name, terminated with 0 + a1 Access mode +out: v0 File handle, or -1 if error. + +Opens a file on the target device for io. Access mode is set +like this: + +bit 0 1 = Read + 1 1 = Write + 9 1 = New file + 15 1 = Asynchronous mode? + 16-31 Number of memory card blocks for a new file on the + memory card. + +The PSX can have a maximum of 16 files open at any time. +------------------------------------------------------------- +lseek Move the file pointer. +B0 call $33 +in: a0 File handle + a1 Movement offset in bytes + a2 0 = from start of file + 1 = from current file pointer + 2 = Bugs. Should be from end of file. 
+ +Moves the file pointer the number of bytes in a1, relative to +the location specified by a2. Movement from the eof is incorrect. +Also, movement beyond the end of the file is not checked. +------------------------------------------------------------- +read Read data from an open file. +B0 call $34 +in: a0 File Handle + a1 Pointer to address to store read data + a2 Number of bytes to read +out: v0 Number of bytes actually read, -1 if failed. + +Reads the number of bytes from the specified open file. If length +is not specified an error is returned. Read per $0080 bytes from +memory card (bu:) and per $0800 from cdrom (cdrom:). +------------------------------------------------------------- +write Write data to an open file. +B0 call $35 +in: a0 File handle + a1 Pointer to adress to read data from. + a2 Number of bytes to write. +out: v0 Number of bytes written. + +Writes the number of bytes to the specified open file. Write +to the memory card per $0080 bytes. Writing to the cdrom returns 0. +------------------------------------------------------------- +close Close an open file. +B0 call $36 +in: a0 File handle +out: v0 File hande if success, -1 if failed. +------------------------------------------------------------- +cd Change the current directory on target device. +B0 call $40 +in: a0 Pointer to new directory path +out: v0 1 if success, 0 if failed. + +Changes the current directory on target system. +------------------------------------------------------------- +firstfile Finds the first file to match the name. +B0 call $42 +in: a0 Pointer to the file name. + a1 Pointer to direntry structure. +out: v0 0 if unsuccessfull, else same as a1. + +Searches for the first file to match the name in the string +pointed to by a0. Wildcards (?, *) may be used. Start the name +with the device you want to address. (ie. 
pcdrv:) Different +drives can be accessed as normally by their drive names (a:, c:) +if path is omitted after the device, the current directory will +be used. + +A direntry structure looks like this: + +$00 - $13 db Filename, terminated with 0. +$14 dw File attribute +$18 dw File size +$1c dw Pointer to next direntry +$20 - $27 db Reserved by system +------------------------------------------------------------- +nextfile Searches for the next file to match the name. +B0 call $43 +in: a0 Pointer to direntry structure +out: v0 0 if unsuccesful, else same as a0. + +Uses the settings of a previous firstfile command. +------------------------------------------------------------- +rename Rename a file on target device. +B0 call $44 +in: a0 Pointer to old file name + a1 Pointer to new file name +out: v0 1 if successful, 0 if failed. +------------------------------------------------------------- +delete Delete a file on target device. +B0 call $45 +in: a0 Pointer to file name +out: v0 1 if successful, 0 if failed. +------------------------------------------------------------- + +Event Classes + +The upper byte of each event type, is a descriptor byte, which +identifies the type of event to kernal routines. 
+ +Descriptors: +$ff Thread +$f0 Hardware +$f1 Event +$f2 Root counter +$f3 User event +$f4 BIOS + +Hardware events: +$f0000001 VBLANK +$f0000002 GPU +$f0000003 CDROM Decoder +$f0000004 DMA controller +$f0000005 RTC0 +$f0000006 RTC1 +$f0000007 RTC2 +$f0000008 Controller +$f0000009 SPU +$f000000a PIO +$f000000b SIO +$f0000010 Exception +$f0000011 memory card +$f0000012 memory card +$f0000013 memory card + +Root counter events: +$f2000000 counter 0 (pixel clock) +$f2000001 counter 1 (horizontal retrace) +$f2000002 counter 2 (one-eighth of system clock) +$f2000003 counter 3 (vertical retrace) + +Bios events: +$f4000001 memory card +$f4000002 libmath + +Event Specs: +$0001 counter becomes zero +$0002 interrupted +$0004 end of i/o +$0008 file was closed +$0010 command acknowledged +$0020 command completed +$0040 data ready +$0080 data end +$0100 time out +$0200 unknown command +$0400 end of read buffer +$0800 end of write buffer +$1000 general interrupt +$2000 new device +$4000 system call instruction +$8000 error happned +$8001 previous write error happned +$0301 domain error in libmath +$0302 range error in libmath + +Event modes: +$1000 Handle on interrupt +$2000 Do not handle on interrupt. + +-------------------------------------------------------------------------- +Root Counters +-------------------------------------------------------------------------- +There are 4 root counters. + +Counter Base address Synced to +0 $1f801100 pixelclock +1 $1f801110 horizontal retrace +2 $1f801120 1/8 system clock +3 vertical retrace + +Each have three registers, one with the current value, one with the counter +mode, and one with a target value. + +------------------------------------------------------------- +$11x0 Count r +bit |31 16|15 0| +desc|Garbage |Count | + +Count Current count value, 0-$ffff + +Upper word seems to contain only garbage. 
+------------------------------------------------------------- +$11x4 Mode rw +bit |31 10|9 |8 |7 |6 |5 |4 |3 | 2 1| 0| +desc|Garbage |Div|Clc| |Iq2| |Iq1|Tar| |En| + +En 0 Counter running + 1 Counter stopped (only counter 2) +Tar 0 Count to $ffff + 1 Count to value in target register +Iq1 Set both for IRQ on target reached. +Iq2 +Clc 0 System clock (it seems) + 1 Pixel clock (counter 0) + Horizontal retrace (counter 1) +Div 0 System clock (it seems) + 1 1/8 * System clock (counter 2) + +When Clc and Div of the counters are zero, they all run at the +same speed. This speed seems to be about 8 times the normal +speed of root counter 2, which is specified as 1/8 the system +clock. + +Bits 10 to 31 seem to contain only garbage. +------------------------------------------------------------- +$11x8 Target rw +bit |31 16|15 0| +desc|Garbage? |Target | + +Target Target value, 0-$ffff + +Upper word seems to contain only garbage. +------------------------------------------------------------- +Quick step-by-step: + +To set up an interrupt using these counters you can do the following: +1 - Reset the counter. (Mode = 0) +2 - Set its target value, set mode. +3 - Enable corresponding bit in the interrupt mask register ($1f801074) + bit 3 = Counter 3 (Vblank) + bit 4 = Counter 0 (System clock) + bit 5 = Counter 1 (Hor retrace) + bit 6 = Counter 2 (Pixel) +4 - Open an event. (Openevent bios call - $b0, $08) + With following arguments: + a0-Rootcounter event descriptor or'd with the counter number. + ($f2000000 - counter 0, $f2000001 - counter 1,$f2000002 - counter 2, + $f2000003 - counter 3) + a1-Spec = $0002 - interrupt event. + a2-Mode = Interrupt handling ($1000) + a3-Pointer to your routine to be excuted. + The return value in V0 is the event identifier. + +5 - Enable the event, with the corresponding bioscall ($b0,$0c) with + the identifier as argument. + +6 - Make sure interrupts are enabled. (Bit 0 and bit 10 of the COP0 status + register must be set.) 
+ +Your handler just has to restore the registers it uses, and it should +terminate with a normal jr ra. + +To turn off the interrupt, first call disable event ($b0, $0d) and then +close it using the Close event call ($b0,$09) both with the event number +as argument. + +-------------------------------------------------------------------------- +DMA +-------------------------------------------------------------------------- + +------------------------------------------------------------- +DPCR Dma control register $1f8010f0 +|1f 1c|1b 18|17 14|13 10|0f 0c|0b 08|07 04|03 00| +| |Dma6 |Dma5 |Dma4 |Dma3 |Dma2 |Dma1 |Dma0 | + +Each channel has a 4 bit control block allocated in this +register. +Bit 3: 1= Dma Enabled + 2: ? + 1: ? + 0: ? + +Bit 3 must be set for a channel to operate. +------------------------------------------------------------- +DICR Dma interrupt register $1f8010f4 + +------------------------------------------------------------- +The DMA channel registers are located starting at $1f801080. The +base address for each channel is: +$1f801080 DMA channel 0 MDECin +$1f801090 DMA channel 1 MDECout +$1f8010a0 DMA channel 2 GPU (lists + image data) +$1f8010b0 DMA channel 3 CDrom +$1f8010c0 DMA channel 4 SPU +$1f8010d0 DMA channel 5 PIO +$1f8010e0 DMA channel 6 OTC (reverse clear OT) + +------------------------------------------------------------- +D_MADR DMA base address. $1f8010x0 +bit |1f 00| +desc|madr | + +madr pointer to the address the DMA will start reading + from/writing to +------------------------------------------------------------- +D_BCR DMA block control $1f8010x4 +bit |1f 10|0f 00| +desc|ba |bs | + +ba Amount of blocks +bs Blocksize (words) + +The channel will transfer ba blocks of bs words. Take care +not to set the size larger than the buffer of the corresponding +unit can hold. (GPU & SPU both have a $10 word buffer). A +larger blocksize means a faster transfer. 
+------------------------------------------------------------- +D_CHCR DMA channel control $1f8010x8 +bit |1f-19|18|17-0c|0b|0a|09|08|07 01|00| +desc| 0|Tr| 0| 0|Li|Co| 0| 0|Dr| + +Tr 0 No DMA transfer busy. + 1 Start DMA transfer/DMA transfer busy. +Li 1 Transfer linked list. (GPU only) +Co 1 Transfer continous stream of data. +Dr 0 direction to memory + 1 direction from memory +------------------------------------------------------------- + +-------------------------------------------------------------------------- +doomed@c64.org <- corrections/additions latest update -> psx.rules.org +-------------------------------------------------------------------------- +16/may/1999 Initial version. +19/may/1999 Added Breakpoint info. <Herozero> + 3/jun/1999 Root counters, some stuff on events and DMA added. + +(thanx to ppl in <>) +-------------------------------------------------------------------------- +thanx & hello to the usual. + + diff --git a/ntani.txt b/ntani.txt @@ -0,0 +1,4 @@ +ηθελα να σου πω οτι οτι ειπα δεν ηταν αληθεια και δεν το εννουσα +αλιμονο αν κατι τετοιο δεν το λαχταρουσα +συγγνωμη μονο μπορω να πω και σε παρακαλω πιστεψε με μετανοω +σε περιμενω την παρασκευη σαν τρελος να σε ειδω diff --git a/src/bios.c b/src/bios.c @@ -7,12 +7,12 @@ BIOS* BIOS_new(const char* path) { BIOS *b; - long pos; - FILE* f; + long pos; + FILE* f; f = fopen(path, "rb"); if (f == NULL) - { + { perror("ERROR"); exit(EXIT_FAILURE); } @@ -22,12 +22,12 @@ BIOS_new(const char* path) pos = ftell(f); /* If not 512KB then exit */ if (pos != 512*1024) - { + { fprintf(stderr, "INVALID BIOS_SIZE\n"); exit(1); } fseek(f, 0, SEEK_SET); - + b = (BIOS*)malloc(sizeof(BIOS)); b->data = (unsigned char*)malloc(sizeof(unsigned char)*pos); fread(b->data, 1, pos, f); diff --git a/src/cdrom.c b/src/cdrom.c @@ -1,10 +1,14 @@ /* CDRom Drive */ -#include "cdrom.h" -#include "util.h" #include <stdlib.h> #include <string.h> #include <stdio.h> +#include "cdrom.h" +#include "util.h" +#include 
"interconnect.h" + +extern Interconnect* inter; + cdrom* cdrom_new(void) { @@ -14,13 +18,13 @@ cdrom_new(void) } u8 -cdrom_fifo_is_empty(fifo fifo) +cdrom_fifo_empty(fifo fifo) { return fifo.write_idx == fifo.read_idx; } u8 -cdrom_fifo_is_full(fifo fifo) +cdrom_fifo_full(fifo fifo) { return fifo.write_idx == (fifo.read_idx ^ 0x10); } @@ -34,46 +38,45 @@ cdrom_store8(cdrom* cd, u32 off, u8 val) void cdrom_write(cdrom* cd, u32 offset, u8 val) { - //printf("**THE OFFSET IS %08X**\n", offset); - //printf("**THE VAL IS %d**\n", val); - u8 idx; + printf("CDROM_WRITE: THE OFFSET IS %08X\n", offset); + printf("CDROM_WRITE: THE VAL IS %d\n", val); + u8 idx; idx = cd->status & STATUS_INDEX_MASK; switch (offset) { case 0: - cd->status = (cd->status & (~3)) | (val & 3); + cd->status = val & 3; + break; + case 1: + fprintf(stderr, "offset 1\n"); + fprintf(stderr, "cd->status: %08X\n", cd->status); + + cdrom_exec_cmd(cd, val); + break; + case 2: + cdrom_irq_write_mask(cd, val); + break; + case 3: + switch (idx) { + case 1: + cdrom_irq_ack(cd, val & 0x1f); + + if ((val & 0x40) != 0) + memset(&cd->host_params, 0, sizeof(fifo)); + + break; + default: + fprintf(stderr, "cdrom_write: Unimplemented write on offset %08X and index %08X\n", offset, idx); + exit(EXIT_FAILURE); + } break; - case 1: - fprintf(stderr, "offset 1\n"); - break; - case 2: - fprintf(stderr, "offset 2\n"); - break; - case 3: - switch (idx) { - case 1: - //self.irq_ack(shared, val & 0x1f); - - //if val & 0x40 != 0 { - // self.host_params.clear(); - //} - - //if val & 0xa0 != 0 { - // panic!("Unhandled CDROM 3.1: {:02x}", val); - //} - break; - default: - fprintf(stderr, "cdrom_write: Unimplemented write on offset %08X and index %08X\n", offset, idx); - exit(EXIT_FAILURE); - } - break; default: break; } - return; + return; } u8 @@ -83,26 +86,66 @@ cdrom_load(cdrom* cd, u32 offset) switch (offset) { case 0: - return cd->status; + return cdrom_status(cd); default: break; } + return 0; } void -cdrom_command(cdrom* cd, 
u8 com) +cdrom_exec_cmd(cdrom* cd, u8 cmd) { // u32 ret; - switch ((cdrom_cmd)com) + switch ((cdrom_cmd)cmd) { case CDROM_CMD_GETSTAT: - cd->status = (cd->status & STATUS_INDEX_MASK); + cd->status = cdrom_status(cd); + break; + case CDROM_CMD_TEST: + fprintf(stderr, "Not implemented TEST COMMAND"); break; default: + fprintf(stderr, "ERR: Unimplemented command -> %02X\n", cmd); break; } return; } + +void +cdrom_irq_ack(cdrom* cd, u8 val) +{ + // TODO + cd->irq_flags &= val; +} + +void +cdrom_irq_write_mask(cdrom* cd, u8 val) +{ + if ((val & 0x18) != 0) + fprintf(stderr, "WARNING: Unhandled IRQ Mask: %02X\n", val); + + cd->irq_mask = val & 0x1f; +} + +u8 +cdrom_status(cdrom* cd) +{ + u8 S = cd->status; + + S |= 0 << 2; + + S |= cdrom_fifo_empty(cd->host_params) << 3; + S |= !cdrom_fifo_full(cd->host_params) << 4; + S |= !cdrom_fifo_full(cd->host_response) << 5; + + S |= (cd->rx_index < cd->rx_len) << 6; + + // BLOCKING + S |= 0 << 7;/* cd-rom->sub_cpu.busy() << 7; */ + + return S; +} diff --git a/src/cdrom.h b/src/cdrom.h @@ -1,5 +1,4 @@ #pragma once - #include "types.h" enum { @@ -20,7 +19,7 @@ enum { cdrom_audio_channels = 2, }; -typedef struct fifo { +typedef struct { // Data buffer u8 buffer[16]; // Write pointer (4bits + carry) @@ -30,9 +29,11 @@ typedef struct fifo { } fifo; typedef enum { - CDROM_CMD_SYNC = 0X00, - CDROM_CMD_GETSTAT = 0X01, - CDROM_CMD_INIT = 0XA, + CDROM_CMD_SYNC = 0x00, + CDROM_CMD_GETSTAT = 0x01, + CDROM_CMD_SETLOC = 0x02, + CDROM_CMD_TEST = 0x19, + CDROM_CMD_INIT = 0xA, } cdrom_cmd; #define STATUS_INDEX_MASK 0x03 @@ -43,16 +44,32 @@ typedef enum { #define STATUS_DRQSTS_MASK 0x40 #define STATUS_BUSYSTS_MASK 0x80 +typedef enum { + CDROM_IRQ_NOINTR, /* No interrupt */ + CDROM_IRQ_DATAREADY, /* Data Read */ + CDROM_IRQ_ACKNOWLEGE, /* Command Complete */ + CDROM_IRQ_COMPLETE, /* Acknowledge */ + CDROM_IRQ_DATAEND, /* End of data detected */ + CDROM_IRQ_DISKERROR /* Error detected */ +} cdrom_irq_type; + +/* 4 memory-mapped registers. 
+ First of 'em has an index that dictates how the others behave. +*/ typedef struct cdrom { u8 status; /* Status Register */ - /// Command parameter FIFO + + /* Command parameter FIFO */ fifo host_params; fifo host_response; - u8 command; + cdrom_cmd command; u8 irq_flags; u8 irq_mask; + u8 rx_index; + u8 rx_len; + u8 sb[2340]; u32 SB_in; } cdrom; @@ -68,9 +85,12 @@ u8 cdrom_load(cdrom*, u32); void cdrom_load8(cdrom*, u8); void cdrom_load16(cdrom*, u16); void cdrom_load32(cdrom*, u32); +u8 cdrom_status(cdrom*); -u8 cdrom_fifo_is_empty(fifo); -u8 cdrom_fifo_is_full(fifo); +u8 cdrom_fifo_empty(fifo); +u8 cdrom_fifo_full(fifo); u8 cdrom_fifo_push(void); -void cdrom_command(cdrom*, u8); +void cdrom_exec_cmd(cdrom*, u8); +void cdrom_irq_ack(cdrom*, u8); +void cdrom_irq_write_mask(cdrom*, u8); diff --git a/src/interconnect.c b/src/interconnect.c @@ -18,6 +18,7 @@ new_interconnect(void) { inter->dma = DMA_new(); inter->gpu = GPU_new(); inter->cdrom = cdrom_new(); + inter->irq = irq_new(); return inter; } @@ -54,12 +55,11 @@ INTER_load8(Interconnect* inter, u32 addr) return 0; } -// contains = UTIL_contains(CDROM_START, CDROM_SIZE, abs_addr, &offset); -// if (contains) -// { -// cdrom_load(inter->cdrom, offset); -// return 0; -// } + contains = UTIL_contains(CDROM_START, CDROM_SIZE, abs_addr, &offset); + if (contains) + { + return cdrom_load(inter->cdrom, offset); + } fprintf(stderr, "Unhandled Load8 At Address %08X\n", addr); exit(EXIT_FAILURE); diff --git a/src/interconnect.h b/src/interconnect.h @@ -5,6 +5,7 @@ #include "gpu.h" #include "types.h" #include "cdrom.h" +#include "irq.h" struct Interconnect { BIOS* bios; @@ -12,6 +13,7 @@ struct Interconnect { DMA* dma; GPU* gpu; cdrom* cdrom; + irq irq; }; typedef struct Interconnect Interconnect; diff --git a/src/irq.c b/src/irq.c @@ -7,7 +7,7 @@ irq_new(void) { irq i; i.status = 0; - i.mask = 0; + i.mask = 99; return i; } @@ -18,7 +18,10 @@ irq_write(irq* i, u32 a, u32 v) //printf("[IRQ] Write: 0x%08x 0x%08x --- PAD 
TEMP\n", A, V); - if(a & 4) i->mask = v; else i->status &= v; + if(a & 4) + i->mask = v; + else + i->status &= v; return; } @@ -28,7 +31,7 @@ irq_load(irq* i, u32 a) { u32 ret = 0; - if(a & 4) + if (a & 4) ret = i->mask; else ret = i->status; diff --git a/src/main.c b/src/main.c @@ -1,8 +1,5 @@ #include <stdlib.h> #include <SDL2/SDL.h> -#include <lua.h> -#include <lualib.h> -#include <lauxlib.h> #include "cpu.h" #include "interconnect.h" @@ -10,46 +7,64 @@ #include "mem.h" #include "gpu.h" #include "sr.h" +#include "MiniFB.h" SDL_Event ev; +Interconnect *inter; + +int WINDOW_STATE; + +void +keyboard(struct mfb_window *window, mfb_key key, mfb_key_mod mod, bool isPressed) +{ + if (key == KB_KEY_ESCAPE) + exit(EXIT_FAILURE); + if (key == KB_KEY_A) + fprintf(stderr, "The value of OFFSET is: %08X", inter->cdrom->status & 3); +} + int main(int argc, char **argv) { int c; REN *ren; CPU *cpu; - Interconnect *inter; //lua_State *L = luaL_newstate(); //luaL_openlibs(L); - SDL_Init(SDL_INIT_VIDEO); + //SDL_Init(SDL_INIT_VIDEO); + //SDL_SetRenderDrawColor(ren->renderer, 0xff, 0xff, 0xff, 0xff); + //SDL_RenderClear(ren->renderer); + //SDL_RenderPresent(ren->renderer); + inter = new_interconnect(); cpu = new_cpu(inter); ren = inter->gpu->ren; - SDL_SetRenderDrawColor(ren->renderer, 0xff, 0xff, 0xff, 0xff); - SDL_RenderClear(ren->renderer); - SDL_RenderPresent(ren->renderer); + + mfb_set_keyboard_callback(inter->gpu->ren->window, keyboard); while(1) { + //while(mfb_wait_sync(inter->gpu->ren->window)) { /* Because it's too slow to run events every instr */ for (c = 0; c < 1e5; c++) CPU_run_next_instruction(cpu); - while(SDL_PollEvent(&ev) != 0) { - switch(ev.type) { - case SDL_QUIT: - SDL_Quit(); - exit(1); - case SDL_KEYDOWN: - if (ev.key.keysym.sym == SDLK_q) { - SDL_Quit(); - exit(1); - } else if (ev.key.keysym.sym == SDLK_a) { - fprintf(stderr, "The data %d\n;", inter->cdrom->host_params.buffer[0]); - break; - } - } - } + if (WINDOW_STATE < 0) + break; + + + 
//while(SDL_PollEvent(&ev) != 0) { + // switch(ev.type) { + // case SDL_QUIT: + // SDL_Quit(); + // exit(1); + // case SDL_KEYDOWN: + // if (ev.key.keysym.sym == SDLK_q) { + // SDL_Quit(); + // exit(1); + // } + // } + //} } free(inter->bios->data); @@ -64,7 +79,7 @@ main(int argc, char **argv) //lua_close(L); - SDL_Quit(); + //SDL_Quit(); return 0; } diff --git a/src/sr.c b/src/sr.c @@ -9,70 +9,72 @@ #include "sr.h" #include "defs.h" #include "util.h" +#include "MiniFB.h" ivec2 POSITION_from_gp0(u32 val) { - ivec2 pos; - pos.x = (i16)val; - pos.y = (i16)(val >> 16); + ivec2 pos; + pos.x = (i16)val; + pos.y = (i16)(val >> 16); - return pos; + return pos; } C COLOR_from_gp0(u32 val) { - C c; - c.r = (u8)val; - c.g = (u8)(val >> 8); - c.b = (u8)(val >> 16); - return c; + C c; + c.r = (u8)val; + c.g = (u8)(val >> 8); + c.b = (u8)(val >> 16); + return c; } void FB_flip_vert(u32 *data) { - u64 bytes_per_line; u32 *line; i32 half, j; - - bytes_per_line = W; - line = (u32 *)malloc(bytes_per_line * sizeof(u32)); - half = H>>1; - - for (j=0; j<half; j++) { - u64 l1 = j*bytes_per_line; - u64 l2 = (H-1-j)*bytes_per_line; - memmove((void *)line, (void *)(data+l1), bytes_per_line* sizeof(u32)); - memmove((void *)(data+l1), (void *)(data+l2), bytes_per_line* sizeof(u32)); - memmove((void *)(data+l2), (void *)line, bytes_per_line* sizeof(u32)); - } - free(line); + u64 bytes_per_line; u32 *line; i32 half, j; + + bytes_per_line = W; + line = (u32 *)malloc(bytes_per_line * sizeof(u32)); + half = H>>1; + + for (j=0; j<half; j++) { + u64 l1 = j*bytes_per_line; + u64 l2 = (H-1-j)*bytes_per_line; + memmove((void *)line, (void *)(data+l1), bytes_per_line* sizeof(u32)); + memmove((void *)(data+l1), (void *)(data+l2), bytes_per_line* sizeof(u32)); + memmove((void *)(data+l2), (void *)line, bytes_per_line* sizeof(u32)); + } + free(line); } C C_new(u32 b) { - C c; - c.r = (u8)(b & 0xff); - c.g = (u8)((b >> 8) & 0xff); - c.b = (u8)((b >> 16) & 0xff); - return c; + C c; + c.r = (u8)(b & 0xff); 
+ c.g = (u8)((b >> 8) & 0xff); + c.b = (u8)((b >> 16) & 0xff); + return c; } -void REN_FB_set(REN* ren, i32 x, i32 y, u8 r, u8 g, u8 b) { +void +REN_FB_set(REN* ren, i32 x, i32 y, u8 r, u8 g, u8 b) { - u32 *fb; + u32 *fb; - if (!ren->fb || x < 0 || y < 0 || x >= W || y >= H) return; + if (!ren->fb || x < 0 || y < 0 || x >= W || y >= H) return; - // Clamp color values - r = r > 255 ? 255 : r; - g = g > 255 ? 255 : g; - b = b > 255 ? 255 : b; + // Clamp color values + r = r > 255 ? 255 : r; + g = g > 255 ? 255 : g; + b = b > 255 ? 255 : b; - // Direct write instead of memcpy - fb = ren->fb + (x + y * W); - *fb = r | (g << 8) | (b << 16); + // Direct write instead of memcpy + fb = ren->fb + (x + y * W); + *fb = r | (g << 8) | (b << 16); } //C* @@ -87,9 +89,10 @@ REN_new(void) { REN* ren; ren = (REN*)malloc(sizeof(REN)); - ren->window = SDL_CreateWindow("Ultimecia", 400 , 300, WIN_W, WIN_H, SDL_WINDOW_HIDDEN); - ren->renderer = SDL_CreateRenderer(ren->window, -1, 0); - ren->tex = SDL_CreateTexture(ren->renderer, SDL_PIXELFORMAT_RGB888, SDL_TEXTUREACCESS_STREAMING, W, H); + ren->window = mfb_open_ex("my display", 800, 600, WF_RESIZABLE | WF_ALWAYS_ON_TOP); + //ren->window = SDL_CreateWindow("Ultimecia", 400 , 300, WIN_W, WIN_H, SDL_WINDOW_HIDDEN); + //ren->renderer = SDL_CreateRenderer(ren->window, -1, 0); + //ren->tex = SDL_CreateTexture(ren->renderer, SDL_PIXELFORMAT_RGB888, SDL_TEXTUREACCESS_STREAMING, W, H); ren->verts = (ivec2*)malloc(sizeof(ivec2) * 10000); // Single allocation with larger size ren->colors = (C*)malloc(sizeof(C) * 10000); // Single allocation with larger size ren->fb = (u32*)malloc(W*H*sizeof(u32)); @@ -161,7 +164,7 @@ REN_triangle(REN* ren, ivec2 verts[3], C colors[3]) for (i32 y = verts[0].y; y < verts[1].y; y++) { draw_scanline(ren, y, xL >> 16, (C){(u8)(rL >> 16), (u8)(gL >> 16), (u8)(bL >> 16)}, - xR >> 16, (C){(u8)(rR >> 16), (u8)(gR >> 16), (u8)(bR >> 16)}); + xR >> 16, (C){(u8)(rR >> 16), (u8)(gR >> 16), (u8)(bR >> 16)}); xL += dx01; rL += 
dr01; gL += dg01; bL += db01; xR += dx02; rR += dr02; gR += dg02; bR += db02; } @@ -170,7 +173,7 @@ REN_triangle(REN* ren, ivec2 verts[3], C colors[3]) xL = verts[1].x << 16, rL = colors[1].r << 16, gL = colors[1].g << 16, bL = colors[1].b << 16; for (i32 y = verts[1].y; y < verts[2].y; y++) { draw_scanline(ren, y, xL >> 16, (C){(u8)(rL >> 16), (u8)(gL >> 16), (u8)(bL >> 16)}, - xR >> 16, (C){(u8)(rR >> 16), (u8)(gR >> 16), (u8)(bR >> 16)}); + xR >> 16, (C){(u8)(rR >> 16), (u8)(gR >> 16), (u8)(bR >> 16)}); xL += dx12; rL += dr12; gL += dg12; bL += db12; xR += dx02; rR += dr02; gR += dg02; bR += db02; } @@ -179,60 +182,61 @@ REN_triangle(REN* ren, ivec2 verts[3], C colors[3]) void REN_push_triangle(REN* ren, ivec2 verts[3], C colors[3]) { - u8 i; + u8 i; - REN_flush(ren); + REN_flush(ren); - for (i = 0; i < 3; i++) { - ren->verts[ren->nvertices] = verts[i]; - ren->colors[ren->nvertices] = colors[i]; - ren->nvertices++; - } + for (i = 0; i < 3; i++) { + ren->verts[ren->nvertices] = verts[i]; + ren->colors[ren->nvertices] = colors[i]; + ren->nvertices++; + } } void REN_push_quad(REN* ren, ivec2 verts[4], C colors[4]) { - u8 i; - - REN_flush(ren); - - // First triangle: vertices 0,1,2 - for (i = 0; i < 3; i++) { - ren->verts[ren->nvertices] = verts[i]; - ren->colors[ren->nvertices] = colors[i]; - ren->nvertices++; - } - for (i = 1; i < 4; i++) { - ren->verts[ren->nvertices] = verts[i]; - ren->colors[ren->nvertices] = colors[i]; - ren->nvertices++; - } + u8 i; + + REN_flush(ren); + + // First triangle: vertices 0,1,2 + for (i = 0; i < 3; i++) { + ren->verts[ren->nvertices] = verts[i]; + ren->colors[ren->nvertices] = colors[i]; + ren->nvertices++; + } + for (i = 1; i < 4; i++) { + ren->verts[ren->nvertices] = verts[i]; + ren->colors[ren->nvertices] = colors[i]; + ren->nvertices++; + } } void REN_flush(REN* ren) { - u32 i; - for (i = 0; i < ren->nvertices; i += 3) - REN_triangle(ren, ren->verts + i, ren->colors + i); - ren->nvertices = 0; // Reset buffer for next frame + 
u32 i; + for (i = 0; i < ren->nvertices; i += 3) + REN_triangle(ren, ren->verts + i, ren->colors + i); + ren->nvertices = 0; // Reset buffer for next frame } void REN_draw(REN* ren) { - SDL_UpdateTexture(ren->tex, NULL, ren->fb, W * sizeof(u32)); - SDL_RenderCopy(ren->renderer, ren->tex, NULL, NULL); + //SDL_UpdateTexture(ren->tex, NULL, ren->fb, W * sizeof(u32)); + //SDL_RenderCopy(ren->renderer, ren->tex, NULL, NULL); + mfb_update_ex(ren->window, ren->fb, W , H); } void REN_display(REN* ren) { - // Flush any remaining vertices before displaying - if (ren->nvertices > 0) { - REN_flush(ren); - } + // Flush any remaining vertices before displaying + if (ren->nvertices > 0) { + REN_flush(ren); + } - REN_draw(ren); - SDL_RenderPresent(ren->renderer); + REN_draw(ren); + //SDL_RenderPresent(ren->renderer); } diff --git a/src/sr.h b/src/sr.h @@ -21,7 +21,8 @@ typedef struct { double x, y, z; } vec3f; enum mop {ADD, SUB, MUL, DIV}; typedef struct _RENDERER { - SDL_Window* window; + struct mfb_window* window; + //SDL_Window* window; SDL_Texture* tex; SDL_Renderer* renderer; ivec2* verts; diff --git a/src/time.c b/src/time.c diff --git a/src/time.h b/src/time.h diff --git a/test.cc b/test.cc @@ -14,7 +14,6 @@ main() uint8_t value = 1; int wow = 3; - cout << bitset<8>(status_bits) << endl; cout << bitset<8>(0x1f) << endl; cout << bitset<8>(~0x1f) << endl;