diff --git a/CMakeLists.txt b/CMakeLists.txt index 68f050da..0c56a4e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,6 +136,7 @@ set(REDREAM_SOURCES src/core/interval_tree.c src/core/list.c src/core/log.c + src/core/mm_heap.c src/core/option.c src/core/profiler.c src/core/rb_tree.c @@ -160,22 +161,20 @@ set(REDREAM_SOURCES src/hw/memory.c src/hw/scheduler.c src/jit/backend/x64/x64_backend.cc - src/jit/backend/x64/x64_disassembler.cc - src/jit/backend/x64/x64_emitter.cc - src/jit/frontend/sh4/sh4_analyzer.cc - src/jit/frontend/sh4/sh4_builder.cc - src/jit/frontend/sh4/sh4_disassembler.cc - src/jit/frontend/sh4/sh4_frontend.cc - src/jit/ir/ir_builder.cc - src/jit/ir/ir_reader.cc - src/jit/ir/ir_writer.cc + src/jit/backend/x64/x64_disassembler.c + src/jit/frontend/sh4/sh4_analyze.c + src/jit/frontend/sh4/sh4_disasm.c + src/jit/frontend/sh4/sh4_frontend.c + src/jit/frontend/sh4/sh4_translate.c + src/jit/ir/ir.c + src/jit/ir/ir_read.c + src/jit/ir/ir_write.c #src/jit/ir/passes/constant_propagation_pass.cc - src/jit/ir/passes/conversion_elimination_pass.cc - src/jit/ir/passes/dead_code_elimination_pass.cc - src/jit/ir/passes/load_store_elimination_pass.cc - src/jit/ir/passes/pass_runner.cc - src/jit/ir/passes/pass_stats.cc - src/jit/ir/passes/register_allocation_pass.cc + src/jit/ir/passes/conversion_elimination_pass.c + src/jit/ir/passes/dead_code_elimination_pass.c + src/jit/ir/passes/load_store_elimination_pass.c + src/jit/ir/passes/pass_stat.c + src/jit/ir/passes/register_allocation_pass.c src/renderer/gl_backend.c src/sys/exception_handler.c src/sys/filesystem.c @@ -379,8 +378,8 @@ set(RETEST_SOURCES #test/test_interval_tree.cc #test/test_intrusive_list.cc test/test_list.cc - #test/test_dead_code_elimination_pass.cc - #test/test_load_store_elimination_pass.cc + test/test_dead_code_elimination_pass.cc + test/test_load_store_elimination_pass.cc #test/test_minmax_heap.cc test/test_sh4.cc ${asm_inc}) diff --git a/src/core/arena.h b/src/core/arena.h deleted 
file mode 100644 index a962d934..00000000 --- a/src/core/arena.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef ARENA_H -#define ARENA_H - -#include -#include -#include "core/assert.h" - -namespace re { - -class Arena { - struct Chunk { - Chunk(int capacity) : capacity(capacity), head(0), next(nullptr) { - buffer = (uint8_t *)malloc(capacity); - } - - ~Chunk() { - free(buffer); - } - - int capacity; - uint8_t *buffer; - int head; - Chunk *next; - }; - - public: - Arena(int chunk_size) - : chunk_size_(chunk_size), root_chunk_(nullptr), current_chunk_(nullptr) { - current_chunk_ = root_chunk_ = new Chunk(chunk_size_); - } - - ~Arena() { - Chunk *chunk = root_chunk_; - - while (chunk) { - Chunk *next = chunk->next; - delete chunk; - chunk = next; - } - } - - void *Alloc(int bytes) { - CHECK_LE(bytes, chunk_size_, - "Allocation of %zu bytes is greater than chunk size of %zu bytes", - bytes, chunk_size_); - - // alloc the next chunk if we're out of capacity - if ((current_chunk_->capacity - current_chunk_->head) < bytes) { - Chunk *next = current_chunk_->next; - if (!next) { - next = new Chunk(chunk_size_); - current_chunk_ = next; - } - current_chunk_ = next; - } - - void *ptr = current_chunk_->buffer + current_chunk_->head; - current_chunk_->head += bytes; - return ptr; - } - - template - T *Alloc() { - return (T *)Alloc(sizeof(T)); - } - - void Reset() { - current_chunk_ = root_chunk_; - current_chunk_->head = 0; - } - - private: - int chunk_size_; - Chunk *root_chunk_, *current_chunk_; -}; -} - -#endif diff --git a/src/core/array.h b/src/core/array.h deleted file mode 100644 index dc36131e..00000000 --- a/src/core/array.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef ARRAY_H -#define ARRAY_H - -#include -#include - -namespace re { - -template -class array { - public: - array(int size = 8) : data_(nullptr), size_(0), capacity_(0) { - Resize(size); - } - ~array() { - free(data_); - } - - array(array const &) = delete; - void operator=(array const &) = delete; - - T &operator[](int 
i) { - return data_[i]; - } - T operator[](int i) const { - return data_[i]; - } - - T *data() { - return data_; - } - const T *data() const { - return data_; - } - - T &front() { - return data_[0]; - } - T &back() { - return data_[size_ - 1]; - } - - int size() const { - return size_; - } - bool empty() const { - return !!size_; - } - int capacity() const { - return capacity_; - } - - void Resize(int size) { - Reserve(size); - size_ = size; - } - - void Reserve(int cap) { - if (capacity_ >= cap) { - return; - } - - // grow capacity to be >= cap - if (!capacity_) { - capacity_ = 1; - } - while (capacity_ < cap) { - capacity_ *= 2; - } - - data_ = reinterpret_cast(realloc(data_, capacity_ * sizeof(T))); - } - - void Clear() { - size_ = 0; - } - - void PushBack(T v) { - data_[size_++] = v; - } - - void PopBack() { - size_--; - } - - private: - T *data_; - int size_; - int capacity_; -}; -} - -#endif diff --git a/src/core/core.h b/src/core/core.h index 3287f9e9..325ac378 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -6,22 +6,29 @@ #ifdef __cplusplus -#define container_of(ptr, type, member) \ - ({ \ - const decltype(((type*)0)->member)* __mptr = (ptr); \ - (type*)((char*)__mptr - offsetof(type, member)); \ - }) +#include + +#define TYPEOF(n) typename std::remove_reference::type #else -#define container_of(ptr, type, member) \ - ({ \ - const __typeof__(((type*)0)->member)* __mptr = (ptr); \ - (type*)((char*)__mptr - offsetof(type, member)); \ - }) +#define TYPEOF(n) __typeof__(n) #endif +#define SWAP(a, b) \ + do { \ + TYPEOF(a) tmp = (a); \ + (a) = (b); \ + (b) = tmp; \ + } while (0) + +#define container_of(ptr, type, member) \ + ({ \ + const TYPEOF(((type*)0)->member)* __mptr = (ptr); \ + (type*)((char*)__mptr - offsetof(type, member)); \ + }) + #define array_size(arr) (sizeof(arr) / sizeof((arr)[0])) #define array_resize(arr, new_size) \ diff --git a/src/core/delegate.h b/src/core/delegate.h deleted file mode 100644 index 332c4fc3..00000000 --- 
a/src/core/delegate.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef DELEGATE_H -#define DELEGATE_H - -#include -#include -#include "core/assert.h" - -namespace re { - -template -class delegate; - -template -class delegate { - typedef R (*thunk_type)(void *, A...); - - template - struct const_member_data { - T *callee; - R (T::*func)(A...) const; - }; - - template - struct member_data { - T *callee; - R (T::*func)(A...); - }; - - typedef R (*func_data)(A...); - - public: - delegate() : thunk_(nullptr), data_() {} - - delegate(std::nullptr_t) : delegate() {} - - template - delegate(T *callee, std::nullptr_t) - : delegate() {} - - template - delegate(T *callee, R (T::*func)(A...) const) - : delegate() { - static_assert(sizeof(const_member_data) < sizeof(data_), - "data not large enough to hold member function pointer"); - - thunk_ = reinterpret_cast(&const_member_thunk); - - *reinterpret_cast *>(data_) = {callee, func}; - } - - template - delegate(T *callee, R (T::*func)(A...)) - : delegate() { - static_assert(sizeof(member_data) < sizeof(data_), - "data not large enough to hold member function pointer"); - - thunk_ = reinterpret_cast(&member_thunk); - - *reinterpret_cast *>(data_) = {callee, func}; - } - - delegate(R (*func)(A...)) : delegate() { - thunk_ = reinterpret_cast(&func_thunk); - - *reinterpret_cast(data_) = func; - } - - operator bool() const { - return !!thunk_; - } - - bool operator==(const delegate &rhs) const noexcept { - return (thunk_ == rhs.thunk_) && !memcmp(data_, rhs.data_, sizeof(data_)); - } - - bool operator!=(const delegate &rhs) const noexcept { - return !operator==(rhs); - } - - R operator()(A... args) { - DCHECK(thunk_); - return thunk_(data_, args...); - } - - private: - template - static R const_member_thunk(const_member_data *data, A... args) { - DCHECK(data->callee && data->func); - return (data->callee->*data->func)(args...); - } - - template - static R member_thunk(member_data *data, A... 
args) { - DCHECK(data->callee && data->func); - return (data->callee->*data->func)(args...); - } - - static R func_thunk(func_data *data, A... args) { - DCHECK(data); - return (*data)(args...); - } - - thunk_type thunk_; - uint8_t data_[32]; -}; - -template -delegate make_delegate(R (T::*func)(A...) const, T *callee) { - return delegate(callee, func); -} - -template -delegate make_delegate(R (T::*func)(A...), T *callee) { - return delegate(callee, func); -} -template -delegate make_delegate(R (*func)(A...)) { - return delegate(func); -} -} - -#endif diff --git a/src/core/intrusive_list.h b/src/core/intrusive_list.h deleted file mode 100644 index a4363ddb..00000000 --- a/src/core/intrusive_list.h +++ /dev/null @@ -1,314 +0,0 @@ -#ifndef INTRUSIVE_LIST_H -#define INTRUSIVE_LIST_H - -#include -#include -#include -#include "core/assert.h" - -namespace re { - -// Objects are directly stored in the instrusive container, not copies. Due to -// this, the lifetime of the object is not bound to the container. It's up to -// the caller to manage the lifetime of the object being stored. -template -class IntrusiveListNode { - template - friend class IntrusiveList; - - public: - IntrusiveListNode() : prev_(nullptr), next_(nullptr) {} - - T *prev() { - return prev_; - } - const T *prev() const { - return prev_; - } - - T *next() { - return next_; - } - const T *next() const { - return next_; - } - - private: - T *prev_; - T *next_; -}; - -template -class IntrusiveList { - // For the iterator, remember that a C++ iterator's range is [begin, end), - // meaning the end iterator will be wrapping an invalid node. - template - class shared_iterator - : public std::iterator { - friend class IntrusiveList; - - typedef shared_iterator self_type; - typedef typename std::conditional::type list_pointer; - typedef typename std::conditional::type - pointer; - - static const intptr_t sentinel_end = 0xdeadbeef; - - public: - // FIXME Can some of these nasty conditionals be removed? 
- // is_reverse_iterator is known at compile time - - self_type &operator++() { - node_ = is_reverse_iterator ? node_->prev() : node_->next(); - // if we've reached the end of the list, move onto the sentinel node - if (!node_) { - node_ = reinterpret_cast(sentinel_end); - } - return *this; - } - - self_type operator++(int) { - self_type old = *this; - ++(*this); - return old; - } - - self_type &operator--() { - // if we're at the sentinel node, the previous node is the list's tail - if (node_ == reinterpret_cast(sentinel_end)) { - node_ = is_reverse_iterator ? list_->head() : list_->tail(); - } else { - node_ = is_reverse_iterator ? node_->next() : node_->prev(); - } - return *this; - } - - self_type operator--(int) { - self_type old = *this; - --(*this); - return old; - } - - pointer operator*() { - return node_; - } - - pointer operator->() { - return node_; - } - - bool operator==(const self_type &other) const { - return node_ == other.node_; - } - - bool operator!=(const self_type &other) const { - return !(other == *this); - } - - private: - shared_iterator(list_pointer list, pointer node) - : list_(list), - node_(node ? 
node : reinterpret_cast(sentinel_end)) {} - - list_pointer list_; - pointer node_; - }; - - public: - typedef shared_iterator iterator; - typedef shared_iterator const_iterator; - typedef shared_iterator reverse_iterator; - typedef shared_iterator const_reverse_iterator; - - // regular iterators - const_iterator begin() const { - return const_iterator(this, head_); - } - const_iterator end() const { - return const_iterator(this, nullptr); - } - iterator begin() { - return iterator(this, head_); - } - iterator end() { - return iterator(this, nullptr); - } - - // reverse iterators - const_reverse_iterator rbegin() const { - return const_reverse_iterator(this, tail_); - } - const_reverse_iterator rend() const { - return const_reverse_iterator(this, nullptr); - } - reverse_iterator rbegin() { - return reverse_iterator(this, tail_); - } - reverse_iterator rend() { - return reverse_iterator(this, nullptr); - } - - const T *head() const { - return head_; - } - const T *tail() const { - return tail_; - } - - T *head() { - return head_; - } - T *tail() { - return tail_; - } - - IntrusiveList() : head_(nullptr), tail_(nullptr) {} - - void Prepend(T *v) { - Insert(nullptr, v); - } - - void Append(T *v) { - Insert(tail_, v); - } - - void Insert(T *after, T *v) { - DCHECK_EQ(reinterpret_cast(NULL), v->prev_); - DCHECK_EQ(reinterpret_cast(NULL), v->next_); - - // if after is null, insert at head - if (!after) { - if (head_) { - v->next_ = head_; - v->next_->prev_ = v; - } - - head_ = v; - } else { - T *next = after->next_; - - v->prev_ = after; - v->prev_->next_ = v; - - if (next) { - v->next_ = next; - v->next_->prev_ = v; - } else { - v->next_ = nullptr; - } - } - - if (!tail_ || after == tail_) { - tail_ = v; - } - } - - void Remove(T *v) { - if (v->prev_) { - v->prev_->next_ = v->next_; - } else { - head_ = v->next_; - } - - if (v->next_) { - v->next_->prev_ = v->prev_; - } else { - tail_ = v->prev_; - } - - v->prev_ = v->next_ = nullptr; - } - - void Clear() { - head_ = 
tail_ = nullptr; - } - - // Implements the mergesort for linked lists as described at - // http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html - template - void Sort(Compare comp) { - T *head = head_; - T *tail = nullptr; - int k = 1; - - while (true) { - int merges = 0; - T *p = head; - - head = nullptr; - tail = nullptr; - - while (p) { - // track the number of lists merged this pass - merges++; - - // step q forward k places, tracking the size of p - int psize = 0; - int qsize = k; - T *q = p; - while (psize < k && q) { - psize++; - q = q->next_; - } - - // merge the list starting at p of length psize with the list starting - // at q of at most, length qsize - while (psize || (qsize && q)) { - T *next; - - if (!psize) { - next = q; - q = q->next_; - qsize--; - } else if (!qsize || !q) { - next = p; - p = p->next_; - psize--; - } else if (comp(q, p)) { - next = q; - q = q->next_; - qsize--; - } else { - next = p; - p = p->next_; - psize--; - } - - // move merged node to tail - if (!tail) { - head = next; - } else { - tail->next_ = next; - } - next->prev_ = tail; - tail = next; - } - - p = q; - } - - if (tail) { - tail->next_ = nullptr; - } - - // if only 1 pair of lists was merged, this is the end - if (merges <= 1) { - break; - } - - k *= 2; - } - - // update internal head and tail with sorted head and tail - head_ = head; - tail_ = tail; - } - - private: - T *head_; - T *tail_; -}; -} - -#endif diff --git a/src/core/list.c b/src/core/list.c index 44280b55..444e2bd8 100644 --- a/src/core/list.c +++ b/src/core/list.c @@ -1,6 +1,10 @@ #include "core/assert.h" #include "core/list.h" +int list_empty(list_t *list) { + return !list->head; +} + void list_add(list_t *list, list_node_t *n) { list_add_after(list, list->tail, n); } @@ -53,6 +57,82 @@ void list_clear(list_t *list) { list->head = list->tail = NULL; } -int list_empty(list_t *list) { - return !list->head; +// Implements the mergesort for linked lists as described at +// 
http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html +void list_sort(list_t *list, list_node_cmp cmp) { + list_node_t *head = list->head; + list_node_t *tail = NULL; + int k = 1; + + while (true) { + int merges = 0; + list_node_t *p = head; + + head = NULL; + tail = NULL; + + while (p) { + // track the number of lists merged this pass + merges++; + + // step q forward k places, tracking the size of p + int psize = 0; + int qsize = k; + list_node_t *q = p; + while (psize < k && q) { + psize++; + q = q->next; + } + + // merge the list starting at p of length psize with the list starting + // at q of at most, length qsize + while (psize || (qsize && q)) { + list_node_t *next; + + if (!psize) { + next = q; + q = q->next; + qsize--; + } else if (!qsize || !q) { + next = p; + p = p->next; + psize--; + } else if (cmp(q, p) < 0) { + next = q; + q = q->next; + qsize--; + } else { + next = p; + p = p->next; + psize--; + } + + // move merged node to tail + if (!tail) { + head = next; + } else { + tail->next = next; + } + next->prev = tail; + tail = next; + } + + p = q; + } + + if (tail) { + tail->next = NULL; + } + + // if only 1 pair of lists was merged, this is the end + if (merges <= 1) { + break; + } + + k *= 2; + } + + // update internal head and tail with sorted head and tail + list->head = head; + list->tail = tail; } diff --git a/src/core/list.h b/src/core/list.h index 267db1d1..1b0155c1 100644 --- a/src/core/list.h +++ b/src/core/list.h @@ -20,11 +20,14 @@ typedef struct list_s { list_node_t *tail; } list_t; +typedef int (*list_node_cmp)(const list_node_t *a, const list_node_t *b); + +int list_empty(list_t *list); void list_add(list_t *list, list_node_t *n); void list_add_after(list_t *list, list_node_t *after, list_node_t *n); void list_remove(list_t *list, list_node_t *n); void list_clear(list_t *list); -int list_empty(list_t *list); +void list_sort(list_t *list, list_node_cmp cmp); #define list_for_each(list, it) \ for (list_node_t *it = 
(list)->head, *it##_next = it ? it->next : NULL; it; \ @@ -32,48 +35,41 @@ int list_empty(list_t *list); #define list_entry(n, type, member) container_of(n, type, member) +#define list_add_after_entry(list, after, member, n) \ + list_add_after(list, (after) ? &(after)->member : NULL, &(n)->member) + #define list_first_entry(list, type, member) \ ((list)->head ? list_entry((list)->head, type, member) : NULL) #define list_last_entry(list, type, member) \ ((list)->tail ? list_entry((list)->tail, type, member) : NULL) -#ifdef __cplusplus - -#define list_next_entry(n, member) \ - ((n) && (n)->member.next \ - ? list_entry((n)->member.next, \ - std::remove_reference::type, member) \ +#define list_next_entry(n, member) \ + ((n) && (n)->member.next \ + ? list_entry((n)->member.next, TYPEOF(*(n)), member) \ : NULL) -#define list_prev_entry(n, member) \ - ((n) && (n)->member.prev \ - ? list_entry((n)->member.prev, \ - std::remove_reference::type, member) \ +#define list_prev_entry(n, member) \ + ((n) && (n)->member.prev \ + ? list_entry((n)->member.prev, TYPEOF(*(n)), member) \ : NULL) -#else +#define list_for_each_entry(it, list, type, member) \ + for (type *it = list_first_entry(list, type, member); it; \ + it = list_next_entry(it, member)) -#define list_next_entry(n, member) \ - ((n) && (n)->member.next \ - ? list_entry((n)->member.next, __typeof__(*(n)), member) \ - : NULL) - -#define list_prev_entry(n, member) \ - ((n) && (n)->member.prev \ - ? 
list_entry((n)->member.prev, __typeof__(*(n)), member) \ - : NULL) - -#endif - -#define list_for_each_entry(list, type, member, it) \ - for (type *it = list_first_entry(list, type, member), \ - *it##_next = list_next_entry(it, member); \ +#define list_for_each_entry_safe(it, list, type, member) \ + for (type *it = list_first_entry(list, type, member), \ + *it##_next = list_next_entry(it, member); \ it; it = it##_next, it##_next = list_next_entry(it, member)) -#define list_for_each_entry_reverse(list, type, member, it) \ - for (type *it = list_last_entry(list, type, member), \ - *it##_next = list_prev_entry(it, member); \ +#define list_for_each_entry_reverse(it, list, type, member) \ + for (type *it = list_last_entry(list, type, member); it; \ + it = list_prev_entry(it, member)) + +#define list_for_each_entry_safe_reverse(it, list, type, member) \ + for (type *it = list_last_entry(list, type, member), \ + *it##_next = list_prev_entry(it, member); \ it; it = it##_next, it##_next = list_prev_entry(it, member)) #ifdef __cplusplus diff --git a/src/core/memory.h b/src/core/memory.h deleted file mode 100644 index 7cc1d178..00000000 --- a/src/core/memory.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef REDREAM_MEMORY_H -#define REDREAM_MEMORY_H - -#include - -#if PLATFORM_WINDOWS -#define alloca _alloca -#endif - -template -T load(const void *ptr) { - return *reinterpret_cast(ptr); -} - -template -void store(void *ptr, T v) { - *reinterpret_cast(ptr) = v; -} - -#endif diff --git a/src/core/minmax_heap.h b/src/core/minmax_heap.h deleted file mode 100644 index cd02c638..00000000 --- a/src/core/minmax_heap.h +++ /dev/null @@ -1,288 +0,0 @@ -#ifndef MINMAX_HEAP_H -#define MINMAX_HEAP_H - -#include -#include -#include "core/assert.h" - -// Min-max heap implementation, based on -// http://www.akira.ruc.dk/~keld/teaching/algoritmedesign_f03/Artikler/02../Atkinson86.pdf - -namespace re { - -template -static inline bool mmheap_is_max_level(T index) { - T n = index + 1; - T log2 = 0; - 
while (n >>= 1) log2++; - return log2 % 2 == 1; -} - -template -static inline T mmheap_parent(T index) { - return (index - 1) / 2; -} - -template -static inline T mmheap_grandparent(T index) { - return mmheap_parent(mmheap_parent(index)); -} - -template -static inline bool mmheap_has_grandparent(T index) { - return mmheap_parent(index) != 0; -} - -template -static inline T mmheap_left_child(T index) { - return 2 * index + 1; -} - -template -static inline T mmheap_left_grandchild(T index) { - return mmheap_left_child(mmheap_left_child(index)); -} - -template -static inline T mmheap_is_child(T parent, T child) { - return parent == ((child - 1) / 2); -} - -template -void mmheap_sift_up( - RandomIt first, RandomIt last, Compare comp, - typename std::iterator_traits::difference_type index) { - using difference_type = - typename std::iterator_traits::difference_type; - - // can't sift up past the root - if (!index) { - return; - } - - difference_type ancestor_index = mmheap_parent(index); - bool max_level = mmheap_is_max_level(ancestor_index); - - // if the node is smaller (greater) than its parent, then it is smaller - // (greater) than all other nodes at max (min) levels up to the root. swap - // the node with its parent and check min (max) levels up to the root until - // the min-max order property is satisfied - if (comp(*(first + index), *(first + ancestor_index)) ^ max_level) { - std::swap(*(first + ancestor_index), *(first + index)); - index = ancestor_index; - } - // if the node is greater (smaller) than its parent, then it is greater - // (smaller) than all other nodes at min (max) levels up to the root. 
the - // node is in the correct order with regards to its parent, but check max - // (min) levels up to the root until the min-max order property is satisfied - else { - max_level = !max_level; - } - - while (mmheap_has_grandparent(index)) { - ancestor_index = mmheap_grandparent(index); - - // once node is greater (smaller) than parent, the min-max order property - // is satisfied - if (!(comp(*(first + index), *(first + ancestor_index)) ^ max_level)) { - break; - } - - // swap node with parent - std::swap(*(first + ancestor_index), *(first + index)); - index = ancestor_index; - } -} - -template -void mmheap_sift_down( - RandomIt first, RandomIt last, Compare comp, - typename std::iterator_traits::difference_type index) { - using difference_type = - typename std::iterator_traits::difference_type; - - bool max_level = mmheap_is_max_level(index); - difference_type size = last - first; - - while (index < size) { - // get the smallest (largest) child or grandchild - difference_type smallest = index; - - difference_type i = mmheap_left_child(index); - difference_type end = std::min(i + 2, size); - for (; i < end; i++) { - if (comp(*(first + i), *(first + smallest)) ^ max_level) { - smallest = i; - } - } - - i = mmheap_left_grandchild(index); - end = std::min(i + 4, size); - for (; i < end; i++) { - if (comp(*(first + i), *(first + smallest)) ^ max_level) { - smallest = i; - } - } - - // already the smallest (largest) node, nothing to do - if (smallest == index) { - break; - } - - // swap the node with the smallest (largest) descendant - std::swap(*(first + index), *(first + smallest)); - - // if the swapped node was a child, then the current node, its child, and - // its grandchild are all ordered correctly at this point satisfying the - // min-max order property - if (mmheap_is_child(index, smallest)) { - break; - } - - // if the node's new parent is now smaller than it, swap again - if (comp(*(first + mmheap_parent(smallest)), *(first + smallest)) ^ - max_level) { - 
std::swap(*(first + mmheap_parent(smallest)), *(first + smallest)); - } - - // if the swapped node was a grandchild, iteration must continue to - // ensure it's now ordered with regard to its descendants - index = smallest; - } -} - -template -bool mmheap_validate(RandomIt first, RandomIt last, Comp comp) { - using difference_type = - typename std::iterator_traits::difference_type; - - difference_type size = last - first; - - for (difference_type i = 0; i < size; i++) { - bool flip_compare = mmheap_is_max_level(i); - - // values stored at nodes on even (odd) levels are smaller (greater) than - // or equal to the values stored at their descendants - - // validate children - difference_type j = std::min(mmheap_left_child(i), size); - difference_type end = std::min(j + 2, size); - - for (; j < end; j++) { - if (!(comp(*(first + i), *(first + j)) ^ flip_compare)) { - return false; - } - } - - // validate grandchildren - j = std::min(mmheap_left_grandchild(i), size); - end = std::min(j + 4, size); - - for (; j < end; j++) { - if (!(comp(*(first + i), *(first + j)) ^ flip_compare)) { - return false; - } - } - } - - return true; -} - -template -bool mmheap_validate(RandomIt first, RandomIt last) { - return mmheap_validate( - first, last, - std::less::value_type>()); -} - -template -void mmheap_push(RandomIt first, RandomIt last, Comp comp) { - mmheap_sift_up(first, last, comp, (last - first) - 1); -} - -template -void mmheap_push(RandomIt first, RandomIt last) { - mmheap_push(first, last, - std::less::value_type>()); -} - -template -RandomIt mmheap_find_min(RandomIt first, RandomIt last, Comp comp) { - return first; -} - -template -RandomIt mmheap_find_min(RandomIt first, RandomIt last) { - return mmheap_find_min( - first, last, - std::less::value_type>()); -} - -template -RandomIt mmheap_find_max(RandomIt first, RandomIt last, Comp comp) { - using difference_type = - typename std::iterator_traits::difference_type; - - difference_type size = last - first; - - if (size == 
1) { - // root must be the max - return first; - } else if (size == 2) { - // root's child must be the max - return first + 1; - } else { - // must be the larger of the two children - if (comp(*(first + 1), *(first + 2))) { - return first + 2; - } else { - return first + 1; - } - } -} - -template -RandomIt mmheap_find_max(RandomIt first, RandomIt last) { - return mmheap_find_max( - first, last, - std::less::value_type>()); -} - -template -void mmheap_pop_min(RandomIt first, RandomIt last, Comp comp) { - if (first == last) { - return; - } - - RandomIt min = mmheap_find_min(first, last, comp); - std::swap(*min, *--last); - mmheap_sift_down(first, last, comp, std::distance(first, min)); -} - -template -void mmheap_pop_min(RandomIt first, RandomIt last) { - mmheap_pop_min( - first, last, - std::less::value_type>()); -} - -template -void mmheap_pop_max(RandomIt first, RandomIt last, Comp comp) { - if (first == last) { - return; - } - - RandomIt max = mmheap_find_max(first, last, comp); - std::swap(*max, *--last); - mmheap_sift_down(first, last, comp, std::distance(first, max)); -} - -template -void mmheap_pop_max(RandomIt first, RandomIt last) { - mmheap_pop_max( - first, last, - std::less::value_type>()); -} -} - -#endif diff --git a/src/core/mm_heap.c b/src/core/mm_heap.c new file mode 100644 index 00000000..ac664442 --- /dev/null +++ b/src/core/mm_heap.c @@ -0,0 +1,200 @@ +#include "core/assert.h" +#include "core/core.h" +#include "core/mm_heap.h" + +static inline bool mm_is_max_level(int index) { + int n = index + 1; + int log2 = 0; + while (n >>= 1) log2++; + return log2 % 2 == 1; +} + +static inline int mm_parent(int index) { + return (index - 1) / 2; +} + +static inline int mm_grandparent(int index) { + return mm_parent(mm_parent(index)); +} + +static inline bool mm_has_grandparent(int index) { + return mm_parent(index) != 0; +} + +static inline int mm_left_child(int index) { + return 2 * index + 1; +} + +static inline int mm_left_grandchild(int index) { + return 
mm_left_child(mm_left_child(index)); +} + +static inline int mm_is_child(int parent, int child) { + return parent == ((child - 1) / 2); +} + +static void mm_sift_up(mm_type *begin, int size, int index, mm_cmp cmp) { + // can't sift up past the root + if (!index) { + return; + } + + int ancestor_index = mm_parent(index); + bool max_level = mm_is_max_level(ancestor_index); + + // if the node is smaller (greater) than its parent, then it is smaller + // (greater) than all other nodes at max (min) levels up to the root. swap + // the node with its parent and check min (max) levels up to the root until + // the min-max order property is satisfied + if (cmp(*(begin + index), *(begin + ancestor_index)) ^ max_level) { + SWAP(*(begin + ancestor_index), *(begin + index)); + index = ancestor_index; + } + // if the node is greater (smaller) than its parent, then it is greater + // (smaller) than all other nodes at min (max) levels up to the root. the + // node is in the correct order with regards to its parent, but check max + // (min) levels up to the root until the min-max order property is satisfied + else { + max_level = !max_level; + } + + while (mm_has_grandparent(index)) { + ancestor_index = mm_grandparent(index); + + // once node is greater (smaller) than parent, the min-max order property + // is satisfied + if (!(cmp(*(begin + index), *(begin + ancestor_index)) ^ max_level)) { + break; + } + + // swap node with parent + SWAP(*(begin + ancestor_index), *(begin + index)); + index = ancestor_index; + } +} + +static void mm_sift_down(mm_type *begin, int size, int index, mm_cmp cmp) { + bool max_level = mm_is_max_level(index); + + while (index < size) { + // get the smallest (largest) child or grandchild + int smallest = index; + + int i = mm_left_child(index); + int end = MIN(i + 2, size); + for (; i < end; i++) { + if (cmp(*(begin + i), *(begin + smallest)) ^ max_level) { + smallest = i; + } + } + + i = mm_left_grandchild(index); + end = MIN(i + 4, size); + for (; i < 
end; i++) { + if (cmp(*(begin + i), *(begin + smallest)) ^ max_level) { + smallest = i; + } + } + + // already the smallest (largest) node, nothing to do + if (smallest == index) { + break; + } + + // swap the node with the smallest (largest) descendant + SWAP(*(begin + index), *(begin + smallest)); + + // if the swapped node was a child, then the current node, its child, and + // its grandchild are all ordered correctly at this point satisfying the + // min-max order property + if (mm_is_child(index, smallest)) { + break; + } + + // if the node's new parent is now smaller than it, swap again + int parent = mm_parent(smallest); + if (cmp(*(begin + parent), *(begin + smallest)) ^ max_level) { + SWAP(*(begin + parent), *(begin + smallest)); + } + + // if the swapped node was a grandchild, iteration must continue to + // ensure it's now ordered with regard to its descendants + index = smallest; + } +} + +bool mm_validate(mm_type *begin, int size, mm_cmp cmp) { + for (int i = 0; i < size; i++) { + bool flip_compare = mm_is_max_level(i); + + // values stored at nodes on even (odd) levels are smaller (greater) than + // or equal to the values stored at their descendants + + // validate children + int j = MIN(mm_left_child(i), size); + int end = MIN(j + 2, size); + + for (; j < end; j++) { + if (!(cmp(*(begin + i), *(begin + j)) ^ flip_compare)) { + return false; + } + } + + // validate grandchildren + j = MIN(mm_left_grandchild(i), size); + end = MIN(j + 4, size); + + for (; j < end; j++) { + if (!(cmp(*(begin + i), *(begin + j)) ^ flip_compare)) { + return false; + } + } + } + + return true; +} + +void mm_push(mm_type *begin, int size, mm_cmp cmp) { + mm_sift_up(begin, size, size - 1, cmp); +} + +mm_type *mm_find_min(mm_type *begin, int size, mm_cmp cmp) { + return begin; +} + +mm_type *mm_find_max(mm_type *begin, int size, mm_cmp cmp) { + if (size == 1) { + // root must be the max + return begin; + } else if (size == 2) { + // root's child must be the max + return 
begin + 1; + } else { + // must be the larger of the two children + if (cmp(*(begin + 1), *(begin + 2))) { + return begin + 2; + } else { + return begin + 1; + } + } +} + +void mm_pop_min(mm_type *begin, int size, mm_cmp cmp) { + if (!size) { + return; + } + + mm_type *min = mm_find_min(begin, size, cmp); + SWAP(*min, *(begin + size - 1)); + mm_sift_down(begin, size - 1, min - begin, cmp); +} + +void mm_pop_max(mm_type *begin, int size, mm_cmp cmp) { + if (!size) { + return; + } + + mm_type *max = mm_find_max(begin, size, cmp); + SWAP(*max, *(begin + size - 1)); + mm_sift_down(begin, size - 1, max - begin, cmp); +} diff --git a/src/core/mm_heap.h b/src/core/mm_heap.h new file mode 100644 index 00000000..df1b4c1a --- /dev/null +++ b/src/core/mm_heap.h @@ -0,0 +1,27 @@ +#ifndef MM_HEAP_H +#define MM_HEAP_H + +// Min-max heap implementation, based on +// http://www.akira.ruc.dk/~keld/teaching/algoritmedesign_f03/Artikler/02../Atkinson86.pdf + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void *mm_type; +typedef bool (*mm_cmp)(mm_type lhs, mm_type rhs); + +bool mm_validate(mm_type *begin, int size, mm_cmp cmp); +void mm_push(mm_type *begin, int size, mm_cmp cmp); +mm_type *mm_find_min(mm_type *begin, int size, mm_cmp cmp); +mm_type *mm_find_max(mm_type *begin, int size, mm_cmp cmp); +void mm_pop_min(mm_type *begin, int size, mm_cmp cmp); +void mm_pop_max(mm_type *begin, int size, mm_cmp cmp); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/core/option.c b/src/core/option.c index ab1f0938..5a464ab9 100644 --- a/src/core/option.c +++ b/src/core/option.c @@ -6,7 +6,7 @@ static list_t s_options; static option_t *option_find(const char *name) { - list_for_each_entry(&s_options, option_t, it, opt) { + list_for_each_entry(opt, &s_options, option_t, it) { if (!strcmp(opt->name, name)) { return opt; } @@ -83,7 +83,7 @@ void option_print_help() { int max_name_width = 0; int max_desc_width = 0; - list_for_each_entry(&s_options, option_t, it, opt) { 
+ list_for_each_entry(opt, &s_options, option_t, it) { int l = (int)strlen(opt->name); max_name_width = MAX(l, max_name_width); @@ -91,7 +91,7 @@ void option_print_help() { max_desc_width = MAX(l, max_desc_width); } - list_for_each_entry(&s_options, option_t, it, opt) { + list_for_each_entry(opt, &s_options, option_t, it) { switch (opt->type) { case OPT_BOOL: LOG_INFO("--%-*s %-*s [default %s]", max_name_width, opt->name, diff --git a/src/core/rb_tree.h b/src/core/rb_tree.h index a045726a..414489fa 100644 --- a/src/core/rb_tree.h +++ b/src/core/rb_tree.h @@ -5,6 +5,8 @@ extern "C" { #endif +#include "core/core.h" + #define RB_NODE(n) ((rb_node_t *)n) typedef enum { @@ -46,26 +48,12 @@ rb_node_t *rb_next(rb_node_t *n); #define rb_entry(n, type, member) container_of(n, type, member) -#ifdef __cplusplus - -#define rb_find_entry(t, search, member, cb) \ - ({ \ - rb_node_t *it = rb_find(t, &(search)->member, cb); \ - it ? rb_entry(it, std::remove_reference::type, \ - member) \ - : NULL; \ +#define rb_find_entry(t, search, member, cb) \ + ({ \ + rb_node_t *it = rb_find(t, &(search)->member, cb); \ + it ? rb_entry(it, TYPEOF(*search), member) : NULL; \ }) -#else - -#define rb_find_entry(t, search, member, cb) \ - ({ \ - rb_node_t *it = rb_find(t, &(search)->member, cb); \ - it ? 
rb_entry(it, __typeof__(*search), member) : NULL; \ - }) - -#endif - // #define rb_for_each_entry(t, member, it) \ // for (rb_node_t *it = rb_first(t), *it##_next = rb_next(it); it; \ // it = it##_next, it##_next = rb_next(it)) diff --git a/src/hw/holly/ta.cc b/src/hw/holly/ta.cc index 9507d75c..5fa0f578 100644 --- a/src/hw/holly/ta.cc +++ b/src/hw/holly/ta.cc @@ -548,7 +548,8 @@ void ta_clear_textures(ta_t *ta) { } void ta_clear_pending_textures(ta_t *ta) { - list_for_each_entry(&ta->invalid_entries, texture_entry_t, invalid_it, it) { + list_for_each_entry_safe(it, &ta->invalid_entries, texture_entry_t, + invalid_it) { ta_invalidate_texture(ta, it); ta->num_invalidated++; } diff --git a/src/hw/maple/controller.c b/src/hw/maple/controller.c index dd4cae28..c29e3e8f 100644 --- a/src/hw/maple/controller.c +++ b/src/hw/maple/controller.c @@ -2,7 +2,6 @@ #include "core/log.h" #include "core/option.h" #include "core/string.h" -#include "hw/maple/controller.h" #include "hw/maple/maple.h" DEFINE_OPTION_STRING(profile, "profiles/ps4.ini", "Controller profile"); @@ -60,6 +59,115 @@ static maple_deviceinfo_t controller_devinfo = { 0x01ae, 0x01f4}; +static void controller_load_profile(controller_t *ctrl, const char *path); +static int controller_ini_handler(void *user, const char *section, + const char *name, const char *value); +static void controller_destroy(controller_t *controller); +static bool controller_input(controller_t *ctrl, keycode_t key, int16_t value); +static bool controller_frame(controller_t *ctrl, const maple_frame_t *frame, + maple_frame_t *res); + +maple_device_t *controller_create() { + controller_t *ctrl = calloc(1, sizeof(controller_t)); + ctrl->base.destroy = (maple_destroy_cb)&controller_destroy; + ctrl->base.input = (maple_input_cb)&controller_input; + ctrl->base.frame = (maple_frame_cb)&controller_frame; + ctrl->cnd.function = FN_CONTROLLER; + + // buttons bitfield contains 0s for pressed buttons and 1s for unpressed + ctrl->cnd.buttons = 0xffff; + 
+ // triggers completely unpressed + ctrl->cnd.rtrig = ctrl->cnd.ltrig = 0; + + // joysticks default to dead center + ctrl->cnd.joyy = ctrl->cnd.joyx = ctrl->cnd.joyx2 = ctrl->cnd.joyy2 = 0x80; + + // default profile + // CONT_JOYX + // CONT_JOYY + // CONT_LTRIG + // CONT_RTRIG + ctrl->map[K_SPACE] = CONT_START; + ctrl->map[(keycode_t)'k'] = CONT_A; + ctrl->map[(keycode_t)'l'] = CONT_B; + ctrl->map[(keycode_t)'j'] = CONT_X; + ctrl->map[(keycode_t)'i'] = CONT_Y; + ctrl->map[(keycode_t)'w'] = CONT_DPAD_UP; + ctrl->map[(keycode_t)'s'] = CONT_DPAD_DOWN; + ctrl->map[(keycode_t)'a'] = CONT_DPAD_LEFT; + ctrl->map[(keycode_t)'d'] = CONT_DPAD_RIGHT; + + // load profile + controller_load_profile(ctrl, OPTION_profile); + + return &ctrl->base; +} + +static void controller_load_profile(controller_t *ctrl, const char *path) { + if (!*path) { + return; + } + + LOG_INFO("Loading controller profile %s", path); + + if (ini_parse(path, controller_ini_handler, ctrl) < 0) { + LOG_WARNING("Failed to parse %s", path); + return; + } +} + +static int controller_ini_handler(void *user, const char *section, + const char *name, const char *value) { + controller_t *ctrl = user; + + int button = 0; + if (!strcmp(name, "joyx")) { + button = CONT_JOYX; + } else if (!strcmp(name, "joyy")) { + button = CONT_JOYY; + } else if (!strcmp(name, "ltrig")) { + button = CONT_LTRIG; + } else if (!strcmp(name, "rtrig")) { + button = CONT_RTRIG; + } else if (!strcmp(name, "start")) { + button = CONT_START; + } else if (!strcmp(name, "a")) { + button = CONT_A; + } else if (!strcmp(name, "b")) { + button = CONT_B; + } else if (!strcmp(name, "x")) { + button = CONT_X; + } else if (!strcmp(name, "y")) { + button = CONT_Y; + } else if (!strcmp(name, "dpad_up")) { + button = CONT_DPAD_UP; + } else if (!strcmp(name, "dpad_down")) { + button = CONT_DPAD_DOWN; + } else if (!strcmp(name, "dpad_left")) { + button = CONT_DPAD_LEFT; + } else if (!strcmp(name, "dpad_right")) { + button = CONT_DPAD_RIGHT; + } else { + 
LOG_WARNING("Unknown button %s", name); + return 0; + } + + keycode_t key = get_key_by_name(value); + if (key == K_UNKNOWN) { + LOG_WARNING("Unknown key %s", value); + return 0; + } + + ctrl->map[key] = button; + + return 1; +} + +static void controller_destroy(controller_t *controller) { + free(controller); +} + static bool controller_input(controller_t *ctrl, keycode_t key, int16_t value) { // map incoming key to dreamcast button int button = ctrl->map[key]; @@ -113,99 +221,3 @@ static bool controller_frame(controller_t *ctrl, const maple_frame_t *frame, return false; } - -static int controler_ini_handler(void *user, const char *section, - const char *name, const char *value) { - controller_t *ctrl = user; - - int button = 0; - if (!strcmp(name, "joyx")) { - button = CONT_JOYX; - } else if (!strcmp(name, "joyy")) { - button = CONT_JOYY; - } else if (!strcmp(name, "ltrig")) { - button = CONT_LTRIG; - } else if (!strcmp(name, "rtrig")) { - button = CONT_RTRIG; - } else if (!strcmp(name, "start")) { - button = CONT_START; - } else if (!strcmp(name, "a")) { - button = CONT_A; - } else if (!strcmp(name, "b")) { - button = CONT_B; - } else if (!strcmp(name, "x")) { - button = CONT_X; - } else if (!strcmp(name, "y")) { - button = CONT_Y; - } else if (!strcmp(name, "dpad_up")) { - button = CONT_DPAD_UP; - } else if (!strcmp(name, "dpad_down")) { - button = CONT_DPAD_DOWN; - } else if (!strcmp(name, "dpad_left")) { - button = CONT_DPAD_LEFT; - } else if (!strcmp(name, "dpad_right")) { - button = CONT_DPAD_RIGHT; - } else { - LOG_WARNING("Unknown button %s", name); - return 0; - } - - keycode_t key = get_key_by_name(value); - if (key == K_UNKNOWN) { - LOG_WARNING("Unknown key %s", value); - return 0; - } - - ctrl->map[key] = button; - - return 1; -} - -static void controller_load_profile(controller_t *ctrl, const char *path) { - if (!*path) { - return; - } - - LOG_INFO("Loading controller profile %s", path); - - if (ini_parse(path, controler_ini_handler, ctrl) < 0) { - 
LOG_WARNING("Failed to parse %s", path); - return; - } -} - -struct maple_device_s *maple_create_controller() { - controller_t *ctrl = calloc(1, sizeof(controller_t)); - ctrl->base.input = (maple_input_cb)&controller_input; - ctrl->base.frame = (maple_frame_cb)&controller_frame; - ctrl->cnd.function = FN_CONTROLLER; - - // buttons bitfield contains 0s for pressed buttons and 1s for unpressed - ctrl->cnd.buttons = 0xffff; - - // triggers completely unpressed - ctrl->cnd.rtrig = ctrl->cnd.ltrig = 0; - - // joysticks default to dead center - ctrl->cnd.joyy = ctrl->cnd.joyx = ctrl->cnd.joyx2 = ctrl->cnd.joyy2 = 0x80; - - // default profile - // CONT_JOYX - // CONT_JOYY - // CONT_LTRIG - // CONT_RTRIG - ctrl->map[K_SPACE] = CONT_START; - ctrl->map[(keycode_t)'k'] = CONT_A; - ctrl->map[(keycode_t)'l'] = CONT_B; - ctrl->map[(keycode_t)'j'] = CONT_X; - ctrl->map[(keycode_t)'i'] = CONT_Y; - ctrl->map[(keycode_t)'w'] = CONT_DPAD_UP; - ctrl->map[(keycode_t)'s'] = CONT_DPAD_DOWN; - ctrl->map[(keycode_t)'a'] = CONT_DPAD_LEFT; - ctrl->map[(keycode_t)'d'] = CONT_DPAD_RIGHT; - - // load profile - controller_load_profile(ctrl, OPTION_profile); - - return &ctrl->base; -} diff --git a/src/hw/maple/controller.h b/src/hw/maple/controller.h deleted file mode 100644 index 2c1dc1e5..00000000 --- a/src/hw/maple/controller.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef CONTROLLER_H -#define CONTROLLER_H - -#ifdef __cplusplus -extern "C" { -#endif - -struct maple_device_s; - -struct maple_device_s *maple_create_controller(); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/hw/maple/maple.c b/src/hw/maple/maple.c index 72576906..d73077c0 100644 --- a/src/hw/maple/maple.c +++ b/src/hw/maple/maple.c @@ -1,6 +1,5 @@ #include "hw/holly/holly.h" #include "hw/maple/maple.h" -#include "hw/maple/controller.h" #include "hw/sh4/sh4.h" #include "hw/dreamcast.h" @@ -20,13 +19,13 @@ static void maple_dma(maple_t *mp); static void maple_keydown(maple_t *mp, keycode_t key, int16_t value); 
DECLARE_REG_W32(maple_t *mp, SB_MDST); -struct maple_s *maple_create(struct dreamcast_s *dc) { +maple_t *maple_create(struct dreamcast_s *dc) { maple_t *mp = dc_create_device(dc, sizeof(maple_t), "maple", (device_init_cb)&maple_init); mp->base.window = window_interface_create(NULL, (device_keydown_cb)&maple_keydown); - mp->devices[0] = maple_create_controller(); + mp->devices[0] = controller_create(); return mp; } @@ -44,6 +43,21 @@ void maple_destroy(maple_t *mp) { dc_destroy_device(&mp->base); } +void maple_vblank(maple_t *mp) { + uint32_t enabled = mp->holly->reg[SB_MDEN]; + uint32_t vblank_initiate = mp->holly->reg[SB_MDTSEL]; + + // The controller can be started up by two methods: by software, or by + // hardware + // in synchronization with the V-BLANK signal. These methods are selected + // through the trigger selection register (SB_MDTSEL). + if (enabled && vblank_initiate) { + maple_dma(mp); + } + + // TODO maple vblank interrupt? +} + bool maple_init(maple_t *mp) { mp->holly = mp->base.dc->holly; mp->space = mp->base.dc->sh4->base.memory->space; @@ -60,21 +74,6 @@ bool maple_init(maple_t *mp) { return true; } -void maple_vblank(maple_t *mp) { - uint32_t enabled = mp->holly->reg[SB_MDEN]; - uint32_t vblank_initiate = mp->holly->reg[SB_MDTSEL]; - - // The controller can be started up by two methods: by software, or by - // hardware - // in synchronization with the V-BLANK signal. These methods are selected - // through the trigger selection register (SB_MDTSEL). - if (enabled && vblank_initiate) { - maple_dma(mp); - } - - // TODO maple vblank interrupt? 
-} - void maple_dma(maple_t *mp) { uint32_t start_addr = mp->holly->reg[SB_MDSTAR]; maple_transfer_t desc; diff --git a/src/hw/maple/maple.h b/src/hw/maple/maple.h index 06b03410..6e09500b 100644 --- a/src/hw/maple/maple.h +++ b/src/hw/maple/maple.h @@ -28,6 +28,8 @@ struct maple_s *maple_create(struct dreamcast_s *dc); void maple_destroy(struct maple_s *mp); void maple_vblank(struct maple_s *mp); +struct maple_device_s *controller_create(); + #ifdef __cplusplus } #endif diff --git a/src/hw/sh4/sh4.cc b/src/hw/sh4/sh4.cc index 20ce0b1f..7231a002 100644 --- a/src/hw/sh4/sh4.cc +++ b/src/hw/sh4/sh4.cc @@ -1,7 +1,9 @@ #include #include "core/math.h" -#include "core/memory.h" #include "core/profiler.h" +#include "core/string.h" +#include "jit/backend/backend.h" +#include "jit/frontend/sh4/sh4_analyze.h" #include "hw/sh4/sh4.h" #include "hw/sh4/sh4_code_cache.h" #include "hw/dreamcast.h" @@ -14,10 +16,6 @@ #include "hw/holly/pvr.h" #include "hw/holly/ta.h" -using namespace re::jit; -using namespace re::jit::backend; -using namespace re::jit::frontend::sh4; - static sh4_interrupt_info_t sh4_interrupts[NUM_SH_INTERRUPTS] = { #define SH4_INT(name, intevt, pri, ipr, ipr_shift) \ { intevt, pri, ipr, ipr_shift } \ @@ -177,18 +175,17 @@ bool sh4_init(sh4_t *sh4) { sh4->scheduler = sh4->base.dc->scheduler; sh4->space = sh4->base.memory->space; - re::jit::backend::MemoryInterface memif = { - &sh4->ctx, - sh4->base.memory->space->protected_base, - sh4->base.memory->space, - &address_space_r8, - &address_space_r16, - &address_space_r32, - &address_space_r64, - &address_space_w8, - &address_space_w16, - &address_space_w32, - &address_space_w64}; + mem_interface_t memif = {&sh4->ctx, + sh4->base.memory->space->protected_base, + sh4->base.memory->space, + &address_space_r8, + &address_space_r16, + &address_space_r32, + &address_space_r64, + &address_space_w8, + &address_space_w16, + &address_space_w32, + &address_space_w64}; sh4->code_cache = sh4_cache_create(&memif, 
&sh4_compile_pc); // initialize context @@ -252,7 +249,7 @@ void sh4_set_pc(sh4_t *sh4, uint32_t pc) { static void sh4_run_inner(sh4_t *sh4, int64_t ns) { // execute at least 1 cycle. the tests rely on this to step block by block - int64_t cycles = std::max(NANO_TO_CYCLES(ns, SH4_CLOCK_FREQ), INT64_C(1)); + int64_t cycles = MAX(NANO_TO_CYCLES(ns, SH4_CLOCK_FREQ), INT64_C(1)); // each block's epilog will decrement the remaining cycles as they run sh4->ctx.num_cycles = static_cast(cycles); @@ -399,11 +396,11 @@ void sh4_paint(sh4_t *sh4, bool show_main_menu) { // calculate average mips float avg_mips = 0.0f; - for (int i = std::max(0, perf->num_mips - MAX_MIPS_SAMPLES); - i < perf->num_mips; i++) { + for (int i = MAX(0, perf->num_mips - MAX_MIPS_SAMPLES); i < perf->num_mips; + i++) { avg_mips += perf->mips[i % MAX_MIPS_SAMPLES]; } - avg_mips /= std::max(std::min(perf->num_mips, MAX_MIPS_SAMPLES), 1); + avg_mips /= MAX(MIN(perf->num_mips, MAX_MIPS_SAMPLES), 1); char overlay_text[128]; snprintf(overlay_text, sizeof(overlay_text), "%.2f", avg_mips); @@ -869,14 +866,14 @@ template T sh4_read_cache(sh4_t *sh4, uint32_t addr) { CHECK_EQ(sh4->CCR->ORA, 1u); addr = CACHE_OFFSET(addr, sh4->CCR->OIX); - return load(&sh4->cache[addr]); + return *(T *)&sh4->cache[addr]; } template void sh4_write_cache(sh4_t *sh4, uint32_t addr, T value) { CHECK_EQ(sh4->CCR->ORA, 1u); addr = CACHE_OFFSET(addr, sh4->CCR->OIX); - store(&sh4->cache[addr], value); + *(T *)&sh4->cache[addr] = value; } template diff --git a/src/hw/sh4/sh4_code_cache.cc b/src/hw/sh4/sh4_code_cache.cc index 1a7107b5..2d50cae4 100644 --- a/src/hw/sh4/sh4_code_cache.cc +++ b/src/hw/sh4/sh4_code_cache.cc @@ -3,8 +3,11 @@ #include "hw/sh4/sh4_code_cache.h" #include "hw/memory.h" #include "jit/backend/x64/x64_backend.h" +#include "jit/frontend/sh4/sh4_analyze.h" #include "jit/frontend/sh4/sh4_frontend.h" -#include "jit/ir/ir_builder.h" +#include "jit/backend/backend.h" +#include "jit/frontend/frontend.h" +#include 
"jit/ir/ir.h" // #include "jit/ir/passes/constant_propagation_pass.h" // #include "jit/ir/passes/conversion_elimination_pass.h" #include "jit/ir/passes/dead_code_elimination_pass.h" @@ -12,14 +15,6 @@ #include "jit/ir/passes/register_allocation_pass.h" #include "sys/filesystem.h" -using namespace re::jit; -using namespace re::jit::backend; -using namespace re::jit::backend::x64; -using namespace re::jit::frontend; -using namespace re::jit::frontend::sh4; -using namespace re::jit::ir; -using namespace re::jit::ir::passes; - static bool sh4_cache_handle_exception(sh4_cache_t *cache, re_exception_t *ex); static sh4_block_t *sh4_cache_lookup_block(sh4_cache_t *cache, uint32_t guest_addr); @@ -51,7 +46,7 @@ static rb_callback_t reverse_block_map_cb = { &reverse_block_map_cmp, NULL, NULL, }; -sh4_cache_t *sh4_cache_create(const re::jit::backend::MemoryInterface *memif, +sh4_cache_t *sh4_cache_create(const mem_interface_t *memif, code_pointer_t default_code) { sh4_cache_t *cache = reinterpret_cast(calloc(1, sizeof(sh4_cache_t))); @@ -62,21 +57,8 @@ sh4_cache_t *sh4_cache_create(const re::jit::backend::MemoryInterface *memif, cache, (exception_handler_cb)&sh4_cache_handle_exception); // setup parser and emitter - cache->frontend = new SH4Frontend(); - cache->backend = new X64Backend(*memif); - - cache->pass_runner = new PassRunner(); - // setup optimization passes - cache->pass_runner->AddPass( - std::unique_ptr(new LoadStoreEliminationPass())); - // cache->pass_runner->AddPass(std::unique_ptr(new - // ConstantPropagationPass())); - // cache->pass_runner->AddPass(std::unique_ptr(new - // ConversionEliminationPass())); - cache->pass_runner->AddPass( - std::unique_ptr(new DeadCodeEliminationPass())); - cache->pass_runner->AddPass(std::unique_ptr(new RegisterAllocationPass( - cache->backend->registers(), cache->backend->num_registers()))); + cache->frontend = sh4_frontend_create(); + cache->backend = x64_create(memif); // initialize all entries in block cache to reference the 
default block cache->default_code = default_code; @@ -90,9 +72,8 @@ sh4_cache_t *sh4_cache_create(const re::jit::backend::MemoryInterface *memif, void sh4_cache_destroy(sh4_cache_t *cache) { exception_handler_remove(cache->eh_handle); - delete cache->frontend; - delete cache->backend; - delete cache->pass_runner; + sh4_frontend_destroy(cache->frontend); + x64_destroy(cache->backend); free(cache); } @@ -123,9 +104,13 @@ static code_pointer_t sh4_cache_compile_code_inner(sh4_cache_t *cache, } // translate the SH4 into IR + ir_t ir = {}; + ir.buffer = cache->ir_buffer; + ir.capacity = sizeof(cache->ir_buffer); + int guest_size = 0; - IRBuilder &builder = - cache->frontend->TranslateCode(guest_addr, guest_ptr, flags, &guest_size); + cache->frontend->translate_code(cache->frontend, guest_addr, guest_ptr, flags, + &guest_size, &ir); #if 0 const char *appdir = fs_appdir(); @@ -141,11 +126,15 @@ static code_pointer_t sh4_cache_compile_code_inner(sh4_cache_t *cache, builder.Dump(output); #endif - cache->pass_runner->Run(builder); + // run optimization passes + lse_run(&ir); + dce_run(&ir); + ra_run(&ir, cache->backend->registers, cache->backend->num_registers); // assemble the IR into native code int host_size = 0; - const uint8_t *host_addr = cache->backend->AssembleCode(builder, &host_size); + const uint8_t *host_addr = + cache->backend->assemble_code(cache->backend, &ir, &host_size); if (!host_addr) { LOG_INFO("Assembler overflow, resetting block cache"); @@ -155,7 +144,7 @@ static code_pointer_t sh4_cache_compile_code_inner(sh4_cache_t *cache, // if the backend fails to assemble on an empty cache, there's nothing to be // done - host_addr = cache->backend->AssembleCode(builder, &host_size); + host_addr = cache->backend->assemble_code(cache->backend, &ir, &host_size); CHECK(host_addr, "Backend assembler buffer overflow"); } @@ -236,7 +225,7 @@ void sh4_cache_clear_blocks(sh4_cache_t *cache) { } // have the backend reset its codegen buffers as well - 
cache->backend->Reset(); + cache->backend->reset(cache->backend); } static bool sh4_cache_handle_exception(sh4_cache_t *cache, re_exception_t *ex) { @@ -249,7 +238,7 @@ static bool sh4_cache_handle_exception(sh4_cache_t *cache, re_exception_t *ex) { } // let the backend attempt to handle the exception - if (!cache->backend->HandleFastmemException(ex)) { + if (!cache->backend->handle_fastmem_exception(cache->backend, ex)) { return false; } diff --git a/src/hw/sh4/sh4_code_cache.h b/src/hw/sh4/sh4_code_cache.h index 096a0654..36278b29 100644 --- a/src/hw/sh4/sh4_code_cache.h +++ b/src/hw/sh4/sh4_code_cache.h @@ -1,11 +1,11 @@ #ifndef SH4_CODE_CACHE_H #define SH4_CODE_CACHE_H +#include "core/assert.h" #include "core/rb_tree.h" #include "jit/backend/x64/x64_backend.h" #include "jit/frontend/sh4/sh4_context.h" #include "jit/frontend/sh4/sh4_frontend.h" -#include "jit/ir/passes/pass_runner.h" #include "sys/exception_handler.h" // executable code sits between 0x0c000000 and 0x0d000000 (16mb). each instr @@ -15,6 +15,8 @@ #define BLOCK_OFFSET(addr) ((addr & BLOCK_ADDR_MASK) >> BLOCK_ADDR_SHIFT) #define MAX_BLOCKS (0x1000000 >> BLOCK_ADDR_SHIFT) +struct jit_backend_s; +struct mem_interface_s; struct sh4_block_s; typedef uint32_t (*code_pointer_t)(); @@ -31,9 +33,10 @@ typedef struct sh4_block_s { typedef struct sh4_cache_s { struct re_exception_handler_s *eh_handle; - re::jit::frontend::Frontend *frontend; - re::jit::backend::Backend *backend; - re::jit::ir::passes::PassRunner *pass_runner; + struct jit_frontend_s *frontend; + struct jit_backend_s *backend; + + uint8_t ir_buffer[1024 * 1024]; code_pointer_t default_code; code_pointer_t code[MAX_BLOCKS]; @@ -42,9 +45,8 @@ typedef struct sh4_cache_s { rb_tree_t reverse_blocks; } sh4_cache_t; -struct sh4_cache_s *sh4_cache_create( - const re::jit::backend::MemoryInterface *memif, - code_pointer_t default_code); +struct sh4_cache_s *sh4_cache_create(const struct mem_interface_s *memif, + code_pointer_t default_code); void 
sh4_cache_destroy(struct sh4_cache_s *cache); static inline code_pointer_t sh4_cache_get_code(struct sh4_cache_s *cache, diff --git a/src/jit/backend/backend.h b/src/jit/backend/backend.h index 59a51c82..a7897fe6 100644 --- a/src/jit/backend/backend.h +++ b/src/jit/backend/backend.h @@ -1,16 +1,23 @@ #ifndef BACKEND_H #define BACKEND_H -#include "jit/ir/ir_builder.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif struct address_space_s; +struct ir_s; struct re_exception_s; -namespace re { -namespace jit { -namespace backend { +typedef struct register_def_s { + const char *name; + int value_types; + const void *data; +} register_def_t; -struct MemoryInterface { +typedef struct mem_interface_s { void *ctx_base; void *mem_base; struct address_space_s *mem_self; @@ -22,34 +29,31 @@ struct MemoryInterface { void (*w16)(struct address_space_s *, uint32_t, uint16_t); void (*w32)(struct address_space_s *, uint32_t, uint32_t); void (*w64)(struct address_space_s *, uint32_t, uint64_t); -}; +} mem_interface_t; -struct Register { - const char *name; - int value_types; - const void *data; -}; +struct jit_backend_s; -class Backend { - public: - Backend(const MemoryInterface &memif) : memif_(memif) {} - virtual ~Backend() {} +typedef const register_def_t *(*registers_cb)(); +typedef int (*num_registers_cb)(); +typedef void (*reset_cb)(struct jit_backend_s *); +typedef const uint8_t *(*assemble_code_cb)(struct jit_backend_s *, + struct ir_s *, int *); +typedef void (*dump_code_cb)(struct jit_backend_s *, const uint8_t *, int); +typedef bool (*handle_fastmem_exception_cb)(struct jit_backend_s *, + struct re_exception_s *); - virtual const Register *registers() const = 0; - virtual int num_registers() const = 0; +typedef struct jit_backend_s { + const register_def_t *registers; + int num_registers; - virtual void Reset() = 0; + reset_cb reset; + assemble_code_cb assemble_code; + dump_code_cb dump_code; + handle_fastmem_exception_cb handle_fastmem_exception; +} jit_backend_t; 
- virtual const uint8_t *AssembleCode(ir::IRBuilder &builder, int *size) = 0; - virtual void DumpCode(const uint8_t *host_addr, int size) = 0; - - virtual bool HandleFastmemException(struct re_exception_s *ex) = 0; - - protected: - MemoryInterface memif_; -}; -} -} +#ifdef __cplusplus } +#endif #endif diff --git a/src/jit/backend/x64/x64_backend.cc b/src/jit/backend/x64/x64_backend.cc index ebdca22b..abb6e111 100644 --- a/src/jit/backend/x64/x64_backend.cc +++ b/src/jit/backend/x64/x64_backend.cc @@ -1,24 +1,29 @@ #include -#include -#include +#include #include -#include "core/memory.h" #include "core/profiler.h" #include "jit/backend/x64/x64_backend.h" #include "jit/backend/x64/x64_disassembler.h" +#include "jit/backend/backend.h" +#include "jit/ir/ir.h" #include "sys/exception_handler.h" +#include "sys/memory.h" -using namespace re; -using namespace re::jit; -using namespace re::jit::backend; -using namespace re::jit::backend::x64; -using namespace re::jit::ir; +// +// x64 stack layout +// -namespace re { -namespace jit { -namespace backend { -namespace x64 { +#if PLATFORM_WINDOWS +static const int STACK_SHADOW_SPACE = 32; +#else +static const int STACK_SHADOW_SPACE = 0; +#endif +static const int STACK_OFFSET_LOCALS = STACK_SHADOW_SPACE; +static const int STACK_SIZE = STACK_OFFSET_LOCALS; + +// // x64 register layout +// // %rax %eax %ax %al <-- both: temporary // %rcx %ecx %cx %cl <-- both: argument @@ -48,42 +53,6 @@ namespace x64 { // r10, r11, xmm1 are used for constant not eliminated by const propagation // r14, r15 are reserved for the context and memory pointers -const Register x64_registers[] = { - {"rbx", ir::VALUE_INT_MASK, - reinterpret_cast(&Xbyak::util::rbx)}, - {"rbp", ir::VALUE_INT_MASK, - reinterpret_cast(&Xbyak::util::rbp)}, - {"r12", ir::VALUE_INT_MASK, - reinterpret_cast(&Xbyak::util::r12)}, - {"r13", ir::VALUE_INT_MASK, - reinterpret_cast(&Xbyak::util::r13)}, - // {"r14", ir::VALUE_INT_MASK, - // reinterpret_cast(&Xbyak::util::r14)}, - // 
{"r15", ir::VALUE_INT_MASK, - // reinterpret_cast(&Xbyak::util::r15)}, - {"xmm6", ir::VALUE_FLOAT_MASK, - reinterpret_cast(&Xbyak::util::xmm6)}, - {"xmm7", ir::VALUE_FLOAT_MASK, - reinterpret_cast(&Xbyak::util::xmm7)}, - {"xmm8", ir::VALUE_FLOAT_MASK, - reinterpret_cast(&Xbyak::util::xmm8)}, - {"xmm9", ir::VALUE_FLOAT_MASK, - reinterpret_cast(&Xbyak::util::xmm9)}, - {"xmm10", ir::VALUE_FLOAT_MASK, - reinterpret_cast(&Xbyak::util::xmm10)}, - {"xmm11", ir::VALUE_VECTOR_MASK, - reinterpret_cast(&Xbyak::util::xmm11)}, - {"xmm12", ir::VALUE_VECTOR_MASK, - reinterpret_cast(&Xbyak::util::xmm12)}, - {"xmm13", ir::VALUE_VECTOR_MASK, - reinterpret_cast(&Xbyak::util::xmm13)}, - {"xmm14", ir::VALUE_VECTOR_MASK, - reinterpret_cast(&Xbyak::util::xmm14)}, - {"xmm15", ir::VALUE_VECTOR_MASK, - reinterpret_cast(&Xbyak::util::xmm15)}}; - -const int x64_num_registers = sizeof(x64_registers) / sizeof(Register); - #if PLATFORM_WINDOWS const int x64_arg0_idx = Xbyak::Operand::RCX; const int x64_arg1_idx = Xbyak::Operand::RDX; @@ -95,166 +64,351 @@ const int x64_arg2_idx = Xbyak::Operand::RDX; #endif const int x64_tmp0_idx = Xbyak::Operand::R10; const int x64_tmp1_idx = Xbyak::Operand::R11; -} -} -} -} -// this will break down if running two instances of the x64 backend, but it's -// extremely useful when profiling to group JITd blocks of code with an actual -// symbol name +const Xbyak::Reg64 arg0(x64_arg0_idx); +const Xbyak::Reg64 arg1(x64_arg1_idx); +const Xbyak::Reg64 arg2(x64_arg2_idx); +const Xbyak::Reg64 tmp0(x64_tmp0_idx); +const Xbyak::Reg64 tmp1(x64_tmp1_idx); + +const register_def_t x64_registers[] = { + {"rbx", VALUE_INT_MASK, reinterpret_cast(&Xbyak::util::rbx)}, + {"rbp", VALUE_INT_MASK, reinterpret_cast(&Xbyak::util::rbp)}, + {"r12", VALUE_INT_MASK, reinterpret_cast(&Xbyak::util::r12)}, + {"r13", VALUE_INT_MASK, reinterpret_cast(&Xbyak::util::r13)}, + // {"r14", VALUE_INT_MASK, + // reinterpret_cast(&Xbyak::util::r14)}, + // {"r15", VALUE_INT_MASK, + // 
reinterpret_cast(&Xbyak::util::r15)}, + {"xmm6", VALUE_FLOAT_MASK, + reinterpret_cast(&Xbyak::util::xmm6)}, + {"xmm7", VALUE_FLOAT_MASK, + reinterpret_cast(&Xbyak::util::xmm7)}, + {"xmm8", VALUE_FLOAT_MASK, + reinterpret_cast(&Xbyak::util::xmm8)}, + {"xmm9", VALUE_FLOAT_MASK, + reinterpret_cast(&Xbyak::util::xmm9)}, + {"xmm10", VALUE_FLOAT_MASK, + reinterpret_cast(&Xbyak::util::xmm10)}, + {"xmm11", VALUE_VECTOR_MASK, + reinterpret_cast(&Xbyak::util::xmm11)}, + {"xmm12", VALUE_VECTOR_MASK, + reinterpret_cast(&Xbyak::util::xmm12)}, + {"xmm13", VALUE_VECTOR_MASK, + reinterpret_cast(&Xbyak::util::xmm13)}, + {"xmm14", VALUE_VECTOR_MASK, + reinterpret_cast(&Xbyak::util::xmm14)}, + {"xmm15", VALUE_VECTOR_MASK, + reinterpret_cast(&Xbyak::util::xmm15)}}; + +const int x64_num_registers = sizeof(x64_registers) / sizeof(register_def_t); + +// +// x64 code buffer. this will break down if running two instances of the x64 +// backend, but it's extremely useful when profiling to group JITd blocks of +// code with an actual symbol name +// static const size_t x64_code_size = 1024 * 1024 * 8; -static uint8_t x64_codegen[x64_code_size]; +static uint8_t x64_code[x64_code_size]; -X64Backend::X64Backend(const MemoryInterface &memif) - : Backend(memif), emitter_(memif, x64_codegen, x64_code_size) { - CHECK_EQ(cs_open(CS_ARCH_X86, CS_MODE_64, &capstone_handle_), CS_ERR_OK); +// +// x64 emitters for each ir op +// +struct x64_backend_s; - Xbyak::CodeArray::protect(x64_codegen, x64_code_size, true); +typedef void (*X64Emit)(struct x64_backend_s *, Xbyak::CodeGenerator &, + const ir_instr_t *); - Reset(); +static X64Emit x64_emitters[NUM_OPS]; + +#define EMITTER(op) \ + void op(struct x64_backend_s *, Xbyak::CodeGenerator &, const ir_instr_t *); \ + static struct _x64_##op##_init { \ + _x64_##op##_init() { \ + x64_emitters[OP_##op] = &op; \ + } \ + } x64_##op##_init; \ + void op(struct x64_backend_s *backend, Xbyak::CodeGenerator &e, \ + const ir_instr_t *instr) + +// +// xmm constants. 
SSE / AVX provides no support for loading a constant into an +// xmm register, so instead frequently used constants are emitted to the code +// buffer and used as memory operands +// +typedef enum { + XMM_CONST_ABS_MASK_PS, + XMM_CONST_ABS_MASK_PD, + XMM_CONST_SIGN_MASK_PS, + XMM_CONST_SIGN_MASK_PD, + NUM_XMM_CONST, +} xmm_constant_t; + +typedef struct x64_backend_s { + jit_backend_t base; + mem_interface_t memif; + + Xbyak::CodeGenerator *codegen; + csh capstone_handle; + + Xbyak::Label xmm_const[NUM_XMM_CONST]; + void (*load_thunk[16])(); + void (*store_thunk)(); + + bool modified[x64_num_registers]; + int num_temps; +} x64_backend_t; + +const Xbyak::Reg x64_get_register(x64_backend_t *backend, const ir_value_t *v) { + auto &e = *backend->codegen; + + // if the value is a local or constant, copy it to a tempory register, else + // return the register allocated for it + if (ir_is_constant(v)) { + CHECK_LT(backend->num_temps, 2); + + Xbyak::Reg tmp = backend->num_temps++ ? tmp1 : tmp0; + + switch (v->type) { + case VALUE_I8: + tmp = tmp.cvt8(); + break; + case VALUE_I16: + tmp = tmp.cvt16(); + break; + case VALUE_I32: + tmp = tmp.cvt32(); + break; + case VALUE_I64: + // no conversion needed + break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + + // copy value to the temporary register + e.mov(tmp, ir_zext_constant(v)); + + return tmp; + } + + int i = v->reg; + CHECK_NE(i, NO_REGISTER); + + const Xbyak::Reg &reg = + *reinterpret_cast(x64_registers[i].data); + CHECK(reg.isREG()); + + switch (v->type) { + case VALUE_I8: + return reg.cvt8(); + case VALUE_I16: + return reg.cvt16(); + case VALUE_I32: + return reg.cvt32(); + case VALUE_I64: + return reg; + default: + LOG_FATAL("Unexpected value type"); + break; + } } -X64Backend::~X64Backend() { - cs_close(&capstone_handle_); +const Xbyak::Xmm x64_get_xmm_register(x64_backend_t *backend, + const ir_value_t *v) { + auto &e = *backend->codegen; + + // if the value isn't allocated a XMM register copy it to a
temporary XMM, + // register, else return the XMM register allocated for it + if (ir_is_constant(v)) { + // copy value to the temporary register + if (v->type == VALUE_F32) { + float val = v->f32; + e.mov(e.eax, *(int32_t *)&val); + e.vmovd(e.xmm1, e.eax); + } else { + double val = v->f64; + e.mov(e.rax, *(int64_t *)&val); + e.vmovq(e.xmm1, e.rax); + } + return e.xmm1; + } + + int i = v->reg; + CHECK_NE(i, NO_REGISTER); + + const Xbyak::Xmm &xmm = + *reinterpret_cast(x64_registers[i].data); + CHECK(xmm.isXMM()); + return xmm; } -const Register *X64Backend::registers() const { - return x64_registers; +const Xbyak::Address x64_get_xmm_constant(x64_backend_t *backend, + xmm_constant_t c) { + auto &e = *backend->codegen; + + return e.ptr[e.rip + backend->xmm_const[c]]; } -int X64Backend::num_registers() const { - return sizeof(x64_registers) / sizeof(Register); +static bool x64_can_encode_as_imm(const ir_value_t *v) { + if (!ir_is_constant(v)) { + return false; + } + + return v->type <= VALUE_I32; } -void X64Backend::Reset() { - emitter_.Reset(); +static bool x64_is_callee_saved(const Xbyak::Reg ®) { + if (reg.isXMM()) { + return false; + } - EmitThunks(); + static bool callee_saved[16] = { + false, // RAX + false, // RCX + false, // RDX + true, // RBX + false, // RSP + true, // RBP +#if PLATFORM_WINDOWS + true, // RSI + true, // RDI +#else + false, // RSI + false, // RDI +#endif + false, // R8 + false, // R9 + false, // R10 + false, // R11 + true, // R12 + true, // R13 + true, // R14 + true, // R15 + }; + + return callee_saved[reg.getIdx()]; } -const uint8_t *X64Backend::AssembleCode(ir::IRBuilder &builder, int *size) { - // try to generate the x64 code. 
if the code buffer overflows let the backend - // know so it can reset the cache and try again - const uint8_t *fn = nullptr; +static void x64_emit_prolog(x64_backend_t *backend, ir_t *ir, + int *out_stack_size) { + auto &e = *backend->codegen; - try { - fn = emitter_.Emit(builder, size); - } catch (const Xbyak::Error &e) { - if (e != Xbyak::ERR_CODE_IS_TOO_BIG) { - LOG_FATAL("X64 codegen failure, %s", e.what()); + int stack_size = STACK_SIZE + ir->locals_size; + + // stack must be 16 byte aligned + stack_size = align_up(stack_size, 16); + + // add 8 for return address which will be pushed when this is called + stack_size += 8; + + CHECK_EQ((stack_size + 8) % 16, 0); + + // mark which registers have been modified + memset(backend->modified, 0, sizeof(backend->modified)); + + list_for_each_entry(instr, &ir->instrs, ir_instr_t, it) { + ir_value_t *result = instr->result; + + if (!result) { + continue; + } + + backend->modified[result->reg] = true; + } + + // push the callee-saved registers which have been modified + int pushed = 2; + + // always used by guest ctx and memory pointers + e.push(e.r15); + e.push(e.r14); + + for (int i = 0; i < x64_num_registers; i++) { + const Xbyak::Reg ® = + *reinterpret_cast(x64_registers[i].data); + + if (x64_is_callee_saved(reg) && backend->modified[i]) { + e.push(reg); + pushed++; } } + // if an odd amount of push instructions are emitted stack_size needs to be + // adjusted to keep the stack aligned + if ((pushed % 2) == 1) { + stack_size += 8; + } + + // adjust stack pointer + e.sub(e.rsp, stack_size); + + // copy guest context and memory base to argument registers + e.mov(e.r14, reinterpret_cast(backend->memif.ctx_base)); + e.mov(e.r15, reinterpret_cast(backend->memif.mem_base)); + + *out_stack_size = stack_size; +} + +static void x64_emit_body(x64_backend_t *backend, ir_t *ir) { + list_for_each_entry(instr, &ir->instrs, ir_instr_t, it) { + X64Emit emit = x64_emitters[instr->op]; + CHECK(emit, "Failed to find emitter for %s", 
ir_op_names[instr->op]); + + // reset temp count used by GetRegister + backend->num_temps = 0; + + emit(backend, *backend->codegen, instr); + } +} + +static void x64_emit_epilog(x64_backend_t *backend, ir_t *ir, int stack_size) { + auto &e = *backend->codegen; + + // adjust stack pointer + e.add(e.rsp, stack_size); + + // pop callee-saved registers which have been modified + for (int i = x64_num_registers - 1; i >= 0; i--) { + const Xbyak::Reg ® = + *reinterpret_cast(x64_registers[i].data); + + if (x64_is_callee_saved(reg) && backend->modified[i]) { + e.pop(reg); + } + } + + // pop r14 and r15 + e.pop(e.r14); + e.pop(e.r15); + + e.ret(); +} + +const uint8_t *x64_emit(x64_backend_t *backend, ir_t *ir, int *size) { + // PROFILER_RUNTIME("X64Emitter::Emit"); + + const uint8_t *fn = backend->codegen->getCurr(); + + int stack_size = 0; + x64_emit_prolog(backend, ir, &stack_size); + x64_emit_body(backend, ir); + x64_emit_epilog(backend, ir, stack_size); + + *size = backend->codegen->getCurr() - fn; + return fn; } -void X64Backend::DumpCode(const uint8_t *host_addr, int size) { - cs_insn *insns; - size_t count = cs_disasm(capstone_handle_, host_addr, size, 0, 0, &insns); - CHECK(count); - - for (size_t i = 0; i < count; i++) { - cs_insn &insn = insns[i]; - LOG_INFO("0x%" PRIx64 ":\t%s\t\t%s", insn.address, insn.mnemonic, - insn.op_str); - } - - cs_free(insns, count); -} - -bool X64Backend::HandleFastmemException(struct re_exception_s *ex) { - const uint8_t *data = reinterpret_cast(ex->thread_state.rip); - - // it's assumed a mov has triggered the exception - X64Mov mov; - if (!X64Disassembler::DecodeMov(data, &mov)) { - return false; - } - - // figure out the guest address that was being accessed - const uint8_t *fault_addr = reinterpret_cast(ex->fault_addr); - const uint8_t *protected_start = - reinterpret_cast(memif_.mem_base); - uint32_t guest_addr = static_cast(fault_addr - protected_start); - - // instead of handling the dynamic callback from inside of the exception 
- // handler, force rip to the beginning of a thunk which will invoke the - // callback once the exception handler has exited. this frees the callbacks - // from any restrictions imposed by an exception handler, and also prevents - // a possible recursive exceptions - - // push the return address (the next instruction after the current mov) to - // the stack. also, adjust the stack for the return address, with an extra - // 8 bytes to keep it aligned - store(reinterpret_cast(ex->thread_state.rsp - 8), - ex->thread_state.rip + mov.length); - ex->thread_state.rsp -= STACK_SHADOW_SPACE + 8 + 8; - CHECK(ex->thread_state.rsp % 16 == 0); - - if (mov.is_load) { - // prep argument registers (memory object, guest_addr) for read function - ex->thread_state.r[x64_arg0_idx] = - reinterpret_cast(memif_.mem_self); - ex->thread_state.r[x64_arg1_idx] = static_cast(guest_addr); - - // prep function call address for thunk - switch (mov.operand_size) { - case 1: - ex->thread_state.rax = reinterpret_cast(memif_.r8); - break; - case 2: - ex->thread_state.rax = reinterpret_cast(memif_.r16); - break; - case 4: - ex->thread_state.rax = reinterpret_cast(memif_.r32); - break; - case 8: - ex->thread_state.rax = reinterpret_cast(memif_.r64); - break; - } - - // resume execution in the thunk once the exception handler exits - ex->thread_state.rip = reinterpret_cast(load_thunk_[mov.reg]); - } else { - // prep argument registers (memory object, guest_addr, value) for write - // function - ex->thread_state.r[x64_arg0_idx] = - reinterpret_cast(memif_.mem_self); - ex->thread_state.r[x64_arg1_idx] = static_cast(guest_addr); - ex->thread_state.r[x64_arg2_idx] = ex->thread_state.r[mov.reg]; - - // prep function call address for thunk - switch (mov.operand_size) { - case 1: - ex->thread_state.rax = reinterpret_cast(memif_.w8); - break; - case 2: - ex->thread_state.rax = reinterpret_cast(memif_.w16); - break; - case 4: - ex->thread_state.rax = reinterpret_cast(memif_.w32); - break; - case 8: - 
ex->thread_state.rax = reinterpret_cast(memif_.w64); - break; - } - - // resume execution in the thunk once the exception handler exits - ex->thread_state.rip = reinterpret_cast(store_thunk_); - } - - return true; -} - -void X64Backend::EmitThunks() { - auto &e = emitter_; +static void x64_emit_thunks(x64_backend_t *backend) { + auto &e = *backend->codegen; { for (int i = 0; i < 16; i++) { e.align(32); - load_thunk_[i] = e.getCurr(); + backend->load_thunk[i] = e.getCurr(); Xbyak::Reg64 dst(i); e.call(e.rax); @@ -267,10 +421,1252 @@ void X64Backend::EmitThunks() { { e.align(32); - store_thunk_ = e.getCurr(); + backend->store_thunk = e.getCurr(); e.call(e.rax); e.add(e.rsp, STACK_SHADOW_SPACE + 8); e.ret(); } } + +static void x64_emit_constants(x64_backend_t *backend) { + auto &e = *backend->codegen; + + e.L(backend->xmm_const[XMM_CONST_ABS_MASK_PS]); + e.dq(INT64_C(0x7fffffff7fffffff)); + e.dq(INT64_C(0x7fffffff7fffffff)); + + e.L(backend->xmm_const[XMM_CONST_ABS_MASK_PD]); + e.dq(INT64_C(0x7fffffffffffffff)); + e.dq(INT64_C(0x7fffffffffffffff)); + + e.L(backend->xmm_const[XMM_CONST_SIGN_MASK_PS]); + e.dq(INT64_C(0x8000000080000000)); + e.dq(INT64_C(0x8000000080000000)); + + e.L(backend->xmm_const[XMM_CONST_SIGN_MASK_PD]); + e.dq(INT64_C(0x8000000000000000)); + e.dq(INT64_C(0x8000000000000000)); +} + +static void x64_reset(x64_backend_t *backend) { + backend->codegen->reset(); + + x64_emit_thunks(backend); + x64_emit_constants(backend); +} + +static const uint8_t *x64_assemble_code(x64_backend_t *backend, ir_t *ir, + int *size) { + // try to generate the x64 code. 
if the code buffer overflows let the backend + // know so it can reset the cache and try again + const uint8_t *fn = nullptr; + + try { + fn = x64_emit(backend, ir, size); + } catch (const Xbyak::Error &e) { + if (e != Xbyak::ERR_CODE_IS_TOO_BIG) { + LOG_FATAL("X64 codegen failure, %s", e.what()); + } + } + + return fn; +} + +static void x64_dump_code(x64_backend_t *backend, const uint8_t *host_addr, + int size) { + cs_insn *insns; + size_t count = + cs_disasm(backend->capstone_handle, host_addr, size, 0, 0, &insns); + CHECK(count); + + for (size_t i = 0; i < count; i++) { + cs_insn &insn = insns[i]; + LOG_INFO("0x%" PRIx64 ":\t%s\t\t%s", insn.address, insn.mnemonic, + insn.op_str); + } + + cs_free(insns, count); +} + +static bool x64_handle_fastmem_exception(x64_backend_t *backend, + struct re_exception_s *ex) { + const uint8_t *data = reinterpret_cast(ex->thread_state.rip); + + // it's assumed a mov has triggered the exception + x64_mov_t mov; + if (!x64_decode_mov(data, &mov)) { + return false; + } + + // figure out the guest address that was being accessed + const uint8_t *fault_addr = reinterpret_cast(ex->fault_addr); + const uint8_t *protected_start = + reinterpret_cast(backend->memif.mem_base); + uint32_t guest_addr = static_cast(fault_addr - protected_start); + + // instead of handling the dynamic callback from inside of the exception + // handler, force rip to the beginning of a thunk which will invoke the + // callback once the exception handler has exited. this frees the callbacks + // from any restrictions imposed by an exception handler, and also prevents + // a possible recursive exceptions + + // push the return address (the next instruction after the current mov) to + // the stack. 
also, adjust the stack for the return address, with an extra + // 8 bytes to keep it aligned + *(uintptr_t *)(ex->thread_state.rsp - 8) = ex->thread_state.rip + mov.length; + ex->thread_state.rsp -= STACK_SHADOW_SPACE + 8 + 8; + CHECK(ex->thread_state.rsp % 16 == 0); + + if (mov.is_load) { + // prep argument registers (memory object, guest_addr) for read function + ex->thread_state.r[x64_arg0_idx] = + reinterpret_cast(backend->memif.mem_self); + ex->thread_state.r[x64_arg1_idx] = static_cast(guest_addr); + + // prep function call address for thunk + switch (mov.operand_size) { + case 1: + ex->thread_state.rax = reinterpret_cast(backend->memif.r8); + break; + case 2: + ex->thread_state.rax = reinterpret_cast(backend->memif.r16); + break; + case 4: + ex->thread_state.rax = reinterpret_cast(backend->memif.r32); + break; + case 8: + ex->thread_state.rax = reinterpret_cast(backend->memif.r64); + break; + } + + // resume execution in the thunk once the exception handler exits + ex->thread_state.rip = + reinterpret_cast(backend->load_thunk[mov.reg]); + } else { + // prep argument registers (memory object, guest_addr, value) for write + // function + ex->thread_state.r[x64_arg0_idx] = + reinterpret_cast(backend->memif.mem_self); + ex->thread_state.r[x64_arg1_idx] = static_cast(guest_addr); + ex->thread_state.r[x64_arg2_idx] = ex->thread_state.r[mov.reg]; + + // prep function call address for thunk + switch (mov.operand_size) { + case 1: + ex->thread_state.rax = reinterpret_cast(backend->memif.w8); + break; + case 2: + ex->thread_state.rax = reinterpret_cast(backend->memif.w16); + break; + case 4: + ex->thread_state.rax = reinterpret_cast(backend->memif.w32); + break; + case 8: + ex->thread_state.rax = reinterpret_cast(backend->memif.w64); + break; + } + + // resume execution in the thunk once the exception handler exits + ex->thread_state.rip = reinterpret_cast(backend->store_thunk); + } + + return true; +} + +EMITTER(LOAD_HOST) { + const Xbyak::Reg a = 
x64_get_register(backend, instr->arg[0]); + + if (ir_is_float(instr->result->type)) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + + switch (instr->result->type) { + case VALUE_F32: + e.vmovss(result, e.dword[a]); + break; + case VALUE_F64: + e.vmovsd(result, e.qword[a]); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } else { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + + switch (instr->result->type) { + case VALUE_I8: + e.mov(result, e.byte[a]); + break; + case VALUE_I16: + e.mov(result, e.word[a]); + break; + case VALUE_I32: + e.mov(result, e.dword[a]); + break; + case VALUE_I64: + e.mov(result, e.qword[a]); + break; + default: + LOG_FATAL("Unexpected load result type"); + break; + } + } +} + +EMITTER(STORE_HOST) { + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + if (ir_is_float(instr->arg[1]->type)) { + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_F32: + e.vmovss(e.dword[a], b); + break; + case VALUE_F64: + e.vmovsd(e.qword[a], b); + break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_I8: + e.mov(e.byte[a], b); + break; + case VALUE_I16: + e.mov(e.word[a], b); + break; + case VALUE_I32: + e.mov(e.dword[a], b); + break; + case VALUE_I64: + e.mov(e.qword[a], b); + break; + default: + LOG_FATAL("Unexpected store value type"); + break; + } + } +} + +EMITTER(LOAD_FAST) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + switch (instr->result->type) { + case VALUE_I8: + e.mov(result, e.byte[a.cvt64() + e.r15]); + break; + case VALUE_I16: + e.mov(result, e.word[a.cvt64() + e.r15]); + break; + case VALUE_I32: + e.mov(result, e.dword[a.cvt64() + e.r15]); + break; + case 
VALUE_I64: + e.mov(result, e.qword[a.cvt64() + e.r15]); + break; + default: + LOG_FATAL("Unexpected load result type"); + break; + } +} + +EMITTER(STORE_FAST) { + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_I8: + e.mov(e.byte[a.cvt64() + e.r15], b); + break; + case VALUE_I16: + e.mov(e.word[a.cvt64() + e.r15], b); + break; + case VALUE_I32: + e.mov(e.dword[a.cvt64() + e.r15], b); + break; + case VALUE_I64: + e.mov(e.qword[a.cvt64() + e.r15], b); + break; + default: + LOG_FATAL("Unexpected store value type"); + break; + } +} + +EMITTER(LOAD_SLOW) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + void *fn = nullptr; + switch (instr->result->type) { + case VALUE_I8: + fn = reinterpret_cast(backend->memif.r8); + break; + case VALUE_I16: + fn = reinterpret_cast(backend->memif.r16); + break; + case VALUE_I32: + fn = reinterpret_cast(backend->memif.r32); + break; + case VALUE_I64: + fn = reinterpret_cast(backend->memif.r64); + break; + default: + LOG_FATAL("Unexpected load result type"); + break; + } + + e.mov(arg0, reinterpret_cast(backend->memif.mem_self)); + e.mov(arg1, a); + e.call(reinterpret_cast(fn)); + e.mov(result, e.rax); +} + +EMITTER(STORE_SLOW) { + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + + void *fn = nullptr; + switch (instr->arg[1]->type) { + case VALUE_I8: + fn = reinterpret_cast(backend->memif.w8); + break; + case VALUE_I16: + fn = reinterpret_cast(backend->memif.w16); + break; + case VALUE_I32: + fn = reinterpret_cast(backend->memif.w32); + break; + case VALUE_I64: + fn = reinterpret_cast(backend->memif.w64); + break; + default: + LOG_FATAL("Unexpected store value type"); + break; + } + + e.mov(arg0, reinterpret_cast(backend->memif.mem_self)); + 
e.mov(arg1, a); + e.mov(arg2, b); + e.call(reinterpret_cast(fn)); +} + +EMITTER(LOAD_CONTEXT) { + int offset = instr->arg[0]->i32; + + if (ir_is_vector(instr->result->type)) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + + switch (instr->result->type) { + case VALUE_V128: + e.movups(result, e.ptr[e.r14 + offset]); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } else if (ir_is_float(instr->result->type)) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + + switch (instr->result->type) { + case VALUE_F32: + e.vmovss(result, e.dword[e.r14 + offset]); + break; + case VALUE_F64: + e.vmovsd(result, e.qword[e.r14 + offset]); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } else { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + + switch (instr->result->type) { + case VALUE_I8: + e.mov(result, e.byte[e.r14 + offset]); + break; + case VALUE_I16: + e.mov(result, e.word[e.r14 + offset]); + break; + case VALUE_I32: + e.mov(result, e.dword[e.r14 + offset]); + break; + case VALUE_I64: + e.mov(result, e.qword[e.r14 + offset]); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } +} + +EMITTER(STORE_CONTEXT) { + int offset = instr->arg[0]->i32; + + if (ir_is_constant(instr->arg[1])) { + switch (instr->arg[1]->type) { + case VALUE_I8: + e.mov(e.byte[e.r14 + offset], instr->arg[1]->i8); + break; + case VALUE_I16: + e.mov(e.word[e.r14 + offset], instr->arg[1]->i16); + break; + case VALUE_I32: + case VALUE_F32: + e.mov(e.dword[e.r14 + offset], instr->arg[1]->i32); + break; + case VALUE_I64: + case VALUE_F64: + e.mov(e.qword[e.r14 + offset], instr->arg[1]->i64); + break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + } else { + if (ir_is_vector(instr->arg[1]->type)) { + const Xbyak::Xmm src = x64_get_xmm_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_V128: + 
e.vmovups(e.ptr[e.r14 + offset], src); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } else if (ir_is_float(instr->arg[1]->type)) { + const Xbyak::Xmm src = x64_get_xmm_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_F32: + e.vmovss(e.dword[e.r14 + offset], src); + break; + case VALUE_F64: + e.vmovsd(e.qword[e.r14 + offset], src); + break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + } else { + const Xbyak::Reg src = x64_get_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_I8: + e.mov(e.byte[e.r14 + offset], src); + break; + case VALUE_I16: + e.mov(e.word[e.r14 + offset], src); + break; + case VALUE_I32: + e.mov(e.dword[e.r14 + offset], src); + break; + case VALUE_I64: + e.mov(e.qword[e.r14 + offset], src); + break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + } + } +} + +EMITTER(LOAD_LOCAL) { + int offset = STACK_OFFSET_LOCALS + instr->arg[0]->i32; + + if (ir_is_vector(instr->result->type)) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + + switch (instr->result->type) { + case VALUE_V128: + e.movups(result, e.ptr[e.rsp + offset]); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } else if (ir_is_float(instr->result->type)) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + + switch (instr->result->type) { + case VALUE_F32: + e.vmovss(result, e.dword[e.rsp + offset]); + break; + case VALUE_F64: + e.vmovsd(result, e.qword[e.rsp + offset]); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } else { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + + switch (instr->result->type) { + case VALUE_I8: + e.mov(result, e.byte[e.rsp + offset]); + break; + case VALUE_I16: + e.mov(result, e.word[e.rsp + offset]); + break; + case VALUE_I32: + e.mov(result, e.dword[e.rsp + offset]); + break; + case VALUE_I64: + 
e.mov(result, e.qword[e.rsp + offset]); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } +} + +EMITTER(STORE_LOCAL) { + int offset = STACK_OFFSET_LOCALS + instr->arg[0]->i32; + + CHECK(!ir_is_constant(instr->arg[1])); + + if (ir_is_vector(instr->arg[1]->type)) { + const Xbyak::Xmm src = x64_get_xmm_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_V128: + e.vmovups(e.ptr[e.rsp + offset], src); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } + } else if (ir_is_float(instr->arg[1]->type)) { + const Xbyak::Xmm src = x64_get_xmm_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_F32: + e.vmovss(e.dword[e.rsp + offset], src); + break; + case VALUE_F64: + e.vmovsd(e.qword[e.rsp + offset], src); + break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + } else { + const Xbyak::Reg src = x64_get_register(backend, instr->arg[1]); + + switch (instr->arg[1]->type) { + case VALUE_I8: + e.mov(e.byte[e.rsp + offset], src); + break; + case VALUE_I16: + e.mov(e.word[e.rsp + offset], src); + break; + case VALUE_I32: + e.mov(e.dword[e.rsp + offset], src); + break; + case VALUE_I64: + e.mov(e.qword[e.rsp + offset], src); + break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + } +} + +EMITTER(FTOI) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + + switch (instr->result->type) { + case VALUE_I32: + CHECK_EQ(instr->arg[0]->type, VALUE_F32); + e.cvttss2si(result, a); + break; + case VALUE_I64: + CHECK_EQ(instr->arg[0]->type, VALUE_F64); + e.cvttsd2si(result, a); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } +} + +EMITTER(ITOF) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + switch (instr->result->type) { + case VALUE_F32: + 
CHECK_EQ(instr->arg[0]->type, VALUE_I32); + e.cvtsi2ss(result, a); + break; + case VALUE_F64: + CHECK_EQ(instr->arg[0]->type, VALUE_I64); + e.cvtsi2sd(result, a); + break; + default: + LOG_FATAL("Unexpected result type"); + break; + } +} + +EMITTER(SEXT) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + if (a == result) { + // already the correct width + return; + } + + if (result.isBit(64) && a.isBit(32)) { + e.movsxd(result.cvt64(), a); + } else { + e.movsx(result, a); + } +} + +EMITTER(ZEXT) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + if (a == result) { + // already the correct width + return; + } + + if (result.isBit(64) && a.isBit(32)) { + // mov will automatically zero fill the upper 32-bits + e.mov(result.cvt32(), a); + } else { + e.movzx(result, a); + } +} + +EMITTER(TRUNC) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + if (result.getIdx() == a.getIdx()) { + // noop if already the same register. 
note, this means the high order bits + // of the result won't be cleared, but I believe that is fine + return; + } + + Xbyak::Reg truncated = a; + switch (instr->result->type) { + case VALUE_I8: + truncated = a.cvt8(); + break; + case VALUE_I16: + truncated = a.cvt16(); + break; + case VALUE_I32: + truncated = a.cvt32(); + break; + default: + LOG_FATAL("Unexpected value type"); + } + + if (truncated.isBit(32)) { + // mov will automatically zero fill the upper 32-bits + e.mov(result, truncated); + } else { + e.movzx(result.cvt32(), truncated); + } +} + +EMITTER(FEXT) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + + e.cvtss2sd(result, a); +} + +EMITTER(FTRUNC) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + + e.cvtsd2ss(result, a); +} + +EMITTER(SELECT) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + const Xbyak::Reg cond = x64_get_register(backend, instr->arg[2]); + + // convert result to Reg32e to please xbyak + CHECK_GE(result.getBit(), 32); + Xbyak::Reg32e result_32e(result.getIdx(), result.getBit()); + + e.test(cond, cond); + if (result_32e != a) { + e.cmovnz(result_32e, a); + } + e.cmovz(result_32e, b); +} + +EMITTER(CMP) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + if (x64_can_encode_as_imm(instr->arg[1])) { + e.cmp(a, static_cast(ir_zext_constant(instr->arg[1]))); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.cmp(a, b); + } + + ir_cmp_t cmp = (ir_cmp_t)instr->arg[2]->i32; + + switch (cmp) { + case CMP_EQ: + e.sete(result); + break; + + case CMP_NE: + e.setne(result); + break; + + 
case CMP_SGE: + e.setge(result); + break; + + case CMP_SGT: + e.setg(result); + break; + + case CMP_UGE: + e.setae(result); + break; + + case CMP_UGT: + e.seta(result); + break; + + case CMP_SLE: + e.setle(result); + break; + + case CMP_SLT: + e.setl(result); + break; + + case CMP_ULE: + e.setbe(result); + break; + + case CMP_ULT: + e.setb(result); + break; + + default: + LOG_FATAL("Unexpected comparison type"); + } +} + +EMITTER(FCMP) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + + if (instr->arg[0]->type == VALUE_F32) { + e.comiss(a, b); + } else { + e.comisd(a, b); + } + + ir_cmp_t cmp = (ir_cmp_t)instr->arg[2]->i32; + + switch (cmp) { + case CMP_EQ: + e.sete(result); + break; + + case CMP_NE: + e.setne(result); + break; + + case CMP_SGE: + e.setae(result); + break; + + case CMP_SGT: + e.seta(result); + break; + + case CMP_SLE: + e.setbe(result); + break; + + case CMP_SLT: + e.setb(result); + break; + + default: + LOG_FATAL("Unexpected comparison type"); + } +} + +EMITTER(ADD) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + if (result != a) { + e.mov(result, a); + } + + if (x64_can_encode_as_imm(instr->arg[1])) { + e.add(result, (uint32_t)ir_zext_constant(instr->arg[1])); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.add(result, b); + } +} + +EMITTER(SUB) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + if (result != a) { + e.mov(result, a); + } + + if (x64_can_encode_as_imm(instr->arg[1])) { + e.sub(result, (uint32_t)ir_zext_constant(instr->arg[1])); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.sub(result, b); + } +} + +EMITTER(SMUL) { + const 
Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + + if (result != a) { + e.mov(result, a); + } + + e.imul(result, b); +} + +EMITTER(UMUL) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + + if (result != a) { + e.mov(result, a); + } + + e.imul(result, b); +} + +EMITTER(DIV) { + LOG_FATAL("Unsupported"); +} + +EMITTER(NEG) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + + if (result != a) { + e.mov(result, a); + } + + e.neg(result); +} + +EMITTER(ABS) { + LOG_FATAL("Unsupported"); + // e.mov(e.rax, *result); + // e.neg(e.rax); + // e.cmovl(reinterpret_cast(result)->cvt32(), e.rax); +} + +EMITTER(FADD) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + + if (instr->result->type == VALUE_F32) { + e.vaddss(result, a, b); + } else { + e.vaddsd(result, a, b); + } +} + +EMITTER(FSUB) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + + if (instr->result->type == VALUE_F32) { + e.vsubss(result, a, b); + } else { + e.vsubsd(result, a, b); + } +} + +EMITTER(FMUL) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + + if (instr->result->type == VALUE_F32) { + e.vmulss(result, a, b); + } else { + 
e.vmulsd(result, a, b); + } +} + +EMITTER(FDIV) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + + if (instr->result->type == VALUE_F32) { + e.vdivss(result, a, b); + } else { + e.vdivsd(result, a, b); + } +} + +EMITTER(FNEG) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + + if (instr->result->type == VALUE_F32) { + e.vxorps(result, a, x64_get_xmm_constant(backend, XMM_CONST_SIGN_MASK_PS)); + } else { + e.vxorpd(result, a, x64_get_xmm_constant(backend, XMM_CONST_SIGN_MASK_PD)); + } +} + +EMITTER(FABS) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + + if (instr->result->type == VALUE_F32) { + e.vandps(result, a, x64_get_xmm_constant(backend, XMM_CONST_ABS_MASK_PS)); + } else { + e.vandpd(result, a, x64_get_xmm_constant(backend, XMM_CONST_ABS_MASK_PD)); + } +} + +EMITTER(SQRT) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + + if (instr->result->type == VALUE_F32) { + e.vsqrtss(result, a); + } else { + e.vsqrtsd(result, a); + } +} + +EMITTER(VBROADCAST) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + + e.vbroadcastss(result, a); +} + +EMITTER(VADD) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + + e.vaddps(result, a, b); +} + +EMITTER(VDOT) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = 
x64_get_xmm_register(backend, instr->arg[0]); + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + /* dot product mask 0b11110001: the high nibble includes all four elements in the sum of products, the low nibble writes that sum to element 0 only and zeroes the other elements */ + e.vdpps(result, a, b, 0b11110001); +} + +EMITTER(VMUL) { + const Xbyak::Xmm result = x64_get_xmm_register(backend, instr->result); + const Xbyak::Xmm a = x64_get_xmm_register(backend, instr->arg[0]); + const Xbyak::Xmm b = x64_get_xmm_register(backend, instr->arg[1]); + /* packed single-precision multiply over all elements of the xmm registers */ + e.vmulps(result, a, b); +} + +EMITTER(AND) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + /* the x64 instruction works in place, so result is seeded with a first */ + if (result != a) { + e.mov(result, a); + } + /* use the immediate form when x64_can_encode_as_imm accepts arg[1], truncating the zero-extended constant to 32 bits; otherwise read arg[1] from a register */ + if (x64_can_encode_as_imm(instr->arg[1])) { + e.and (result, (uint32_t)ir_zext_constant(instr->arg[1])); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.and (result, b); + } +} + +EMITTER(OR) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + /* same shape as AND: seed result with a, then or in an immediate or a register */ + if (result != a) { + e.mov(result, a); + } + + if (x64_can_encode_as_imm(instr->arg[1])) { + e.or (result, (uint32_t)ir_zext_constant(instr->arg[1])); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.or (result, b); + } +} + +EMITTER(XOR) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + /* same shape as AND: seed result with a, then xor in an immediate or a register */ + if (result != a) { + e.mov(result, a); + } + + if (x64_can_encode_as_imm(instr->arg[1])) { + e.xor (result, (uint32_t)ir_zext_constant(instr->arg[1])); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.xor (result, b); + } +} + +EMITTER(NOT) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + /* copy a into result when they differ, then complement result in place */ + if (result != a) { + e.mov(result, a); + } + + e.not(result); +} + +EMITTER(SHL) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a =
x64_get_register(backend, instr->arg[0]); + + if (result != a) { + e.mov(result, a); + } + + if (x64_can_encode_as_imm(instr->arg[1])) { + e.shl(result, (int)ir_zext_constant(instr->arg[1])); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.mov(e.cl, b); + e.shl(result, e.cl); + } +} + +EMITTER(ASHR) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + /* seed result with a; the shift then works in place */ + if (result != a) { + e.mov(result, a); + } + /* immediate counts are encoded directly; register counts are moved into cl first, the count register of the x64 shift instructions */ + if (x64_can_encode_as_imm(instr->arg[1])) { + e.sar(result, (int)ir_zext_constant(instr->arg[1])); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.mov(e.cl, b); + e.sar(result, e.cl); + } +} + +EMITTER(LSHR) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + /* same shape as ASHR, using the logical (zero-filling) right shift */ + if (result != a) { + e.mov(result, a); + } + + if (x64_can_encode_as_imm(instr->arg[1])) { + e.shr(result, (int)ir_zext_constant(instr->arg[1])); + } else { + const Xbyak::Reg b = x64_get_register(backend, instr->arg[1]); + e.mov(e.cl, b); + e.shr(result, e.cl); + } +} + +EMITTER(ASHD) { + const Xbyak::Reg result = x64_get_register(backend, instr->result); + const Xbyak::Reg v = x64_get_register(backend, instr->arg[0]); + const Xbyak::Reg n = x64_get_register(backend, instr->arg[1]); + /* arithmetic shift whose direction is chosen at run time: n with bit 31 clear shifts left by n, n with bit 31 set shifts right by the negated count */ + e.inLocalLabel(); + /* the dotted labels below are local to the inLocalLabel/outLocalLabel pair, so every emitted copy of this sequence gets its own set */ + if (result != v) { + e.mov(result, v); + } + + // check if we're shifting left or right + e.test(n, 0x80000000); + e.jnz(".shr"); + + // perform shift left + e.mov(e.cl, n); + e.sal(result, e.cl); + e.jmp(".end"); + + // perform right shift. a count whose low five bits are all zero is handled separately below + e.L(".shr"); + e.test(n, 0x1f); + e.jz(".shr_overflow"); + e.mov(e.cl, n); + e.neg(e.cl); + e.sar(result, e.cl); + e.jmp(".end"); + + // right shift overflowed: shifting by 31 fills result with copies of its sign bit + e.L(".shr_overflow"); + e.sar(result, 31); + + // shift is done + e.L(".end"); + + e.outLocalLabel(); +} + +EMITTER(LSHD) { + const Xbyak::Reg result = x64_get_register(backend,
instr->result); + const Xbyak::Reg v = x64_get_register(backend, instr->arg[0]); + const Xbyak::Reg n = x64_get_register(backend, instr->arg[1]); + + e.inLocalLabel(); + + if (result != v) { + e.mov(result, v); + } + + // check if we're shifting left or right + e.test(n, 0x80000000); + e.jnz(".shr"); + + // perform shift left + e.mov(e.cl, n); + e.shl(result, e.cl); + e.jmp(".end"); + + // perform right shift + e.L(".shr"); + e.test(n, 0x1f); + e.jz(".shr_overflow"); + e.mov(e.cl, n); + e.neg(e.cl); + e.shr(result, e.cl); + e.jmp(".end"); + + // right shift overflowed: a logical shift by the full width leaves nothing, so result is cleared + e.L(".shr_overflow"); + e.mov(result, 0x0); + + // shift is done + e.L(".end"); + + e.outLocalLabel(); +} + +EMITTER(BRANCH) { + const Xbyak::Reg a = x64_get_register(backend, instr->arg[0]); + /* the branch target is left in rax; presumably whatever runs the emitted block reads it from there (TODO confirm) */ + e.mov(e.rax, a); +} + +EMITTER(BRANCH_COND) { + const Xbyak::Reg cond = x64_get_register(backend, instr->arg[0]); + const Xbyak::Reg true_addr = x64_get_register(backend, instr->arg[1]); + const Xbyak::Reg false_addr = x64_get_register(backend, instr->arg[2]); + /* pick the target without a jump: eax receives true_addr when cond is non-zero and false_addr when it is zero */ + e.test(cond, cond); + e.cmovnz(e.eax, true_addr); + e.cmove(e.eax, false_addr); +} + +EMITTER(CALL_EXTERNAL) { + const Xbyak::Reg addr = x64_get_register(backend, instr->arg[0]); + /* first call argument is the guest context base, loaded as a 64-bit immediate; the optional IR argument becomes the second call argument */ + e.mov(arg0, reinterpret_cast<uint64_t>(backend->memif.ctx_base)); + if (instr->arg[1]) { + const Xbyak::Reg arg = x64_get_register(backend, instr->arg[1]); + e.mov(arg1, arg); + } + e.mov(e.rax, addr); + e.call(e.rax); +} + +jit_backend_t *x64_create(const mem_interface_t *memif) { /* allocates a zeroed x64 backend, fills in its callback table and copies *memif into it */ + x64_backend_t *backend = + reinterpret_cast<x64_backend_t *>(calloc(1, sizeof(x64_backend_t))); + /* wire up the generic backend interface */ + backend->base.registers = x64_registers; + backend->base.num_registers = sizeof(x64_registers) / sizeof(register_def_t); + backend->base.reset = (reset_cb)&x64_reset; + backend->base.assemble_code = (assemble_code_cb)&x64_assemble_code; + backend->base.dump_code = (dump_code_cb)&x64_dump_code; + backend->base.handle_fastmem_exception = + (handle_fastmem_exception_cb)&x64_handle_fastmem_exception; + + backend->memif =
*memif; + + backend->codegen = new Xbyak::CodeGenerator(x64_code_size, x64_code); + + int res = cs_open(CS_ARCH_X86, CS_MODE_64, &backend->capstone_handle); + CHECK_EQ(res, CS_ERR_OK); + + x64_reset(backend); + + // protect the code buffer: every page it touches becomes readable, writable and executable. the length is measured from the rounded-down start so the last page of an unaligned buffer is covered too + int page_size = get_page_size(); + void *aligned_code = (void *)align_down((intptr_t)x64_code, page_size); + int aligned_code_size = (int)(align_up((intptr_t)x64_code + x64_code_size, page_size) - (intptr_t)aligned_code); + bool success = + protect_pages(aligned_code, aligned_code_size, ACC_READWRITEEXEC); + CHECK(success); + + return (jit_backend_t *)backend; +} + +void x64_destroy(jit_backend_t *jit_backend) { + x64_backend_t *backend = (x64_backend_t *)jit_backend; + /* release in the reverse order of x64_create: capstone handle, code generator, then the backend allocation itself */ + cs_close(&backend->capstone_handle); + + delete backend->codegen; + + free(backend); +} diff --git a/src/jit/backend/x64/x64_backend.h b/src/jit/backend/x64/x64_backend.h index a8ab37f8..362059eb 100644 --- a/src/jit/backend/x64/x64_backend.h +++ b/src/jit/backend/x64/x64_backend.h @@ -1,51 +1,18 @@ #ifndef X64_BACKEND_H #define X64_BACKEND_H -#include -#include "jit/backend/backend.h" -#include "jit/backend/x64/x64_emitter.h" +#ifdef __cplusplus +extern "C" { +#endif -namespace re { -namespace jit { -namespace backend { -namespace x64 { +struct jit_backend_s; +struct mem_interface_s; -extern const Register x64_registers[]; -extern const int x64_num_registers; -extern const int x64_arg0_idx; -extern const int x64_arg1_idx; -extern const int x64_arg2_idx; -extern const int x64_tmp0_idx; -extern const int x64_tmp1_idx; +struct jit_backend_s *x64_create(const struct mem_interface_s *memif); +void x64_destroy(struct jit_backend_s *b); -typedef void (*SlowmemThunk)(); - -class X64Backend : public Backend { - public: - X64Backend(const MemoryInterface &memif); - ~X64Backend(); - - const Register *registers() const; - int num_registers() const; - - void Reset(); - - const uint8_t *AssembleCode(ir::IRBuilder &builder, int *size); - void DumpCode(const uint8_t *host_addr, int size); - - bool HandleFastmemException(struct
re_exception_s *ex); - - private: - void EmitThunks(); - - csh capstone_handle_; - X64Emitter emitter_; - SlowmemThunk load_thunk_[16]; - SlowmemThunk store_thunk_; -}; -} -} -} +#ifdef __cplusplus } +#endif #endif diff --git a/src/jit/backend/x64/x64_disassembler.cc b/src/jit/backend/x64/x64_disassembler.c similarity index 68% rename from src/jit/backend/x64/x64_disassembler.cc rename to src/jit/backend/x64/x64_disassembler.c index 152975e8..09b2438e 100644 --- a/src/jit/backend/x64/x64_disassembler.cc +++ b/src/jit/backend/x64/x64_disassembler.c @@ -1,15 +1,12 @@ -#include "core/memory.h" #include "jit/backend/x64/x64_disassembler.h" -using namespace re::jit::backend::x64; - -bool X64Disassembler::DecodeMov(const uint8_t *data, X64Mov *mov) { +bool x64_decode_mov(const uint8_t *data, x64_mov_t *mov) { const uint8_t *start = data; // test for operand size prefix bool has_opprefix = false; - if (load(data) == 0x66) { + if (*data == 0x66) { has_opprefix = true; data++; } @@ -22,8 +19,8 @@ bool X64Disassembler::DecodeMov(const uint8_t *data, X64Mov *mov) { uint8_t rex_x = 0; uint8_t rex_b = 0; - if ((load(data) & 0xf0) == 0x40) { - rex = load(data); + if ((*data & 0xf0) == 0x40) { + rex = *data; rex_w = rex & 0b1000; rex_r = rex & 0b0100; rex_x = rex & 0b0010; @@ -41,40 +38,38 @@ bool X64Disassembler::DecodeMov(const uint8_t *data, X64Mov *mov) { // MOV r16,r/m16 // MOV r32,r/m32 // MOV r64,r/m64 - if (load(data) == 0x8a || load(data) == 0x8b) { + if (*data == 0x8a || *data == 0x8b) { is_load = true; has_imm = false; - operand_size = - load(data) == 0x8a ? 1 : (has_opprefix ? 2 : (rex_w ? 8 : 4)); + operand_size = *data == 0x8a ? 1 : (has_opprefix ? 2 : (rex_w ? 8 : 4)); data++; } // MOV r/m8,r8 // MOV r/m16,r16 // MOV r/m32,r32 // MOV r/m64,r64 - else if (load(data) == 0x88 || load(data) == 0x89) { + else if (*data == 0x88 || *data == 0x89) { is_load = false; has_imm = false; - operand_size = - load(data) == 0x88 ? 1 : (has_opprefix ? 2 : (rex_w ? 
8 : 4)); + operand_size = *data == 0x88 ? 1 : (has_opprefix ? 2 : (rex_w ? 8 : 4)); data++; } // MOV r8,imm8 // MOV r16,imm16 // MOV r32,imm32 - else if (load(data) == 0xb0 || load(data) == 0xb8) { + else if (*data == 0xb0 || *data == 0xb8) { is_load = true; has_imm = true; - operand_size = load(data) == 0xb0 ? 1 : (has_opprefix ? 2 : 4); + operand_size = *data == 0xb0 ? 1 : (has_opprefix ? 2 : 4); data++; } // MOV r/m8,imm8 // MOV r/m16,imm16 // MOV r/m32,imm32 - else if (load(data) == 0xc6 || load(data) == 0xc7) { + else if (*data == 0xc6 || *data == 0xc7) { is_load = false; has_imm = true; - operand_size = load(data) == 0xc6 ? 1 : (has_opprefix ? 2 : 4); + operand_size = *data == 0xc6 ? 1 : (has_opprefix ? 2 : 4); data++; } // not a supported MOV instruction @@ -83,7 +78,7 @@ bool X64Disassembler::DecodeMov(const uint8_t *data, X64Mov *mov) { } // process ModR/M byte - uint8_t modrm = load(data); + uint8_t modrm = *data; uint8_t modrm_mod = (modrm & 0b11000000) >> 6; uint8_t modrm_reg = (modrm & 0b00111000) >> 3; uint8_t modrm_rm = (modrm & 0b00000111); @@ -104,7 +99,7 @@ bool X64Disassembler::DecodeMov(const uint8_t *data, X64Mov *mov) { // process optional SIB byte if (modrm_rm == 0b100) { - uint8_t sib = load(data); + uint8_t sib = *data; uint8_t sib_scale = (sib & 0b11000000) >> 6; uint8_t sib_index = (sib & 0b00111000) >> 3; uint8_t sib_base = (sib & 0b00000111); @@ -125,18 +120,18 @@ bool X64Disassembler::DecodeMov(const uint8_t *data, X64Mov *mov) { case 0b00: { // RIP-relative if (modrm_rm == 0b101) { - mov->disp = load(data); + mov->disp = *(uint32_t *)data; data += 4; } } break; case 0b01: { - mov->disp = load(data); + mov->disp = *(const int8_t *)data; /* disp8 is a signed byte: sign-extend it rather than reading it as 0..255 */ data++; } break; case 0b10: { - mov->disp = load(data); + mov->disp = *(uint32_t *)data; data += 4; } break; } @@ -145,29 +140,29 @@ bool X64Disassembler::DecodeMov(const uint8_t *data, X64Mov *mov) { if (mov->has_imm) { switch (mov->operand_size) { case 1: { - mov->imm = load(data); + mov->imm = *data; data++; }
break; case 2: { - mov->imm = load(data); + mov->imm = *(uint16_t *)data; data += 2; } break; case 4: { - mov->imm = load(data); + mov->imm = *(uint32_t *)data; data += 4; } break; case 8: { - mov->imm = load(data); + mov->imm = *(uint64_t *)data; data += 8; } break; } } // calculate total instruction length - mov->length = static_cast(data - start); + mov->length = (int)(data - start); return true; } diff --git a/src/jit/backend/x64/x64_disassembler.h b/src/jit/backend/x64/x64_disassembler.h index 18ace2e3..cce52b47 100644 --- a/src/jit/backend/x64/x64_disassembler.h +++ b/src/jit/backend/x64/x64_disassembler.h @@ -1,14 +1,14 @@ #ifndef X64_DISASSEMBLER_H #define X64_DISASSEMBLER_H +#include #include -namespace re { -namespace jit { -namespace backend { -namespace x64 { +#ifdef __cplusplus +extern "C" { +#endif -struct X64Mov { +typedef struct { int length; bool is_load; bool is_indirect; @@ -22,15 +22,12 @@ struct X64Mov { int scale; int disp; uint64_t imm; -}; +} x64_mov_t; -class X64Disassembler { - public: - static bool DecodeMov(const uint8_t *data, X64Mov *mov); -}; -} -} -} +bool x64_decode_mov(const uint8_t *data, x64_mov_t *mov); + +#ifdef __cplusplus } +#endif #endif diff --git a/src/jit/backend/x64/x64_emitter.cc b/src/jit/backend/x64/x64_emitter.cc deleted file mode 100644 index 9612aee8..00000000 --- a/src/jit/backend/x64/x64_emitter.cc +++ /dev/null @@ -1,1359 +0,0 @@ -#include -#include "core/assert.h" -#include "core/math.h" -#include "core/memory.h" -#include "core/profiler.h" -#include "jit/backend/x64/x64_backend.h" -#include "jit/backend/x64/x64_emitter.h" - -using namespace re; -using namespace re::jit; -using namespace re::jit::backend; -using namespace re::jit::backend::x64; -using namespace re::jit::ir; - -const Xbyak::Reg64 arg0(x64_arg0_idx); -const Xbyak::Reg64 arg1(x64_arg1_idx); -const Xbyak::Reg64 arg2(x64_arg2_idx); -const Xbyak::Reg64 tmp0(x64_tmp0_idx); -const Xbyak::Reg64 tmp1(x64_tmp1_idx); - -// callbacks for emitting each IR op 
-typedef void (*X64Emit)(X64Emitter &, const Instr *); - -static X64Emit x64_emitters[NUM_OPS]; - -#define EMITTER(op) \ - void op(X64Emitter &, const Instr *); \ - static struct _x64_##op##_init { \ - _x64_##op##_init() { \ - x64_emitters[OP_##op] = &op; \ - } \ - } x64_##op##_init; \ - void op(X64Emitter &e, const Instr *instr) - -static bool IsCalleeSaved(const Xbyak::Reg ®) { - if (reg.isXMM()) { - return false; - } - - static bool callee_saved[16] = { - false, // RAX - false, // RCX - false, // RDX - true, // RBX - false, // RSP - true, // RBP -#if PLATFORM_WINDOWS - true, // RSI - true, // RDI -#else - false, // RSI - false, // RDI -#endif - false, // R8 - false, // R9 - false, // R10 - false, // R11 - true, // R12 - true, // R13 - true, // R14 - true, // R15 - }; - - return callee_saved[reg.getIdx()]; -} - -X64Emitter::X64Emitter(const MemoryInterface &memif, void *buffer, - size_t buffer_size) - : CodeGenerator(buffer_size, buffer), memif_(memif) { - // temporary registers aren't tracked to be pushed and popped - CHECK(!IsCalleeSaved(tmp0) && !IsCalleeSaved(tmp1)); - - modified_ = new int[x64_num_registers]; - - Reset(); -} - -X64Emitter::~X64Emitter() { - delete[] modified_; -} - -void X64Emitter::Reset() { - modified_marker_ = 0; - memset(modified_, modified_marker_, sizeof(int) * x64_num_registers); - - // reset codegen buffer - reset(); - - EmitConstants(); -} - -const uint8_t *X64Emitter::Emit(IRBuilder &builder, int *size) { - // PROFILER_RUNTIME("X64Emitter::Emit"); - - const uint8_t *fn = getCurr(); - - int stack_size = 0; - EmitProlog(builder, &stack_size); - EmitBody(builder); - EmitEpilog(builder, stack_size); - - *size = getCurr() - fn; - - return fn; -} - -void X64Emitter::EmitConstants() { - L(xmm_const_[XMM_CONST_ABS_MASK_PS]); - dq(INT64_C(0x7fffffff7fffffff)); - dq(INT64_C(0x7fffffff7fffffff)); - - L(xmm_const_[XMM_CONST_ABS_MASK_PD]); - dq(INT64_C(0x7fffffffffffffff)); - dq(INT64_C(0x7fffffffffffffff)); - - 
L(xmm_const_[XMM_CONST_SIGN_MASK_PS]); - dq(INT64_C(0x8000000080000000)); - dq(INT64_C(0x8000000080000000)); - - L(xmm_const_[XMM_CONST_SIGN_MASK_PD]); - dq(INT64_C(0x8000000000000000)); - dq(INT64_C(0x8000000000000000)); -} - -void X64Emitter::EmitProlog(IRBuilder &builder, int *out_stack_size) { - int stack_size = STACK_SIZE + builder.locals_size(); - - // stack must be 16 byte aligned - stack_size = align_up(stack_size, 16); - - // add 8 for return address which will be pushed when this is called - stack_size += 8; - - CHECK_EQ((stack_size + 8) % 16, 0); - - // mark which registers have been modified - modified_marker_++; - - for (auto instr : builder.instrs()) { - int i = instr->reg(); - if (i == NO_REGISTER) { - continue; - } - - modified_[i] = modified_marker_; - } - - // push the callee-saved registers which have been modified - int pushed = 2; - - // always used by guest ctx and memory pointers - push(r15); - push(r14); - - for (int i = 0; i < x64_num_registers; i++) { - const Xbyak::Reg ® = - *reinterpret_cast(x64_registers[i].data); - - if (IsCalleeSaved(reg) && modified_[i] == modified_marker_) { - push(reg); - pushed++; - } - } - - // if an odd amount of push instructions are emitted stack_size needs to be - // adjusted to keep the stack aligned - if ((pushed % 2) == 1) { - stack_size += 8; - } - - // adjust stack pointer - sub(rsp, stack_size); - - // copy guest context and memory base to argument registers - mov(r14, reinterpret_cast(memif_.ctx_base)); - mov(r15, reinterpret_cast(memif_.mem_base)); - - *out_stack_size = stack_size; -} - -void X64Emitter::EmitBody(IRBuilder &builder) { - for (auto instr : builder.instrs()) { - X64Emit emit = x64_emitters[instr->op()]; - CHECK(emit, "Failed to find emitter for %s", Opnames[instr->op()]); - - // reset temp count used by GetRegister - num_temps_ = 0; - - emit(*this, instr); - } -} - -void X64Emitter::EmitEpilog(IRBuilder &builder, int stack_size) { - // adjust stack pointer - add(rsp, stack_size); - - // 
pop callee-saved registers which have been modified - for (int i = x64_num_registers - 1; i >= 0; i--) { - const Xbyak::Reg ® = - *reinterpret_cast(x64_registers[i].data); - - if (IsCalleeSaved(reg) && modified_[i] == modified_marker_) { - pop(reg); - } - } - - // pop r14 and r15 - pop(r14); - pop(r15); - - ret(); -} - -// If the value is a local or constant, copy it to a tempory register, else -// return the register allocated for it. -const Xbyak::Reg X64Emitter::GetRegister(const Value *v) { - if (v->constant()) { - CHECK_LT(num_temps_, 2); - - Xbyak::Reg tmp = num_temps_++ ? tmp1 : tmp0; - - switch (v->type()) { - case VALUE_I8: - tmp = tmp.cvt8(); - break; - case VALUE_I16: - tmp = tmp.cvt16(); - break; - case VALUE_I32: - tmp = tmp.cvt32(); - break; - case VALUE_I64: - // no conversion needed - break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - - // copy value to the temporary register - mov(tmp, v->GetZExtValue()); - - return tmp; - } - - int i = v->reg(); - CHECK_NE(i, NO_REGISTER); - - const Xbyak::Reg ® = - *reinterpret_cast(x64_registers[i].data); - CHECK(reg.isREG()); - - switch (v->type()) { - case VALUE_I8: - return reg.cvt8(); - case VALUE_I16: - return reg.cvt16(); - case VALUE_I32: - return reg.cvt32(); - case VALUE_I64: - return reg; - default: - LOG_FATAL("Unexpected value type"); - break; - } -} - -// If the value isn't allocated a XMM register copy it to a temporary XMM, -// register, else return the XMM register allocated for it. 
-const Xbyak::Xmm X64Emitter::GetXmmRegister(const Value *v) { - if (v->constant()) { - // copy value to the temporary register - if (v->type() == VALUE_F32) { - float val = v->f32(); - mov(eax, load(&val)); - vmovd(xmm1, eax); - } else { - double val = v->f64(); - mov(rax, load(&val)); - vmovq(xmm1, rax); - } - return xmm1; - } - - int i = v->reg(); - CHECK_NE(i, NO_REGISTER); - - const Xbyak::Xmm &xmm = - *reinterpret_cast(x64_registers[i].data); - CHECK(xmm.isXMM()); - return xmm; -} - -const Xbyak::Address X64Emitter::GetXmmConstant(XmmConstant c) { - return ptr[rip + xmm_const_[c]]; -} - -bool X64Emitter::CanEncodeAsImmediate(const Value *v) const { - if (!v->constant()) { - return false; - } - - return v->type() <= VALUE_I32; -} - -EMITTER(LOAD_HOST) { - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (IsFloatType(instr->type())) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - - switch (instr->type()) { - case VALUE_F32: - e.vmovss(result, e.dword[a]); - break; - case VALUE_F64: - e.vmovsd(result, e.qword[a]); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } else { - const Xbyak::Reg result = e.GetRegister(instr); - - switch (instr->type()) { - case VALUE_I8: - e.mov(result, e.byte[a]); - break; - case VALUE_I16: - e.mov(result, e.word[a]); - break; - case VALUE_I32: - e.mov(result, e.dword[a]); - break; - case VALUE_I64: - e.mov(result, e.qword[a]); - break; - default: - LOG_FATAL("Unexpected load result type"); - break; - } - } -} - -EMITTER(STORE_HOST) { - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (IsFloatType(instr->arg1()->type())) { - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - case VALUE_F32: - e.vmovss(e.dword[a], b); - break; - case VALUE_F64: - e.vmovsd(e.qword[a], b); - break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - 
case VALUE_I8: - e.mov(e.byte[a], b); - break; - case VALUE_I16: - e.mov(e.word[a], b); - break; - case VALUE_I32: - e.mov(e.dword[a], b); - break; - case VALUE_I64: - e.mov(e.qword[a], b); - break; - default: - LOG_FATAL("Unexpected store value type"); - break; - } - } -} - -EMITTER(LOAD_FAST) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - switch (instr->type()) { - case VALUE_I8: - e.mov(result, e.byte[a.cvt64() + e.r15]); - break; - case VALUE_I16: - e.mov(result, e.word[a.cvt64() + e.r15]); - break; - case VALUE_I32: - e.mov(result, e.dword[a.cvt64() + e.r15]); - break; - case VALUE_I64: - e.mov(result, e.qword[a.cvt64() + e.r15]); - break; - default: - LOG_FATAL("Unexpected load result type"); - break; - } -} - -EMITTER(STORE_FAST) { - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - case VALUE_I8: - e.mov(e.byte[a.cvt64() + e.r15], b); - break; - case VALUE_I16: - e.mov(e.word[a.cvt64() + e.r15], b); - break; - case VALUE_I32: - e.mov(e.dword[a.cvt64() + e.r15], b); - break; - case VALUE_I64: - e.mov(e.qword[a.cvt64() + e.r15], b); - break; - default: - LOG_FATAL("Unexpected store value type"); - break; - } -} - -EMITTER(LOAD_SLOW) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - void *fn = nullptr; - switch (instr->type()) { - case VALUE_I8: - fn = reinterpret_cast(e.memif().r8); - break; - case VALUE_I16: - fn = reinterpret_cast(e.memif().r16); - break; - case VALUE_I32: - fn = reinterpret_cast(e.memif().r32); - break; - case VALUE_I64: - fn = reinterpret_cast(e.memif().r64); - break; - default: - LOG_FATAL("Unexpected load result type"); - break; - } - - e.mov(arg0, reinterpret_cast(e.memif().mem_self)); - e.mov(arg1, a); - e.call(reinterpret_cast(fn)); - e.mov(result, e.rax); -} - -EMITTER(STORE_SLOW) { - const Xbyak::Reg a = 
e.GetRegister(instr->arg0()); - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - - void *fn = nullptr; - switch (instr->arg1()->type()) { - case VALUE_I8: - fn = reinterpret_cast(e.memif().w8); - break; - case VALUE_I16: - fn = reinterpret_cast(e.memif().w16); - break; - case VALUE_I32: - fn = reinterpret_cast(e.memif().w32); - break; - case VALUE_I64: - fn = reinterpret_cast(e.memif().w64); - break; - default: - LOG_FATAL("Unexpected store value type"); - break; - } - - e.mov(arg0, reinterpret_cast(e.memif().mem_self)); - e.mov(arg1, a); - e.mov(arg2, b); - e.call(reinterpret_cast(fn)); -} - -EMITTER(LOAD_CONTEXT) { - int offset = instr->arg0()->i32(); - - if (IsVectorType(instr->type())) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - - switch (instr->type()) { - case VALUE_V128: - e.movups(result, e.ptr[e.r14 + offset]); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } else if (IsFloatType(instr->type())) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - - switch (instr->type()) { - case VALUE_F32: - e.vmovss(result, e.dword[e.r14 + offset]); - break; - case VALUE_F64: - e.vmovsd(result, e.qword[e.r14 + offset]); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } else { - const Xbyak::Reg result = e.GetRegister(instr); - - switch (instr->type()) { - case VALUE_I8: - e.mov(result, e.byte[e.r14 + offset]); - break; - case VALUE_I16: - e.mov(result, e.word[e.r14 + offset]); - break; - case VALUE_I32: - e.mov(result, e.dword[e.r14 + offset]); - break; - case VALUE_I64: - e.mov(result, e.qword[e.r14 + offset]); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } -} - -EMITTER(STORE_CONTEXT) { - int offset = instr->arg0()->i32(); - - if (instr->arg1()->constant()) { - switch (instr->arg1()->type()) { - case VALUE_I8: - e.mov(e.byte[e.r14 + offset], instr->arg1()->i8()); - break; - case VALUE_I16: - e.mov(e.word[e.r14 + offset], instr->arg1()->i16()); - break; - case 
VALUE_I32: - case VALUE_F32: - e.mov(e.dword[e.r14 + offset], instr->arg1()->i32()); - break; - case VALUE_I64: - case VALUE_F64: - e.mov(e.qword[e.r14 + offset], instr->arg1()->i64()); - break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - } else { - if (IsVectorType(instr->arg1()->type())) { - const Xbyak::Xmm src = e.GetXmmRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - case VALUE_V128: - e.vmovups(e.ptr[e.r14 + offset], src); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } else if (IsFloatType(instr->arg1()->type())) { - const Xbyak::Xmm src = e.GetXmmRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - case VALUE_F32: - e.vmovss(e.dword[e.r14 + offset], src); - break; - case VALUE_F64: - e.vmovsd(e.qword[e.r14 + offset], src); - break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - } else { - const Xbyak::Reg src = e.GetRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - case VALUE_I8: - e.mov(e.byte[e.r14 + offset], src); - break; - case VALUE_I16: - e.mov(e.word[e.r14 + offset], src); - break; - case VALUE_I32: - e.mov(e.dword[e.r14 + offset], src); - break; - case VALUE_I64: - e.mov(e.qword[e.r14 + offset], src); - break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - } - } -} - -EMITTER(LOAD_LOCAL) { - int offset = STACK_OFFSET_LOCALS + instr->arg0()->i32(); - - if (IsVectorType(instr->type())) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - - switch (instr->type()) { - case VALUE_V128: - e.movups(result, e.ptr[e.rsp + offset]); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } else if (IsFloatType(instr->type())) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - - switch (instr->type()) { - case VALUE_F32: - e.vmovss(result, e.dword[e.rsp + offset]); - break; - case VALUE_F64: - e.vmovsd(result, e.qword[e.rsp + offset]); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } 
else { - const Xbyak::Reg result = e.GetRegister(instr); - - switch (instr->type()) { - case VALUE_I8: - e.mov(result, e.byte[e.rsp + offset]); - break; - case VALUE_I16: - e.mov(result, e.word[e.rsp + offset]); - break; - case VALUE_I32: - e.mov(result, e.dword[e.rsp + offset]); - break; - case VALUE_I64: - e.mov(result, e.qword[e.rsp + offset]); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } -} - -EMITTER(STORE_LOCAL) { - int offset = STACK_OFFSET_LOCALS + instr->arg0()->i32(); - - CHECK(!instr->arg1()->constant()); - - if (IsVectorType(instr->arg1()->type())) { - const Xbyak::Xmm src = e.GetXmmRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - case VALUE_V128: - e.vmovups(e.ptr[e.rsp + offset], src); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } - } else if (IsFloatType(instr->arg1()->type())) { - const Xbyak::Xmm src = e.GetXmmRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - case VALUE_F32: - e.vmovss(e.dword[e.rsp + offset], src); - break; - case VALUE_F64: - e.vmovsd(e.qword[e.rsp + offset], src); - break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - } else { - const Xbyak::Reg src = e.GetRegister(instr->arg1()); - - switch (instr->arg1()->type()) { - case VALUE_I8: - e.mov(e.byte[e.rsp + offset], src); - break; - case VALUE_I16: - e.mov(e.word[e.rsp + offset], src); - break; - case VALUE_I32: - e.mov(e.dword[e.rsp + offset], src); - break; - case VALUE_I64: - e.mov(e.qword[e.rsp + offset], src); - break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - } -} - -EMITTER(FTOI) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - - switch (instr->type()) { - case VALUE_I32: - CHECK_EQ(instr->arg0()->type(), VALUE_F32); - e.cvttss2si(result, a); - break; - case VALUE_I64: - CHECK_EQ(instr->arg0()->type(), VALUE_F64); - e.cvttsd2si(result, a); - break; - default: - LOG_FATAL("Unexpected result 
type"); - break; - } -} - -EMITTER(ITOF) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - switch (instr->type()) { - case VALUE_F32: - CHECK_EQ(instr->arg0()->type(), VALUE_I32); - e.cvtsi2ss(result, a); - break; - case VALUE_F64: - CHECK_EQ(instr->arg0()->type(), VALUE_I64); - e.cvtsi2sd(result, a); - break; - default: - LOG_FATAL("Unexpected result type"); - break; - } -} - -EMITTER(SEXT) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (a == result) { - // already the correct width - return; - } - - if (result.isBit(64) && a.isBit(32)) { - e.movsxd(result.cvt64(), a); - } else { - e.movsx(result, a); - } -} - -EMITTER(ZEXT) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (a == result) { - // already the correct width - return; - } - - if (result.isBit(64) && a.isBit(32)) { - // mov will automatically zero fill the upper 32-bits - e.mov(result.cvt32(), a); - } else { - e.movzx(result, a); - } -} - -EMITTER(TRUNC) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result.getIdx() == a.getIdx()) { - // noop if already the same register. 
note, this means the high order bits - // of the result won't be cleared, but I believe that is fine - return; - } - - Xbyak::Reg truncated = a; - switch (instr->type()) { - case VALUE_I8: - truncated = a.cvt8(); - break; - case VALUE_I16: - truncated = a.cvt16(); - break; - case VALUE_I32: - truncated = a.cvt32(); - break; - default: - LOG_FATAL("Unexpected value type"); - } - - if (truncated.isBit(32)) { - // mov will automatically zero fill the upper 32-bits - e.mov(result, truncated); - } else { - e.movzx(result.cvt32(), truncated); - } -} - -EMITTER(FEXT) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - - e.cvtss2sd(result, a); -} - -EMITTER(FTRUNC) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - - e.cvtsd2ss(result, a); -} - -EMITTER(SELECT) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - const Xbyak::Reg cond = e.GetRegister(instr->arg2()); - - // convert result to Reg32e to please xbyak - CHECK_GE(result.getBit(), 32); - Xbyak::Reg32e result_32e(result.getIdx(), result.getBit()); - - e.test(cond, cond); - if (result_32e != a) { - e.cmovnz(result_32e, a); - } - e.cmovz(result_32e, b); -} - -EMITTER(CMP) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (e.CanEncodeAsImmediate(instr->arg1())) { - e.cmp(a, static_cast(instr->arg1()->GetZExtValue())); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.cmp(a, b); - } - - CmpType cmp = static_cast(instr->arg2()->i32()); - - switch (cmp) { - case CMP_EQ: - e.sete(result); - break; - - case CMP_NE: - e.setne(result); - break; - - case CMP_SGE: - e.setge(result); - break; - - case CMP_SGT: - e.setg(result); - break; - - case CMP_UGE: - e.setae(result); - break; - - case CMP_UGT: - e.seta(result); - 
break; - - case CMP_SLE: - e.setle(result); - break; - - case CMP_SLT: - e.setl(result); - break; - - case CMP_ULE: - e.setbe(result); - break; - - case CMP_ULT: - e.setb(result); - break; - - default: - LOG_FATAL("Unexpected comparison type"); - } -} - -EMITTER(FCMP) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - if (instr->arg0()->type() == VALUE_F32) { - e.comiss(a, b); - } else { - e.comisd(a, b); - } - - CmpType cmp = static_cast(instr->arg2()->i32()); - - switch (cmp) { - case CMP_EQ: - e.sete(result); - break; - - case CMP_NE: - e.setne(result); - break; - - case CMP_SGE: - e.setae(result); - break; - - case CMP_SGT: - e.seta(result); - break; - - case CMP_SLE: - e.setbe(result); - break; - - case CMP_SLT: - e.setb(result); - break; - - default: - LOG_FATAL("Unexpected comparison type"); - } -} - -EMITTER(ADD) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - if (e.CanEncodeAsImmediate(instr->arg1())) { - e.add(result, (uint32_t)instr->arg1()->GetZExtValue()); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.add(result, b); - } -} - -EMITTER(SUB) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - if (e.CanEncodeAsImmediate(instr->arg1())) { - e.sub(result, (uint32_t)instr->arg1()->GetZExtValue()); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.sub(result, b); - } -} - -EMITTER(SMUL) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - - if (result != a) { - e.mov(result, a); - } - - e.imul(result, b); -} - -EMITTER(UMUL) { - const Xbyak::Reg result = e.GetRegister(instr); - const 
Xbyak::Reg a = e.GetRegister(instr->arg0()); - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - - if (result != a) { - e.mov(result, a); - } - - e.imul(result, b); -} - -EMITTER(DIV) { - LOG_FATAL("Unsupported"); -} - -EMITTER(NEG) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - e.neg(result); -} - -EMITTER(ABS) { - LOG_FATAL("Unsupported"); - // e.mov(e.rax, *result); - // e.neg(e.rax); - // e.cmovl(reinterpret_cast(result)->cvt32(), e.rax); -} - -EMITTER(FADD) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - if (instr->type() == VALUE_F32) { - e.vaddss(result, a, b); - } else { - e.vaddsd(result, a, b); - } -} - -EMITTER(FSUB) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - if (instr->type() == VALUE_F32) { - e.vsubss(result, a, b); - } else { - e.vsubsd(result, a, b); - } -} - -EMITTER(FMUL) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - if (instr->type() == VALUE_F32) { - e.vmulss(result, a, b); - } else { - e.vmulsd(result, a, b); - } -} - -EMITTER(FDIV) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - if (instr->type() == VALUE_F32) { - e.vdivss(result, a, b); - } else { - e.vdivsd(result, a, b); - } -} - -EMITTER(FNEG) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - - if (instr->type() == VALUE_F32) { - e.vxorps(result, a, e.GetXmmConstant(XMM_CONST_SIGN_MASK_PS)); - } else { - e.vxorpd(result, a, 
e.GetXmmConstant(XMM_CONST_SIGN_MASK_PD)); - } -} - -EMITTER(FABS) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - - if (instr->type() == VALUE_F32) { - e.vandps(result, a, e.GetXmmConstant(XMM_CONST_ABS_MASK_PS)); - } else { - e.vandpd(result, a, e.GetXmmConstant(XMM_CONST_ABS_MASK_PD)); - } -} - -EMITTER(SQRT) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - - if (instr->type() == VALUE_F32) { - e.vsqrtss(result, a); - } else { - e.vsqrtsd(result, a); - } -} - -EMITTER(VBROADCAST) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - - e.vbroadcastss(result, a); -} - -EMITTER(VADD) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - e.vaddps(result, a, b); -} - -EMITTER(VDOT) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - e.vdpps(result, a, b, 0b11110001); -} - -EMITTER(VMUL) { - const Xbyak::Xmm result = e.GetXmmRegister(instr); - const Xbyak::Xmm a = e.GetXmmRegister(instr->arg0()); - const Xbyak::Xmm b = e.GetXmmRegister(instr->arg1()); - - e.vmulps(result, a, b); -} - -EMITTER(AND) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - if (e.CanEncodeAsImmediate(instr->arg1())) { - e.and (result, (uint32_t)instr->arg1()->GetZExtValue()); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.and (result, b); - } -} - -EMITTER(OR) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - if (e.CanEncodeAsImmediate(instr->arg1())) 
{ - e.or (result, (uint32_t)instr->arg1()->GetZExtValue()); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.or (result, b); - } -} - -EMITTER(XOR) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - if (e.CanEncodeAsImmediate(instr->arg1())) { - e.xor (result, (uint32_t)instr->arg1()->GetZExtValue()); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.xor (result, b); - } -} - -EMITTER(NOT) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - e.not(result); -} - -EMITTER(SHL) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - if (e.CanEncodeAsImmediate(instr->arg1())) { - e.shl(result, (int)instr->arg1()->GetZExtValue()); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.mov(e.cl, b); - e.shl(result, e.cl); - } -} - -EMITTER(ASHR) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - if (e.CanEncodeAsImmediate(instr->arg1())) { - e.sar(result, (int)instr->arg1()->GetZExtValue()); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.mov(e.cl, b); - e.sar(result, e.cl); - } -} - -EMITTER(LSHR) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - if (result != a) { - e.mov(result, a); - } - - if (e.CanEncodeAsImmediate(instr->arg1())) { - e.shr(result, (int)instr->arg1()->GetZExtValue()); - } else { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); - e.mov(e.cl, b); - e.shr(result, e.cl); - } -} - -EMITTER(ASHD) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg v = e.GetRegister(instr->arg0()); - const Xbyak::Reg n 
= e.GetRegister(instr->arg1()); - - e.inLocalLabel(); - - if (result != v) { - e.mov(result, v); - } - - // check if we're shifting left or right - e.test(n, 0x80000000); - e.jnz(".shr"); - - // perform shift left - e.mov(e.cl, n); - e.sal(result, e.cl); - e.jmp(".end"); - - // perform right shift - e.L(".shr"); - e.test(n, 0x1f); - e.jz(".shr_overflow"); - e.mov(e.cl, n); - e.neg(e.cl); - e.sar(result, e.cl); - e.jmp(".end"); - - // right shift overflowed - e.L(".shr_overflow"); - e.sar(result, 31); - - // shift is done - e.L(".end"); - - e.outLocalLabel(); -} - -EMITTER(LSHD) { - const Xbyak::Reg result = e.GetRegister(instr); - const Xbyak::Reg v = e.GetRegister(instr->arg0()); - const Xbyak::Reg n = e.GetRegister(instr->arg1()); - - e.inLocalLabel(); - - if (result != v) { - e.mov(result, v); - } - - // check if we're shifting left or right - e.test(n, 0x80000000); - e.jnz(".shr"); - - // perform shift left - e.mov(e.cl, n); - e.shl(result, e.cl); - e.jmp(".end"); - - // perform right shift - e.L(".shr"); - e.test(n, 0x1f); - e.jz(".shr_overflow"); - e.mov(e.cl, n); - e.neg(e.cl); - e.shr(result, e.cl); - e.jmp(".end"); - - // right shift overflowed - e.L(".shr_overflow"); - e.mov(result, 0x0); - - // shift is done - e.L(".end"); - - e.outLocalLabel(); -} - -EMITTER(BRANCH) { - const Xbyak::Reg a = e.GetRegister(instr->arg0()); - - e.mov(e.rax, a); -} - -EMITTER(BRANCH_COND) { - const Xbyak::Reg cond = e.GetRegister(instr->arg0()); - const Xbyak::Reg true_addr = e.GetRegister(instr->arg1()); - const Xbyak::Reg false_addr = e.GetRegister(instr->arg2()); - - e.test(cond, cond); - e.cmovnz(e.eax, true_addr); - e.cmove(e.eax, false_addr); -} - -EMITTER(CALL_EXTERNAL) { - const Xbyak::Reg addr = e.GetRegister(instr->arg0()); - - e.mov(arg0, reinterpret_cast(e.memif().ctx_base)); - if (instr->arg1()) { - const Xbyak::Reg arg = e.GetRegister(instr->arg1()); - e.mov(arg1, arg); - } - e.mov(e.rax, addr); - e.call(e.rax); -} diff --git a/src/jit/backend/x64/x64_emitter.h 
b/src/jit/backend/x64/x64_emitter.h deleted file mode 100644 index 406b5016..00000000 --- a/src/jit/backend/x64/x64_emitter.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef X64_EMITTER_H -#define X64_EMITTER_H - -#include -#include "jit/backend/backend.h" - -namespace re { -namespace jit { -namespace backend { -namespace x64 { - -enum { -#if PLATFORM_WINDOWS - STACK_SHADOW_SPACE = 32, -#else - STACK_SHADOW_SPACE = 0, -#endif - STACK_OFFSET_LOCALS = STACK_SHADOW_SPACE, - STACK_SIZE = STACK_OFFSET_LOCALS -}; - -enum XmmConstant { - XMM_CONST_ABS_MASK_PS, - XMM_CONST_ABS_MASK_PD, - XMM_CONST_SIGN_MASK_PS, - XMM_CONST_SIGN_MASK_PD, - NUM_XMM_CONST, -}; - -class X64Emitter : public Xbyak::CodeGenerator { - public: - X64Emitter(const MemoryInterface &memif, void *buffer, size_t buffer_size); - ~X64Emitter(); - - const MemoryInterface &memif() { - return memif_; - } - - void Reset(); - - const uint8_t *Emit(ir::IRBuilder &builder, int *size); - - // helpers for the emitter callbacks - const Xbyak::Reg GetRegister(const ir::Value *v); - const Xbyak::Xmm GetXmmRegister(const ir::Value *v); - const Xbyak::Address GetXmmConstant(XmmConstant c); - - bool CanEncodeAsImmediate(const ir::Value *v) const; - - private: - void EmitConstants(); - void EmitProlog(ir::IRBuilder &builder, int *stack_size); - void EmitBody(ir::IRBuilder &builder); - void EmitEpilog(ir::IRBuilder &builder, int stack_size); - - MemoryInterface memif_; - int modified_marker_; - int *modified_; - int num_temps_; - Xbyak::Label xmm_const_[NUM_XMM_CONST]; -}; -} -} -} -} - -#endif diff --git a/src/jit/frontend/frontend.h b/src/jit/frontend/frontend.h index a159c921..4a3e1586 100644 --- a/src/jit/frontend/frontend.h +++ b/src/jit/frontend/frontend.h @@ -1,22 +1,20 @@ #ifndef FRONTEND_H #define FRONTEND_H -#include "jit/ir/ir_builder.h" +struct ir_s; +struct jit_frontend_s; -namespace re { -namespace jit { -namespace frontend { +typedef void (*jit_frontend_translate_code)(struct jit_frontend_s *frontend, + uint32_t 
guest_addr, + uint8_t *guest_ptr, int flags, + int *size, struct ir_s *ir); +typedef void (*jit_frontend_dump_code)(struct jit_frontend_s *frontend, + uint32_t guest_addr, uint8_t *guest_ptr, + int size); -class Frontend { - public: - virtual ~Frontend() {} - - virtual ir::IRBuilder &TranslateCode(uint32_t guest_addr, uint8_t *guest_ptr, - int flags, int *size) = 0; - virtual void DumpCode(uint32_t guest_addr, uint8_t *guest_ptr, int size) = 0; -}; -} -} -} +typedef struct jit_frontend_s { + jit_frontend_translate_code translate_code; + jit_frontend_dump_code dump_code; +} jit_frontend_t; #endif diff --git a/src/jit/frontend/sh4/sh4_analyzer.cc b/src/jit/frontend/sh4/sh4_analyze.c similarity index 50% rename from src/jit/frontend/sh4/sh4_analyzer.cc rename to src/jit/frontend/sh4/sh4_analyze.c index b8ba6745..a911e0d2 100644 --- a/src/jit/frontend/sh4/sh4_analyzer.cc +++ b/src/jit/frontend/sh4/sh4_analyze.c @@ -1,27 +1,22 @@ #include "core/assert.h" -#include "core/memory.h" -#include "jit/frontend/sh4/sh4_analyzer.h" -#include "jit/frontend/sh4/sh4_disassembler.h" -#include "jit/frontend/sh4/sh4_frontend.h" +#include "jit/frontend/sh4/sh4_analyze.h" +#include "jit/frontend/sh4/sh4_disasm.h" -using namespace re; -using namespace re::jit::frontend::sh4; - -void SH4Analyzer::AnalyzeBlock(uint32_t guest_addr, uint8_t *guest_ptr, - int flags, int *size) { +void sh4_analyze_block(uint32_t guest_addr, uint8_t *guest_ptr, int flags, + int *size) { *size = 0; while (true) { - Instr instr; + sh4_instr_t instr = {}; instr.addr = guest_addr; - instr.opcode = load(guest_ptr); + instr.opcode = *(uint16_t *)guest_ptr; // end block on invalid instruction - if (!SH4Disassembler::Disasm(&instr)) { + if (!sh4_disasm(&instr)) { break; } - int step = (instr.flags & OP_FLAG_DELAYED) ? 4 : 2; + int step = (instr.flags & SH4_FLAG_DELAYED) ? 
4 : 2; guest_addr += step; guest_ptr += step; *size += step; @@ -30,7 +25,8 @@ void SH4Analyzer::AnalyzeBlock(uint32_t guest_addr, uint8_t *guest_ptr, // changed, stop emitting since the fpu state is invalidated. also, if // sr has changed, stop emitting as there are interrupts that possibly // need to be handled - if (instr.flags & (OP_FLAG_BRANCH | OP_FLAG_SET_FPSCR | OP_FLAG_SET_SR)) { + if (instr.flags & + (SH4_FLAG_BRANCH | SH4_FLAG_SET_FPSCR | SH4_FLAG_SET_SR)) { break; } diff --git a/src/jit/frontend/sh4/sh4_analyze.h b/src/jit/frontend/sh4/sh4_analyze.h new file mode 100644 index 00000000..2c3a5cb4 --- /dev/null +++ b/src/jit/frontend/sh4/sh4_analyze.h @@ -0,0 +1,25 @@ +#ifndef SH4_ANALYZER_H +#define SH4_ANALYZER_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + SH4_SLOWMEM = 0x1, + SH4_DOUBLE_PR = 0x2, + SH4_DOUBLE_SZ = 0x4, + SH4_SINGLE_INSTR = 0x8, +}; + +void sh4_analyze_block(uint32_t guest_addr, uint8_t *guest_ptr, int flags, + int *size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/jit/frontend/sh4/sh4_analyzer.h b/src/jit/frontend/sh4/sh4_analyzer.h deleted file mode 100644 index 9c7fc3cf..00000000 --- a/src/jit/frontend/sh4/sh4_analyzer.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef SH4_ANALYZER_H -#define SH4_ANALYZER_H - -#include -#include - -namespace re { -namespace jit { -namespace frontend { -namespace sh4 { - -class SH4Analyzer { - public: - static void AnalyzeBlock(uint32_t guest_addr, uint8_t *guest_ptr, int flags, - int *size); -}; -} -} -} -} - -#endif diff --git a/src/jit/frontend/sh4/sh4_builder.cc b/src/jit/frontend/sh4/sh4_builder.cc deleted file mode 100644 index 69605930..00000000 --- a/src/jit/frontend/sh4/sh4_builder.cc +++ /dev/null @@ -1,2200 +0,0 @@ -#include "core/assert.h" -#include "core/memory.h" -#include "core/profiler.h" -#include "jit/frontend/sh4/sh4_analyzer.h" -#include "jit/frontend/sh4/sh4_builder.h" -#include "jit/frontend/sh4/sh4_frontend.h" - -using namespace re; 
-using namespace re::jit; -using namespace re::jit::frontend; -using namespace re::jit::frontend::sh4; -using namespace re::jit::ir; - -static uint32_t s_fsca_table[0x20000] = { -#include "jit/frontend/sh4/sh4_fsca.inc" -}; - -typedef void (*EmitCallback)(SH4Builder &, const sh4::Instr &); - -#define EMITTER(name) void Emit_OP_##name(SH4Builder &b, const sh4::Instr &i) - -#define SH4_INSTR(name, desc, instr_code, cycles, flags) static EMITTER(name); -#include "jit/frontend/sh4/sh4_instr.inc" -#undef SH4_INSTR - -EmitCallback emit_callbacks[sh4::NUM_OPCODES] = { - nullptr, // OP_INVALID -#define SH4_INSTR(name, desc, instr_code, cycles, flags) &Emit_OP_##name, -#include "jit/frontend/sh4/sh4_instr.inc" -#undef SH4_INSTR -}; - -SH4Builder::SH4Builder(Arena &arena) : IRBuilder(arena) {} - -void SH4Builder::Emit(uint32_t guest_addr, uint8_t *guest_ptr, int size, - int flags) { - // PROFILER_RUNTIME("SH4Builder::Emit"); - - // save off flags for ease of access - flags_ = flags; - - int i = 0; - int guest_cycles = 0; - - while (i < size) { - Instr instr; - instr.addr = guest_addr + i; - instr.opcode = load(guest_ptr + i); - - if (!SH4Disassembler::Disasm(&instr)) { - InvalidInstruction(instr.addr); - break; - } - - i += 2; - guest_cycles += instr.cycles; - - if (instr.flags & OP_FLAG_DELAYED) { - delay_instr_.addr = guest_addr + i; - delay_instr_.opcode = load(guest_ptr + i); - - // instruction must be valid, breakpoints on delay instructions aren't - // currently supported - CHECK(SH4Disassembler::Disasm(&delay_instr_)); - - // delay instruction itself should never have a delay instr - CHECK(!(delay_instr_.flags & OP_FLAG_DELAYED)); - - i += 2; - guest_cycles += delay_instr_.cycles; - } - - (emit_callbacks[instr.op])(*this, instr); - } - - ir::Instr *tail_instr = instrs_.tail(); - - // if the block was terminated before a branch instruction, emit a - // fallthrough branch to the next pc - if (tail_instr->op() != OP_BRANCH && tail_instr->op() != OP_BRANCH_COND) { - 
Branch(AllocConstant(guest_addr + i)); - } - - // emit block epilog - current_instr_ = tail_instr->prev(); - - // update remaining cycles - Value *num_cycles = - LoadContext(offsetof(sh4_context_t, num_cycles), VALUE_I32); - num_cycles = Sub(num_cycles, AllocConstant(guest_cycles)); - StoreContext(offsetof(sh4_context_t, num_cycles), num_cycles); - - // update num instructions - Value *num_instrs = - LoadContext(offsetof(sh4_context_t, num_instrs), VALUE_I32); - num_instrs = Add(num_instrs, AllocConstant(size >> 1)); - StoreContext(offsetof(sh4_context_t, num_instrs), num_instrs); -} - -ir::Instr *SH4Builder::LoadGuest(Value *addr, ValueType type) { - if (flags_ & SH4_SLOWMEM) { - return LoadSlow(addr, type); - } - - return LoadFast(addr, type); -} - -void SH4Builder::StoreGuest(Value *addr, Value *v) { - if (flags_ & SH4_SLOWMEM) { - StoreSlow(addr, v); - return; - } - - StoreFast(addr, v); -} - -Value *SH4Builder::LoadGPR(int n, ValueType type) { - return LoadContext(offsetof(sh4_context_t, r[n]), type); -} - -void SH4Builder::StoreGPR(int n, Value *v) { - CHECK_EQ(v->type(), VALUE_I32); - return StoreContext(offsetof(sh4_context_t, r[n]), v); -} - -Value *SH4Builder::LoadFPR(int n, ValueType type) { - // swizzle 32-bit loads, see notes in sh4_context.h - if (SizeForType(type) == 4) { - n ^= 1; - } - return LoadContext(offsetof(sh4_context_t, fr[n]), type); -} - -void SH4Builder::StoreFPR(int n, Value *v) { - if (SizeForType(v->type()) == 4) { - n ^= 1; - } - return StoreContext(offsetof(sh4_context_t, fr[n]), v); -} - -Value *SH4Builder::LoadXFR(int n, ValueType type) { - if (SizeForType(type) == 4) { - n ^= 1; - } - return LoadContext(offsetof(sh4_context_t, xf[n]), type); -} - -void SH4Builder::StoreXFR(int n, Value *v) { - if (SizeForType(v->type()) == 4) { - n ^= 1; - } - return StoreContext(offsetof(sh4_context_t, xf[n]), v); -} - -Value *SH4Builder::LoadSR() { - return LoadContext(offsetof(sh4_context_t, sr), VALUE_I32); -} - -void 
SH4Builder::StoreSR(Value *v) { - CHECK_EQ(v->type(), VALUE_I32); - - Value *sr_updated = - LoadContext(offsetof(sh4_context_t, SRUpdated), VALUE_I64); - Value *old_sr = LoadSR(); - StoreContext(offsetof(sh4_context_t, sr), v); - CallExternal2(sr_updated, ZExt(old_sr, VALUE_I64)); -} - -ir::Value *SH4Builder::LoadT() { - return And(LoadSR(), AllocConstant(T)); -} - -void SH4Builder::StoreT(ir::Value *v) { - Value *sr = LoadSR(); - Value *sr_t = Or(sr, AllocConstant(T)); - Value *sr_not = And(sr, AllocConstant(~T)); - StoreSR(Select(v, sr_t, sr_not)); -} - -Value *SH4Builder::LoadGBR() { - return LoadContext(offsetof(sh4_context_t, gbr), VALUE_I32); -} - -void SH4Builder::StoreGBR(Value *v) { - StoreContext(offsetof(sh4_context_t, gbr), v); -} - -ir::Value *SH4Builder::LoadFPSCR() { - ir::Value *v = LoadContext(offsetof(sh4_context_t, fpscr), VALUE_I32); - v = And(v, AllocConstant(0x003fffff)); - return v; -} - -void SH4Builder::StoreFPSCR(ir::Value *v) { - CHECK_EQ(v->type(), VALUE_I32); - v = And(v, AllocConstant(0x003fffff)); - - Value *fpscr_updated = - LoadContext(offsetof(sh4_context_t, FPSCRUpdated), VALUE_I64); - Value *old_fpscr = LoadFPSCR(); - StoreContext(offsetof(sh4_context_t, fpscr), v); - CallExternal2(fpscr_updated, ZExt(old_fpscr, VALUE_I64)); -} - -ir::Value *SH4Builder::LoadPR() { - return LoadContext(offsetof(sh4_context_t, pr), VALUE_I32); -} - -void SH4Builder::StorePR(ir::Value *v) { - CHECK_EQ(v->type(), VALUE_I32); - StoreContext(offsetof(sh4_context_t, pr), v); -} - -void SH4Builder::InvalidInstruction(uint32_t guest_addr) { - Value *invalid_instruction = - LoadContext(offsetof(sh4_context_t, InvalidInstruction), VALUE_I64); - CallExternal2(invalid_instruction, - AllocConstant(static_cast(guest_addr))); -} - -void SH4Builder::EmitDelayInstr() { - (emit_callbacks[delay_instr_.op])(*this, delay_instr_); -} - -// MOV #imm,Rn -EMITTER(MOVI) { - Value *v = b.AllocConstant((uint32_t)(int32_t)(int8_t)i.imm); - b.StoreGPR(i.Rn, v); -} - -// MOV.W 
@(disp,PC),Rn -EMITTER(MOVWLPC) { - uint32_t addr = (i.disp * 2) + i.addr + 4; - Value *v = b.LoadGuest(b.AllocConstant(addr), VALUE_I16); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.L @(disp,PC),Rn -EMITTER(MOVLLPC) { - uint32_t addr = (i.disp * 4) + (i.addr & ~3) + 4; - Value *v = b.LoadGuest(b.AllocConstant(addr), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV Rm,Rn -EMITTER(MOV) { - Value *v = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.B Rm,@Rn -EMITTER(MOVBS) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.LoadGPR(i.Rm, VALUE_I8); - b.StoreGuest(addr, v); -} - -// MOV.W Rm,@Rn -EMITTER(MOVWS) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.LoadGPR(i.Rm, VALUE_I16); - b.StoreGuest(addr, v); -} - -// MOV.L Rm,@Rn -EMITTER(MOVLS) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreGuest(addr, v); -} - -// MOV.B @Rm,Rn -EMITTER(MOVBL) { - Value *v = b.LoadGuest(b.LoadGPR(i.Rm, VALUE_I32), VALUE_I8); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.W @Rm,Rn -EMITTER(MOVWL) { - Value *v = b.LoadGuest(b.LoadGPR(i.Rm, VALUE_I32), VALUE_I16); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.L @Rm,Rn -EMITTER(MOVLL) { - Value *v = b.LoadGuest(b.LoadGPR(i.Rm, VALUE_I32), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.B Rm,@-Rn -EMITTER(MOVBM) { - // decrease Rn by 1 - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - addr = b.Sub(addr, b.AllocConstant(1)); - b.StoreGPR(i.Rn, addr); - - // store Rm at (Rn) - Value *v = b.LoadGPR(i.Rm, VALUE_I8); - b.StoreGuest(addr, v); -} - -// MOV.W Rm,@-Rn -EMITTER(MOVWM) { - // decrease Rn by 2 - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - addr = b.Sub(addr, b.AllocConstant(2)); - b.StoreGPR(i.Rn, addr); - - // store Rm at (Rn) - Value *v = b.LoadGPR(i.Rm, VALUE_I16); - b.StoreGuest(addr, v); -} - -// MOV.L Rm,@-Rn -EMITTER(MOVLM) { - // decrease Rn by 4 - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - 
addr = b.Sub(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - - // store Rm at (Rn) - Value *v = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreGuest(addr, v); -} - -// MOV.B @Rm+,Rn -EMITTER(MOVBP) { - // store (Rm) at Rn - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I8); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(i.Rn, v); - - // increase Rm by 1 - // FIXME if rm != rn??? - addr = b.Add(addr, b.AllocConstant(1)); - b.StoreGPR(i.Rm, addr); -} - -// MOV.W @Rm+,Rn -EMITTER(MOVWP) { - // store (Rm) at Rn - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I16); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(i.Rn, v); - - // increase Rm by 2 - // FIXME if rm != rn??? - addr = b.Add(addr, b.AllocConstant(2)); - b.StoreGPR(i.Rm, addr); -} - -// MOV.L @Rm+,Rn -EMITTER(MOVLP) { - // store (Rm) at Rn - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreGPR(i.Rn, v); - - // increase Rm by 2 - // FIXME if rm != rn??? 
- addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// MOV.B R0,@(disp,Rn) -EMITTER(MOVBS0D) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp)); - Value *v = b.LoadGPR(0, VALUE_I8); - b.StoreGuest(addr, v); -} - -// MOV.W R0,@(disp,Rn) -EMITTER(MOVWS0D) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp * 2)); - Value *v = b.LoadGPR(0, VALUE_I16); - b.StoreGuest(addr, v); -} - -// MOV.L Rm,@(disp,Rn) -EMITTER(MOVLSMD) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp * 4)); - Value *v = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreGuest(addr, v); -} - -// MOV.B @(disp,Rm),R0 -EMITTER(MOVBLD0) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp)); - Value *v = b.LoadGuest(addr, VALUE_I8); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(0, v); -} - -// MOV.W @(disp,Rm),R0 -EMITTER(MOVWLD0) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp * 2)); - Value *v = b.LoadGuest(addr, VALUE_I16); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(0, v); -} - -// MOV.L @(disp,Rm),Rn -EMITTER(MOVLLDN) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp * 4)); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.B Rm,@(R0,Rn) -EMITTER(MOVBS0) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGPR(i.Rn, VALUE_I32)); - Value *v = b.LoadGPR(i.Rm, VALUE_I8); - b.StoreGuest(addr, v); -} - -// MOV.W Rm,@(R0,Rn) -EMITTER(MOVWS0) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGPR(i.Rn, VALUE_I32)); - Value *v = b.LoadGPR(i.Rm, VALUE_I16); - b.StoreGuest(addr, v); -} - -// MOV.L Rm,@(R0,Rn) -EMITTER(MOVLS0) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGPR(i.Rn, VALUE_I32)); - Value *v = b.LoadGPR(i.Rm, 
VALUE_I32); - b.StoreGuest(addr, v); -} - -// MOV.B @(R0,Rm),Rn -EMITTER(MOVBL0) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGPR(i.Rm, VALUE_I32)); - Value *v = b.SExt(b.LoadGuest(addr, VALUE_I8), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.W @(R0,Rm),Rn -EMITTER(MOVWL0) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGPR(i.Rm, VALUE_I32)); - Value *v = b.LoadGuest(addr, VALUE_I16); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.L @(R0,Rm),Rn -EMITTER(MOVLL0) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGPR(i.Rm, VALUE_I32)); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// MOV.B R0,@(disp,GBR) -EMITTER(MOVBS0G) { - Value *addr = b.LoadGBR(); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp)); - Value *v = b.LoadGPR(0, VALUE_I8); - b.StoreGuest(addr, v); -} - -// MOV.W R0,@(disp,GBR) -EMITTER(MOVWS0G) { - Value *addr = b.LoadGBR(); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp * 2)); - Value *v = b.LoadGPR(0, VALUE_I16); - b.StoreGuest(addr, v); -} - -// MOV.L R0,@(disp,GBR) -EMITTER(MOVLS0G) { - Value *addr = b.LoadGBR(); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp * 4)); - Value *v = b.LoadGPR(0, VALUE_I32); - b.StoreGuest(addr, v); -} - -// MOV.B @(disp,GBR),R0 -EMITTER(MOVBLG0) { - Value *addr = b.LoadGBR(); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp)); - Value *v = b.LoadGuest(addr, VALUE_I8); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(0, v); -} - -// MOV.W @(disp,GBR),R0 -EMITTER(MOVWLG0) { - Value *addr = b.LoadGBR(); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp * 2)); - Value *v = b.LoadGuest(addr, VALUE_I16); - v = b.SExt(v, VALUE_I32); - b.StoreGPR(0, v); -} - -// MOV.L @(disp,GBR),R0 -EMITTER(MOVLLG0) { - Value *addr = b.LoadGBR(); - addr = b.Add(addr, b.AllocConstant((uint32_t)i.disp * 4)); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreGPR(0, v); -} - -// MOVA (disp,PC),R0 -EMITTER(MOVA) { 
- uint32_t addr = (i.disp * 4) + (i.addr & ~3) + 4; - b.StoreGPR(0, b.AllocConstant(addr)); -} - -// MOVT Rn -EMITTER(MOVT) { - b.StoreGPR(i.Rn, b.LoadT()); -} - -// SWAP.B Rm,Rn -EMITTER(SWAPB) { - const int nbits = 8; - Value *v = b.LoadGPR(i.Rm, VALUE_I32); - Value *mask = b.AllocConstant((1u << nbits) - 1); - Value *tmp = b.And(b.Xor(v, b.LShr(v, nbits)), mask); - Value *res = b.Xor(v, b.Or(tmp, b.Shl(tmp, nbits))); - b.StoreGPR(i.Rn, res); -} - -// SWAP.W Rm,Rn -EMITTER(SWAPW) { - const int nbits = 16; - Value *v = b.LoadGPR(i.Rm, VALUE_I32); - Value *mask = b.AllocConstant((1u << nbits) - 1); - Value *tmp = b.And(b.Xor(v, b.LShr(v, nbits)), mask); - Value *res = b.Xor(v, b.Or(tmp, b.Shl(tmp, nbits))); - b.StoreGPR(i.Rn, res); -} - -// XTRCT Rm,Rn -EMITTER(XTRCT) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - rn = b.LShr(b.And(rn, b.AllocConstant(0xffff0000)), 16); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - rm = b.Shl(b.And(rm, b.AllocConstant(0xffff)), 16); - b.StoreGPR(i.Rn, b.Or(rn, rm)); -} - -// code cycles t-bit -// 0011 nnnn mmmm 1100 1 - -// ADD Rm,Rn -EMITTER(ADD) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Add(rn, rm); - b.StoreGPR(i.Rn, v); -} - -// code cycles t-bit -// 0111 nnnn iiii iiii 1 - -// ADD #imm,Rn -EMITTER(ADDI) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *imm = b.AllocConstant((uint32_t)(int32_t)(int8_t)i.imm); - Value *v = b.Add(rn, imm); - b.StoreGPR(i.Rn, v); -} - -// code cycles t-bit -// 0011 nnnn mmmm 1110 1 carry -// ADDC Rm,Rn -EMITTER(ADDC) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Add(rn, rm); - v = b.Add(v, b.LoadT()); - b.StoreGPR(i.Rn, v); - - // compute carry flag, taken from Hacker's Delight - Value *and_rnrm = b.And(rn, rm); - Value *or_rnrm = b.Or(rn, rm); - Value *not_v = b.Not(v); - Value *carry = b.And(or_rnrm, not_v); - carry = b.Or(and_rnrm, carry); - b.StoreT(carry); -} - -// code cycles 
t-bit -// 0011 nnnn mmmm 1111 1 overflow -// ADDV Rm,Rn -EMITTER(ADDV) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Add(rn, rm); - b.StoreGPR(i.Rn, v); - - // compute overflow flag, taken from Hacker's Delight - Value *xor_vrn = b.Xor(v, rn); - Value *xor_vrm = b.Xor(v, rm); - Value *overflow = b.LShr(b.And(xor_vrn, xor_vrm), 31); - b.StoreT(overflow); -} - -// code cycles t-bit -// 1000 1000 iiii iiii 1 comparison result -// CMP/EQ #imm,R0 -EMITTER(CMPEQI) { - Value *imm = b.AllocConstant((uint32_t)(int32_t)(int8_t)i.imm); - Value *r0 = b.LoadGPR(0, VALUE_I32); - b.StoreT(b.CmpEQ(r0, imm)); -} - -// code cycles t-bit -// 0011 nnnn mmmm 0000 1 comparison result -// CMP/EQ Rm,Rn -EMITTER(CMPEQ) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreT(b.CmpEQ(rn, rm)); -} - -// code cycles t-bit -// 0011 nnnn mmmm 0010 1 comparison result -// CMP/HS Rm,Rn -EMITTER(CMPHS) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreT(b.CmpUGE(rn, rm)); -} - -// code cycles t-bit -// 0011 nnnn mmmm 0011 1 comparison result -// CMP/GE Rm,Rn -EMITTER(CMPGE) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreT(b.CmpSGE(rn, rm)); -} - -// code cycles t-bit -// 0011 nnnn mmmm 0110 1 comparison result -// CMP/HI Rm,Rn -EMITTER(CMPHI) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreT(b.CmpUGT(rn, rm)); -} - -// code cycles t-bit -// 0011 nnnn mmmm 0111 1 comparison result -// CMP/GT Rm,Rn -EMITTER(CMPGT) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreT(b.CmpSGT(rn, rm)); -} - -// code cycles t-bit -// 0100 nnnn 0001 0001 1 comparison result -// CMP/PZ Rn -EMITTER(CMPPZ) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - b.StoreT(b.CmpSGE(rn, b.AllocConstant(0))); -} - -// code cycles t-bit -// 0100 nnnn 0001 0101 1 
comparison result -// CMP/PL Rn -EMITTER(CMPPL) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - b.StoreT(b.CmpSGT(rn, b.AllocConstant(0))); -} - -// code cycles t-bit -// 0010 nnnn mmmm 1100 1 comparison result -// CMP/STR Rm,Rn -EMITTER(CMPSTR) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *diff = b.Xor(rn, rm); - - // if any diff is zero, the bytes match - Value *b4_eq = - b.CmpEQ(b.And(diff, b.AllocConstant(0xff000000)), b.AllocConstant(0)); - Value *b3_eq = - b.CmpEQ(b.And(diff, b.AllocConstant(0x00ff0000)), b.AllocConstant(0)); - Value *b2_eq = - b.CmpEQ(b.And(diff, b.AllocConstant(0x0000ff00)), b.AllocConstant(0)); - Value *b1_eq = - b.CmpEQ(b.And(diff, b.AllocConstant(0x000000ff)), b.AllocConstant(0)); - - b.StoreT(b.Or(b.Or(b.Or(b1_eq, b2_eq), b3_eq), b4_eq)); -} - -// code cycles t-bit -// 0010 nnnn mmmm 0111 1 calculation result -// DIV0S Rm,Rn -EMITTER(DIV0S) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *qm = b.Xor(rn, rm); - - // update Q == M flag - b.StoreContext(offsetof(sh4_context_t, sr_qm), b.Not(qm)); - - // msb of Q ^ M -> T - b.StoreT(b.LShr(qm, 31)); -} - -// code cycles t-bit -// 0000 0000 0001 1001 1 0 -// DIV0U -EMITTER(DIV0U) { // - b.StoreContext(offsetof(sh4_context_t, sr_qm), b.AllocConstant(0x80000000)); - - b.StoreSR(b.And(b.LoadSR(), b.AllocConstant(~T))); -} - -// code cycles t-bit -// 0011 nnnn mmmm 0100 1 calculation result -// DIV1 Rm,Rn -EMITTER(DIV1) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - - // if Q == M, r0 = ~Rm and C = 1; else, r0 = Rm and C = 0 - Value *qm = - b.AShr(b.LoadContext(offsetof(sh4_context_t, sr_qm), VALUE_I32), 31); - Value *r0 = b.Xor(rm, qm); - Value *carry = b.LShr(qm, 31); - - // initialize output bit as (Q == M) ^ Rn - qm = b.Xor(qm, rn); - - // shift Rn left by 1 and add T - rn = b.Shl(rn, 1); - rn = b.Or(rn, b.LoadT()); - - // add or subtract Rm based on r0 and C 
- Value *rd = b.Add(rn, r0); - rd = b.Add(rd, carry); - b.StoreGPR(i.Rn, rd); - - // if C is cleared, invert output bit - Value *and_rnr0 = b.And(rn, r0); - Value *or_rnr0 = b.Or(rn, r0); - Value *not_rd = b.Not(rd); - carry = b.And(or_rnr0, not_rd); - carry = b.Or(and_rnr0, carry); - carry = b.LShr(carry, 31); - qm = b.Select(carry, qm, b.Not(qm)); - b.StoreContext(offsetof(sh4_context_t, sr_qm), qm); - - // set T to output bit (which happens to be Q == M) - b.StoreT(b.LShr(qm, 31)); -} - -// DMULS.L Rm,Rn -EMITTER(DMULS) { - Value *rn = b.SExt(b.LoadGPR(i.Rn, VALUE_I32), VALUE_I64); - Value *rm = b.SExt(b.LoadGPR(i.Rm, VALUE_I32), VALUE_I64); - - Value *p = b.SMul(rm, rn); - Value *low = b.Trunc(p, VALUE_I32); - Value *high = b.Trunc(b.LShr(p, 32), VALUE_I32); - - b.StoreContext(offsetof(sh4_context_t, macl), low); - b.StoreContext(offsetof(sh4_context_t, mach), high); -} - -// DMULU.L Rm,Rn -EMITTER(DMULU) { - Value *rn = b.ZExt(b.LoadGPR(i.Rn, VALUE_I32), VALUE_I64); - Value *rm = b.ZExt(b.LoadGPR(i.Rm, VALUE_I32), VALUE_I64); - - Value *p = b.UMul(rm, rn); - Value *low = b.Trunc(p, VALUE_I32); - Value *high = b.Trunc(b.LShr(p, 32), VALUE_I32); - - b.StoreContext(offsetof(sh4_context_t, macl), low); - b.StoreContext(offsetof(sh4_context_t, mach), high); -} - -// DT Rn -EMITTER(DT) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.Sub(rn, b.AllocConstant(1)); - b.StoreGPR(i.Rn, v); - b.StoreT(b.CmpEQ(v, b.AllocConstant(0))); -} - -// EXTS.B Rm,Rn -EMITTER(EXTSB) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I8); - Value *v = b.SExt(rm, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// EXTS.W Rm,Rn -EMITTER(EXTSW) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I16); - Value *v = b.SExt(rm, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// EXTU.B Rm,Rn -EMITTER(EXTUB) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I8); - Value *v = b.ZExt(rm, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// EXTU.W Rm,Rn -EMITTER(EXTUW) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I16); - Value *v = b.ZExt(rm, VALUE_I32); 
- b.StoreGPR(i.Rn, v); -} - -// MAC.L @Rm+,@Rn+ -EMITTER(MACL) { - LOG_FATAL("MACL not implemented"); -} - -// MAC.W @Rm+,@Rn+ -EMITTER(MACW) { - LOG_FATAL("MACW not implemented"); -} - -// MUL.L Rm,Rn -EMITTER(MULL) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.SMul(rn, rm); - b.StoreContext(offsetof(sh4_context_t, macl), v); -} - -// MULS Rm,Rn -EMITTER(MULS) { - Value *rn = b.SExt(b.LoadGPR(i.Rn, VALUE_I16), VALUE_I32); - Value *rm = b.SExt(b.LoadGPR(i.Rm, VALUE_I16), VALUE_I32); - Value *v = b.SMul(rn, rm); - b.StoreContext(offsetof(sh4_context_t, macl), v); -} - -// MULU Rm,Rn -EMITTER(MULU) { - Value *rn = b.ZExt(b.LoadGPR(i.Rn, VALUE_I16), VALUE_I32); - Value *rm = b.ZExt(b.LoadGPR(i.Rm, VALUE_I16), VALUE_I32); - Value *v = b.UMul(rn, rm); - b.StoreContext(offsetof(sh4_context_t, macl), v); -} - -// NEG Rm,Rn -EMITTER(NEG) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Neg(rm); - b.StoreGPR(i.Rn, v); -} - -// NEGC Rm,Rn -EMITTER(NEGC) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *t = b.LoadT(); - Value *v = b.Sub(b.Neg(rm), t); - b.StoreGPR(i.Rn, v); - Value *carry = b.Or(t, rm); - b.StoreT(carry); -} - -// SUB Rm,Rn -EMITTER(SUB) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Sub(rn, rm); - b.StoreGPR(i.Rn, v); -} - -// SUBC Rm,Rn -EMITTER(SUBC) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Sub(rn, rm); - v = b.Sub(v, b.LoadT()); - b.StoreGPR(i.Rn, v); - - // compute carry flag, taken from Hacker's Delight - Value *l = b.And(b.Not(rn), rm); - Value *r = b.And(b.Or(b.Not(rn), rm), v); - Value *carry = b.Or(l, r); - b.StoreT(carry); -} - -// SUBV Rm,Rn -EMITTER(SUBV) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Sub(rn, rm); - b.StoreGPR(i.Rn, v); - - // compute overflow flag, taken from Hacker's Delight - Value *xor_rnrm = 
b.Xor(rn, rm); - Value *xor_vrn = b.Xor(v, rn); - Value *overflow = b.LShr(b.And(xor_rnrm, xor_vrn), 31); - b.StoreT(overflow); -} - -// code cycles t-bit -// 0010 nnnn mmmm 1001 1 - -// AND Rm,Rn -EMITTER(AND) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.And(rn, rm); - b.StoreGPR(i.Rn, v); -} - -// code cycles t-bit -// 1100 1001 iiii iiii 1 - -// AND #imm,R0 -EMITTER(ANDI) { - Value *r0 = b.LoadGPR(0, VALUE_I32); - Value *imm = b.AllocConstant((uint32_t)i.imm); - Value *v = b.And(r0, imm); - b.StoreGPR(0, v); -} - -// code cycles t-bit -// 1100 1101 iiii iiii 1 - -// AND.B #imm,@(R0,GBR) -EMITTER(ANDB) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGBR()); - Value *v = b.LoadGuest(addr, VALUE_I8); - v = b.And(v, b.AllocConstant((uint8_t)i.imm)); - b.StoreGuest(addr, v); -} - -// NOT Rm,Rn -EMITTER(NOT) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Not(rm); - b.StoreGPR(i.Rn, v); -} - -// OR Rm,Rn -EMITTER(OR) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Or(rn, rm); - b.StoreGPR(i.Rn, v); -} - -// OR #imm,R0 -EMITTER(ORI) { - Value *r0 = b.LoadGPR(0, VALUE_I32); - Value *imm = b.AllocConstant((uint32_t)i.imm); - Value *v = b.Or(r0, imm); - b.StoreGPR(0, v); -} - -// OR.B #imm,@(R0,GBR) -EMITTER(ORB) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGBR()); - Value *v = b.LoadGuest(addr, VALUE_I8); - v = b.Or(v, b.AllocConstant((uint8_t)i.imm)); - b.StoreGuest(addr, v); -} - -// TAS.B @Rn -EMITTER(TAS) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I8); - b.StoreGuest(addr, b.Or(v, b.AllocConstant((uint8_t)0x80))); - b.StoreT(b.CmpEQ(v, b.AllocConstant((uint8_t)0))); -} - -// TST Rm,Rn -EMITTER(TST) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.And(rn, rm); - b.StoreT(b.CmpEQ(v, b.AllocConstant(0))); -} - -// 
TST #imm,R0 -EMITTER(TSTI) { - Value *r0 = b.LoadGPR(0, VALUE_I32); - Value *imm = b.AllocConstant((uint32_t)i.imm); - Value *v = b.And(r0, imm); - b.StoreT(b.CmpEQ(v, b.AllocConstant((uint32_t)0))); -} - -// TST.B #imm,@(R0,GBR) -EMITTER(TSTB) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGBR()); - Value *data = b.LoadGuest(addr, VALUE_I8); - Value *imm = b.AllocConstant((uint8_t)i.imm); - Value *v = b.And(data, imm); - b.StoreT(b.CmpEQ(v, b.AllocConstant((uint8_t)0))); -} - -// XOR Rm,Rn -EMITTER(XOR) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.Xor(rn, rm); - b.StoreGPR(i.Rn, v); -} - -// XOR #imm,R0 -EMITTER(XORI) { - Value *r0 = b.LoadGPR(0, VALUE_I32); - Value *imm = b.AllocConstant((uint32_t)i.imm); - Value *v = b.Xor(r0, imm); - b.StoreGPR(0, v); -} - -// XOR.B #imm,@(R0,GBR) -EMITTER(XORB) { - Value *addr = b.LoadGPR(0, VALUE_I32); - addr = b.Add(addr, b.LoadGBR()); - Value *data = b.LoadGuest(addr, VALUE_I8); - Value *imm = b.AllocConstant((uint8_t)i.imm); - Value *v = b.Xor(data, imm); - b.StoreGuest(addr, v); -} - -// ROTL Rn -EMITTER(ROTL) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rn_msb = b.And(b.LShr(rn, 31), b.AllocConstant(0x1)); - Value *v = b.Or(b.Shl(rn, 1), rn_msb); - b.StoreGPR(i.Rn, v); - b.StoreT(rn_msb); -} - -// ROTR Rn -EMITTER(ROTR) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rn_lsb = b.And(rn, b.AllocConstant(0x1)); - Value *v = b.Shl(rn_lsb, 31); - v = b.Or(v, b.LShr(rn, 1)); - b.StoreGPR(i.Rn, v); - b.StoreT(rn_lsb); -} - -// ROTCL Rn -EMITTER(ROTCL) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rn_msb = b.And(b.LShr(rn, 31), b.AllocConstant(0x1)); - Value *v = b.Shl(rn, 1); - v = b.Or(v, b.LoadT()); - b.StoreGPR(i.Rn, v); - b.StoreT(rn_msb); -} - -// ROTCR Rn -EMITTER(ROTCR) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rn_lsb = b.And(rn, b.AllocConstant(0x1)); - Value *v = b.Shl(b.LoadT(), 31); - v = b.Or(v, b.LShr(rn, 
1)); - b.StoreGPR(i.Rn, v); - b.StoreT(rn_lsb); -} - -// SHAD Rm,Rn -EMITTER(SHAD) { - // when Rm >= 0, Rn << Rm - // when Rm < 0, Rn >> Rm - // when shifting right > 32, Rn = (Rn >= 0 ? 0 : -1) - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.AShd(rn, rm); - b.StoreGPR(i.Rn, v); -} - -// SHAL Rn (same as SHLL) -EMITTER(SHAL) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rn_msb = b.And(b.LShr(rn, 31), b.AllocConstant(0x1)); - Value *v = b.Shl(rn, 1); - b.StoreGPR(i.Rn, v); - b.StoreT(rn_msb); -} - -// SHAR Rn -EMITTER(SHAR) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rn_lsb = b.And(rn, b.AllocConstant(0x1)); - Value *v = b.AShr(rn, 1); - b.StoreGPR(i.Rn, v); - b.StoreT(rn_lsb); -} - -// SHLD Rm,Rn -EMITTER(SHLD) { - // when Rm >= 0, Rn << Rm - // when Rm < 0, Rn >> Rm - // when shifting right >= 32, Rn = 0 - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LShd(rn, rm); - b.StoreGPR(i.Rn, v); -} - -// SHLL Rn (same as SHAL) -EMITTER(SHLL) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rn_msb = b.And(b.LShr(rn, 31), b.AllocConstant(0x1)); - Value *v = b.Shl(rn, 1); - b.StoreGPR(i.Rn, v); - b.StoreT(rn_msb); -} - -// SHLR Rn -EMITTER(SHLR) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *rn_lsb = b.And(rn, b.AllocConstant(0x1)); - Value *v = b.LShr(rn, 1); - b.StoreGPR(i.Rn, v); - b.StoreT(rn_lsb); -} - -// SHLL2 Rn -EMITTER(SHLL2) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.Shl(rn, 2); - b.StoreGPR(i.Rn, v); -} - -// SHLR2 Rn -EMITTER(SHLR2) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.LShr(rn, 2); - b.StoreGPR(i.Rn, v); -} - -// SHLL8 Rn -EMITTER(SHLL8) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.Shl(rn, 8); - b.StoreGPR(i.Rn, v); -} - -// SHLR8 Rn -EMITTER(SHLR8) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.LShr(rn, 8); - b.StoreGPR(i.Rn, v); -} - -// SHLL16 Rn -EMITTER(SHLL16) { - 
Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.Shl(rn, 16); - b.StoreGPR(i.Rn, v); -} - -// SHLR16 Rn -EMITTER(SHLR16) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - Value *v = b.LShr(rn, 16); - b.StoreGPR(i.Rn, v); -} - -// code cycles t-bit -// 1000 1011 dddd dddd 3/1 - -// BF disp -EMITTER(BF) { - uint32_t dest_addr = ((int8_t)i.disp * 2) + i.addr + 4; - Value *cond = b.LoadT(); - b.BranchCond(cond, b.AllocConstant(i.addr + 2), b.AllocConstant(dest_addr)); -} - -// code cycles t-bit -// 1000 1111 dddd dddd 3/1 - -// BFS disp -EMITTER(BFS) { - Value *cond = b.LoadT(); - b.EmitDelayInstr(); - uint32_t dest_addr = ((int8_t)i.disp * 2) + i.addr + 4; - b.BranchCond(cond, b.AllocConstant(i.addr + 4), b.AllocConstant(dest_addr)); -} - -// code cycles t-bit -// 1000 1001 dddd dddd 3/1 - -// BT disp -EMITTER(BT) { - uint32_t dest_addr = ((int8_t)i.disp * 2) + i.addr + 4; - Value *cond = b.LoadT(); - b.BranchCond(cond, b.AllocConstant(dest_addr), b.AllocConstant(i.addr + 2)); -} - -// code cycles t-bit -// 1000 1101 dddd dddd 2/1 - -// BTS disp -EMITTER(BTS) { - Value *cond = b.LoadT(); - b.EmitDelayInstr(); - uint32_t dest_addr = ((int8_t)i.disp * 2) + i.addr + 4; - b.BranchCond(cond, b.AllocConstant(dest_addr), b.AllocConstant(i.addr + 4)); -} - -// code cycles t-bit -// 1010 dddd dddd dddd 2 - -// BRA disp -EMITTER(BRA) { - b.EmitDelayInstr(); - int32_t disp = ((i.disp & 0xfff) << 20) >> - 20; // 12-bit displacement must be sign extended - uint32_t dest_addr = (disp * 2) + i.addr + 4; - b.Branch(b.AllocConstant(dest_addr)); -} - -// code cycles t-bit -// 0000 mmmm 0010 0011 2 - -// BRAF Rn -EMITTER(BRAF) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - b.EmitDelayInstr(); - Value *dest_addr = b.Add(b.AllocConstant(i.addr + 4), rn); - b.Branch(dest_addr); -} - -// code cycles t-bit -// 1011 dddd dddd dddd 2 - -// BSR disp -EMITTER(BSR) { - b.EmitDelayInstr(); - int32_t disp = ((i.disp & 0xfff) << 20) >> - 20; // 12-bit displacement must be sign extended - uint32_t 
ret_addr = i.addr + 4; - uint32_t dest_addr = ret_addr + disp * 2; - b.StorePR(b.AllocConstant(ret_addr)); - b.Branch(b.AllocConstant(dest_addr)); -} - -// code cycles t-bit -// 0000 mmmm 0000 0011 2 - -// BSRF Rn -EMITTER(BSRF) { - Value *rn = b.LoadGPR(i.Rn, VALUE_I32); - b.EmitDelayInstr(); - Value *ret_addr = b.AllocConstant(i.addr + 4); - Value *dest_addr = b.Add(rn, ret_addr); - b.StorePR(ret_addr); - b.Branch(dest_addr); -} - -// JMP @Rm -EMITTER(JMP) { - Value *dest_addr = b.LoadGPR(i.Rn, VALUE_I32); - b.EmitDelayInstr(); - b.Branch(dest_addr); -} - -// JSR @Rn -EMITTER(JSR) { - Value *dest_addr = b.LoadGPR(i.Rn, VALUE_I32); - b.EmitDelayInstr(); - Value *ret_addr = b.AllocConstant(i.addr + 4); - b.StorePR(ret_addr); - b.Branch(dest_addr); -} - -// RTS -EMITTER(RTS) { - Value *dest_addr = b.LoadPR(); - b.EmitDelayInstr(); - b.Branch(dest_addr); -} - -// code cycles t-bit -// 0000 0000 0010 1000 1 - -// CLRMAC -EMITTER(CLRMAC) { - b.StoreContext(offsetof(sh4_context_t, mach), b.AllocConstant(0)); - b.StoreContext(offsetof(sh4_context_t, macl), b.AllocConstant(0)); -} - -EMITTER(CLRS) { - Value *sr = b.LoadSR(); - sr = b.And(sr, b.AllocConstant(~S)); - b.StoreSR(sr); -} - -// code cycles t-bit -// 0000 0000 0000 1000 1 - -// CLRT -EMITTER(CLRT) { - b.StoreT(b.AllocConstant(0)); -} - -// LDC Rm,SR -EMITTER(LDCSR) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreSR(rm); -} - -// LDC Rm,GBR -EMITTER(LDCGBR) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreGBR(rm); -} - -// LDC Rm,VBR -EMITTER(LDCVBR) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, vbr), rm); -} - -// LDC Rm,SSR -EMITTER(LDCSSR) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, ssr), rm); -} - -// LDC Rm,SPC -EMITTER(LDCSPC) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, spc), rm); -} - -// LDC Rm,DBR -EMITTER(LDCDBR) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - 
b.StoreContext(offsetof(sh4_context_t, dbr), rm); -} - -// LDC.L Rm,Rn_BANK -EMITTER(LDCRBANK) { - int reg = i.Rn & 0x7; - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, ralt) + reg * 4, rm); -} - -// LDC.L @Rm+,SR -EMITTER(LDCMSR) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreSR(v); - // reload Rm, sr store could have swapped banks - addr = b.LoadGPR(i.Rm, VALUE_I32); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDC.L @Rm+,GBR -EMITTER(LDCMGBR) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreGBR(v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDC.L @Rm+,VBR -EMITTER(LDCMVBR) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, vbr), v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDC.L @Rm+,SSR -EMITTER(LDCMSSR) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, ssr), v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDC.L @Rm+,SPC -EMITTER(LDCMSPC) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, spc), v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDC.L @Rm+,DBR -EMITTER(LDCMDBR) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, dbr), v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDC.L @Rm+,Rn_BANK -EMITTER(LDCMRBANK) { - int reg = i.Rn & 0x7; - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreGPR(i.Rm, b.Add(addr, b.AllocConstant(4))); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, 
ralt) + reg * 4, v); -} - -// LDS Rm,MACH -EMITTER(LDSMACH) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, mach), rm); -} - -// LDS Rm,MACL -EMITTER(LDSMACL) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, macl), rm); -} - -// LDS Rm,PR -EMITTER(LDSPR) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StorePR(rm); -} - -// LDS.L @Rm+,MACH -EMITTER(LDSMMACH) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, mach), v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDS.L @Rm+,MACL -EMITTER(LDSMMACL) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, macl), v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDS.L @Rm+,PR -EMITTER(LDSMPR) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StorePR(v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// MOVCA.L R0,@Rn -EMITTER(MOVCAL) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - Value *r0 = b.LoadGPR(0, VALUE_I32); - b.StoreGuest(addr, r0); -} - -// NOP -EMITTER(NOP) {} - -// OCBI -EMITTER(OCBI) {} - -// OCBP -EMITTER(OCBP) {} - -// OCBWB -EMITTER(OCBWB) {} - -// PREF @Rn -EMITTER(PREF) { - Value *prefetch = b.LoadContext(offsetof(sh4_context_t, Prefetch), VALUE_I64); - Value *addr = b.ZExt(b.LoadGPR(i.Rn, VALUE_I32), VALUE_I64); - b.CallExternal2(prefetch, addr); -} - -// RTE -EMITTER(RTE) { - Value *spc = b.LoadContext(offsetof(sh4_context_t, spc), VALUE_I32); - Value *ssr = b.LoadContext(offsetof(sh4_context_t, ssr), VALUE_I32); - b.StoreSR(ssr); - b.EmitDelayInstr(); - b.Branch(spc); -} - -// SETS -EMITTER(SETS) { - b.StoreSR(b.Or(b.LoadSR(), b.AllocConstant(S))); -} - -// SETT -EMITTER(SETT) { - b.StoreT(b.AllocConstant(1)); -} - -// SLEEP -EMITTER(SLEEP) { - 
LOG_FATAL("SLEEP not implemented"); -} - -// STC SR,Rn -EMITTER(STCSR) { - Value *v = b.LoadSR(); - b.StoreGPR(i.Rn, v); -} - -// STC GBR,Rn -EMITTER(STCGBR) { - Value *v = b.LoadGBR(); - b.StoreGPR(i.Rn, v); -} - -// STC VBR,Rn -EMITTER(STCVBR) { - Value *v = b.LoadContext(offsetof(sh4_context_t, vbr), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// STC SSR,Rn -EMITTER(STCSSR) { - Value *v = b.LoadContext(offsetof(sh4_context_t, ssr), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// STC SPC,Rn -EMITTER(STCSPC) { - Value *v = b.LoadContext(offsetof(sh4_context_t, spc), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// STC SGR,Rn -EMITTER(STCSGR) { - Value *v = b.LoadContext(offsetof(sh4_context_t, sgr), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// STC DBR,Rn -EMITTER(STCDBR) { - Value *v = b.LoadContext(offsetof(sh4_context_t, dbr), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// STC Rm_BANK,Rn -EMITTER(STCRBANK) { - int reg = i.Rm & 0x7; - Value *v = b.LoadContext(offsetof(sh4_context_t, ralt) + reg * 4, VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// STC.L SR,@-Rn -EMITTER(STCMSR) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *v = b.LoadSR(); - b.StoreGuest(addr, v); -} - -// STC.L GBR,@-Rn -EMITTER(STCMGBR) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *v = b.LoadGBR(); - b.StoreGuest(addr, v); -} - -// STC.L VBR,@-Rn -EMITTER(STCMVBR) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *v = b.LoadContext(offsetof(sh4_context_t, vbr), VALUE_I32); - b.StoreGuest(addr, v); -} - -// STC.L SSR,@-Rn -EMITTER(STCMSSR) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *v = b.LoadContext(offsetof(sh4_context_t, ssr), VALUE_I32); - b.StoreGuest(addr, v); -} - -// STC.L SPC,@-Rn -EMITTER(STCMSPC) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), 
b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *v = b.LoadContext(offsetof(sh4_context_t, spc), VALUE_I32); - b.StoreGuest(addr, v); -} - -// STC.L SGR,@-Rn -EMITTER(STCMSGR) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *v = b.LoadContext(offsetof(sh4_context_t, sgr), VALUE_I32); - b.StoreGuest(addr, v); -} - -// STC.L DBR,@-Rn -EMITTER(STCMDBR) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *v = b.LoadContext(offsetof(sh4_context_t, dbr), VALUE_I32); - b.StoreGuest(addr, v); -} - -// STC.L Rm_BANK,@-Rn -EMITTER(STCMRBANK) { - int reg = i.Rm & 0x7; - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *v = b.LoadContext(offsetof(sh4_context_t, ralt) + reg * 4, VALUE_I32); - b.StoreGuest(addr, v); -} - -// STS MACH,Rn -EMITTER(STSMACH) { - Value *v = b.LoadContext(offsetof(sh4_context_t, mach), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// STS MACL,Rn -EMITTER(STSMACL) { - Value *v = b.LoadContext(offsetof(sh4_context_t, macl), VALUE_I32); - b.StoreGPR(i.Rn, v); -} - -// STS PR,Rn -EMITTER(STSPR) { - Value *v = b.LoadPR(); - b.StoreGPR(i.Rn, v); -} - -// STS.L MACH,@-Rn -EMITTER(STSMMACH) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - - Value *mach = b.LoadContext(offsetof(sh4_context_t, mach), VALUE_I32); - b.StoreGuest(addr, mach); -} - -// STS.L MACL,@-Rn -EMITTER(STSMMACL) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - - Value *macl = b.LoadContext(offsetof(sh4_context_t, macl), VALUE_I32); - b.StoreGuest(addr, macl); -} - -// STS.L PR,@-Rn -EMITTER(STSMPR) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - - Value *pr = b.LoadPR(); - b.StoreGuest(addr, pr); -} - -// TRAPA #imm -EMITTER(TRAPA) { - LOG_FATAL("TRAPA not 
implemented"); -} - -// FLDI0 FRn 1111nnnn10001101 -EMITTER(FLDI0) { - b.StoreFPR(i.Rn, b.AllocConstant(0)); -} - -// FLDI1 FRn 1111nnnn10011101 -EMITTER(FLDI1) { - b.StoreFPR(i.Rn, b.AllocConstant(0x3F800000)); -} - -// FMOV FRm,FRn 1111nnnnmmmm1100 -// FMOV DRm,DRn 1111nnn0mmm01100 -// FMOV XDm,DRn 1111nnn0mmm11100 -// FMOV DRm,XDn 1111nnn1mmm01100 -// FMOV XDm,XDn 1111nnn1mmm11100 -EMITTER(FMOV) { - if (b.flags() & SH4_DOUBLE_SZ) { - if (i.Rm & 1) { - Value *rm = b.LoadXFR(i.Rm & 0xe, VALUE_I64); - if (i.Rn & 1) { - b.StoreXFR(i.Rn & 0xe, rm); - } else { - b.StoreFPR(i.Rn, rm); - } - } else { - Value *rm = b.LoadFPR(i.Rm, VALUE_I64); - if (i.Rn & 1) { - b.StoreXFR(i.Rn & 0xe, rm); - } else { - b.StoreFPR(i.Rn, rm); - } - } - } else { - b.StoreFPR(i.Rn, b.LoadFPR(i.Rm, VALUE_I32)); - } -} - -// FMOV.S @Rm,FRn 1111nnnnmmmm1000 -// FMOV @Rm,DRn 1111nnn0mmmm1000 -// FMOV @Rm,XDn 1111nnn1mmmm1000 -EMITTER(FMOV_LOAD) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - - if (b.flags() & SH4_DOUBLE_SZ) { - Value *v_low = b.LoadGuest(addr, VALUE_I32); - Value *v_high = b.LoadGuest(b.Add(addr, b.AllocConstant(4)), VALUE_I32); - if (i.Rn & 1) { - b.StoreXFR(i.Rn & 0xe, v_low); - b.StoreXFR(i.Rn, v_high); - } else { - b.StoreFPR(i.Rn, v_low); - b.StoreFPR(i.Rn | 0x1, v_high); - } - } else { - b.StoreFPR(i.Rn, b.LoadGuest(addr, VALUE_I32)); - } -} - -// FMOV.S @(R0,Rm),FRn 1111nnnnmmmm0110 -// FMOV @(R0,Rm),DRn 1111nnn0mmmm0110 -// FMOV @(R0,Rm),XDn 1111nnn1mmmm0110 -EMITTER(FMOV_INDEX_LOAD) { - Value *addr = b.Add(b.LoadGPR(0, VALUE_I32), b.LoadGPR(i.Rm, VALUE_I32)); - - if (b.flags() & SH4_DOUBLE_SZ) { - Value *v_low = b.LoadGuest(addr, VALUE_I32); - Value *v_high = b.LoadGuest(b.Add(addr, b.AllocConstant(4)), VALUE_I32); - if (i.Rn & 1) { - b.StoreXFR(i.Rn & 0xe, v_low); - b.StoreXFR(i.Rn, v_high); - } else { - b.StoreFPR(i.Rn, v_low); - b.StoreFPR(i.Rn | 0x1, v_high); - } - } else { - b.StoreFPR(i.Rn, b.LoadGuest(addr, VALUE_I32)); - } -} - -// FMOV.S FRm,@Rn 
1111nnnnmmmm1010 -// FMOV DRm,@Rn 1111nnnnmmm01010 -// FMOV XDm,@Rn 1111nnnnmmm11010 -EMITTER(FMOV_STORE) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - - if (b.flags() & SH4_DOUBLE_SZ) { - Value *addr_low = addr; - Value *addr_high = b.Add(addr, b.AllocConstant(4)); - if (i.Rm & 1) { - b.StoreGuest(addr_low, b.LoadXFR(i.Rm & 0xe, VALUE_I32)); - b.StoreGuest(addr_high, b.LoadXFR(i.Rm, VALUE_I32)); - } else { - b.StoreGuest(addr_low, b.LoadFPR(i.Rm, VALUE_I32)); - b.StoreGuest(addr_high, b.LoadFPR(i.Rm | 0x1, VALUE_I32)); - } - } else { - b.StoreGuest(addr, b.LoadFPR(i.Rm, VALUE_I32)); - } -} - -// FMOV.S FRm,@(R0,Rn) 1111nnnnmmmm0111 -// FMOV DRm,@(R0,Rn) 1111nnnnmmm00111 -// FMOV XDm,@(R0,Rn) 1111nnnnmmm10111 -EMITTER(FMOV_INDEX_STORE) { - Value *addr = b.Add(b.LoadGPR(0, VALUE_I32), b.LoadGPR(i.Rn, VALUE_I32)); - - if (b.flags() & SH4_DOUBLE_SZ) { - Value *addr_low = addr; - Value *addr_high = b.Add(addr, b.AllocConstant(4)); - if (i.Rm & 1) { - b.StoreGuest(addr_low, b.LoadXFR(i.Rm & 0xe, VALUE_I32)); - b.StoreGuest(addr_high, b.LoadXFR(i.Rm, VALUE_I32)); - } else { - b.StoreGuest(addr_low, b.LoadFPR(i.Rm, VALUE_I32)); - b.StoreGuest(addr_high, b.LoadFPR(i.Rm | 0x1, VALUE_I32)); - } - } else { - b.StoreGuest(addr, b.LoadFPR(i.Rm, VALUE_I32)); - } -} - -// FMOV.S FRm,@-Rn 1111nnnnmmmm1011 -// FMOV DRm,@-Rn 1111nnnnmmm01011 -// FMOV XDm,@-Rn 1111nnnnmmm11011 -EMITTER(FMOV_SAVE) { - if (b.flags() & SH4_DOUBLE_SZ) { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(8)); - b.StoreGPR(i.Rn, addr); - - Value *addr_low = addr; - Value *addr_high = b.Add(addr, b.AllocConstant(4)); - - if (i.Rm & 1) { - b.StoreGuest(addr_low, b.LoadXFR(i.Rm & 0xe, VALUE_I32)); - b.StoreGuest(addr_high, b.LoadXFR(i.Rm, VALUE_I32)); - } else { - b.StoreGuest(addr_low, b.LoadFPR(i.Rm, VALUE_I32)); - b.StoreGuest(addr_high, b.LoadFPR(i.Rm | 0x1, VALUE_I32)); - } - } else { - Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - 
b.StoreGuest(addr, b.LoadFPR(i.Rm, VALUE_I32)); - } -} - -// FMOV.S @Rm+,FRn 1111nnnnmmmm1001 -// FMOV @Rm+,DRn 1111nnn0mmmm1001 -// FMOV @Rm+,XDn 1111nnn1mmmm1001 -EMITTER(FMOV_RESTORE) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - - if (b.flags() & SH4_DOUBLE_SZ) { - Value *v_low = b.LoadGuest(addr, VALUE_I32); - Value *v_high = b.LoadGuest(b.Add(addr, b.AllocConstant(4)), VALUE_I32); - if (i.Rn & 1) { - b.StoreXFR(i.Rn & 0xe, v_low); - b.StoreXFR(i.Rn, v_high); - } else { - b.StoreFPR(i.Rn, v_low); - b.StoreFPR(i.Rn | 0x1, v_high); - } - b.StoreGPR(i.Rm, b.Add(addr, b.AllocConstant(8))); - } else { - b.StoreFPR(i.Rn, b.LoadGuest(addr, VALUE_I32)); - b.StoreGPR(i.Rm, b.Add(addr, b.AllocConstant(4))); - } -} - -// FLDS FRm,FPUL 1111mmmm00011101 -EMITTER(FLDS) { - Value *rn = b.LoadFPR(i.Rm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, fpul), rn); -} - -// FSTS FPUL,FRn 1111nnnn00001101 -EMITTER(FSTS) { - Value *fpul = b.LoadContext(offsetof(sh4_context_t, fpul), VALUE_I32); - b.StoreFPR(i.Rn, fpul); -} - -// FABS FRn PR=0 1111nnnn01011101 -// FABS DRn PR=1 1111nnn001011101 -EMITTER(FABS) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - Value *v = b.FAbs(b.LoadFPR(n, VALUE_F64)); - b.StoreFPR(n, v); - } else { - Value *v = b.FAbs(b.LoadFPR(i.Rn, VALUE_F32)); - b.StoreFPR(i.Rn, v); - } -} - -// FSRRA FRn PR=0 1111nnnn01111101 -EMITTER(FSRRA) { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *v = b.FDiv(b.AllocConstant(1.0f), b.Sqrt(frn)); - b.StoreFPR(i.Rn, v); -} - -// FADD FRm,FRn PR=0 1111nnnnmmmm0000 -// FADD DRm,DRn PR=1 1111nnn0mmm00000 -EMITTER(FADD) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - int m = i.Rm & 0xe; - Value *drn = b.LoadFPR(n, VALUE_F64); - Value *drm = b.LoadFPR(m, VALUE_F64); - Value *v = b.FAdd(drn, drm); - b.StoreFPR(n, v); - } else { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *frm = b.LoadFPR(i.Rm, VALUE_F32); - Value *v = b.FAdd(frn, frm); - b.StoreFPR(i.Rn, v); - } -} - -// FCMP/EQ FRm,FRn 
PR=0 1111nnnnmmmm0100 -// FCMP/EQ DRm,DRn PR=1 1111nnn0mmm00100 -EMITTER(FCMPEQ) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - int m = i.Rm & 0xe; - Value *drn = b.LoadFPR(n, VALUE_F64); - Value *drm = b.LoadFPR(m, VALUE_F64); - Value *v = b.FCmpEQ(drn, drm); - b.StoreT(v); - } else { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *frm = b.LoadFPR(i.Rm, VALUE_F32); - Value *v = b.FCmpEQ(frn, frm); - b.StoreT(v); - } -} - -// FCMP/GT FRm,FRn PR=0 1111nnnnmmmm0101 -// FCMP/GT DRm,DRn PR=1 1111nnn0mmm00101 -EMITTER(FCMPGT) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - int m = i.Rm & 0xe; - Value *drn = b.LoadFPR(n, VALUE_F64); - Value *drm = b.LoadFPR(m, VALUE_F64); - Value *v = b.FCmpGT(drn, drm); - b.StoreT(v); - } else { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *frm = b.LoadFPR(i.Rm, VALUE_F32); - Value *v = b.FCmpGT(frn, frm); - b.StoreT(v); - } -} - -// FDIV FRm,FRn PR=0 1111nnnnmmmm0011 -// FDIV DRm,DRn PR=1 1111nnn0mmm00011 -EMITTER(FDIV) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - int m = i.Rm & 0xe; - Value *drn = b.LoadFPR(n, VALUE_F64); - Value *drm = b.LoadFPR(m, VALUE_F64); - Value *v = b.FDiv(drn, drm); - b.StoreFPR(n, v); - } else { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *frm = b.LoadFPR(i.Rm, VALUE_F32); - Value *v = b.FDiv(frn, frm); - b.StoreFPR(i.Rn, v); - } -} - -// FLOAT FPUL,FRn PR=0 1111nnnn00101101 -// FLOAT FPUL,DRn PR=1 1111nnn000101101 -EMITTER(FLOAT) { - Value *fpul = b.LoadContext(offsetof(sh4_context_t, fpul), VALUE_I32); - - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - Value *v = b.IToF(b.SExt(fpul, VALUE_I64), VALUE_F64); - b.StoreFPR(n, v); - } else { - Value *v = b.IToF(fpul, VALUE_F32); - b.StoreFPR(i.Rn, v); - } -} - -// FMAC FR0,FRm,FRn PR=0 1111nnnnmmmm1110 -EMITTER(FMAC) { - CHECK(!(b.flags() & SH4_DOUBLE_PR)); - - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *frm = b.LoadFPR(i.Rm, VALUE_F32); - Value *fr0 = b.LoadFPR(0, VALUE_F32); - Value 
*v = b.FAdd(b.FMul(fr0, frm), frn); - b.StoreFPR(i.Rn, v); -} - -// FMUL FRm,FRn PR=0 1111nnnnmmmm0010 -// FMUL DRm,DRn PR=1 1111nnn0mmm00010 -EMITTER(FMUL) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - int m = i.Rm & 0xe; - Value *drn = b.LoadFPR(n, VALUE_F64); - Value *drm = b.LoadFPR(m, VALUE_F64); - Value *v = b.FMul(drn, drm); - b.StoreFPR(n, v); - } else { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *frm = b.LoadFPR(i.Rm, VALUE_F32); - Value *v = b.FMul(frn, frm); - b.StoreFPR(i.Rn, v); - } -} - -// FNEG FRn PR=0 1111nnnn01001101 -// FNEG DRn PR=1 1111nnn001001101 -EMITTER(FNEG) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - Value *drn = b.LoadFPR(n, VALUE_F64); - Value *v = b.FNeg(drn); - b.StoreFPR(n, v); - } else { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *v = b.FNeg(frn); - b.StoreFPR(i.Rn, v); - } -} - -// FSQRT FRn PR=0 1111nnnn01101101 -// FSQRT DRn PR=1 1111nnnn01101101 -EMITTER(FSQRT) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - Value *drn = b.LoadFPR(n, VALUE_F64); - Value *v = b.Sqrt(drn); - b.StoreFPR(n, v); - } else { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *v = b.Sqrt(frn); - b.StoreFPR(i.Rn, v); - } -} - -// FSUB FRm,FRn PR=0 1111nnnnmmmm0001 -// FSUB DRm,DRn PR=1 1111nnn0mmm00001 -EMITTER(FSUB) { - if (b.flags() & SH4_DOUBLE_PR) { - int n = i.Rn & 0xe; - int m = i.Rm & 0xe; - Value *drn = b.LoadFPR(n, VALUE_F64); - Value *drm = b.LoadFPR(m, VALUE_F64); - Value *v = b.FSub(drn, drm); - b.StoreFPR(n, v); - } else { - Value *frn = b.LoadFPR(i.Rn, VALUE_F32); - Value *frm = b.LoadFPR(i.Rm, VALUE_F32); - Value *v = b.FSub(frn, frm); - b.StoreFPR(i.Rn, v); - } -} - -// FTRC FRm,FPUL PR=0 1111mmmm00111101 -// FTRC DRm,FPUL PR=1 1111mmm000111101 -EMITTER(FTRC) { - if (b.flags() & SH4_DOUBLE_PR) { - int m = i.Rm & 0xe; - Value *drm = b.LoadFPR(m, VALUE_F64); - Value *dpv = b.Trunc(b.FToI(drm, VALUE_I64), VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, fpul), dpv); - } else { 
- Value *frm = b.LoadFPR(i.Rm, VALUE_F32); - Value *spv = b.FToI(frm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, fpul), spv); - } -} - -// FCNVDS DRm,FPUL PR=1 1111mmm010111101 -EMITTER(FCNVDS) { - CHECK(b.flags() & SH4_DOUBLE_PR); - - // TODO rounding modes? - - int m = i.Rm & 0xe; - Value *dpv = b.LoadFPR(m, VALUE_F64); - Value *spv = b.FTrunc(dpv, VALUE_F32); - b.StoreContext(offsetof(sh4_context_t, fpul), spv); -} - -// FCNVSD FPUL, DRn PR=1 1111nnn010101101 -EMITTER(FCNVSD) { - CHECK(b.flags() & SH4_DOUBLE_PR); - - // TODO rounding modes? - - Value *spv = b.LoadContext(offsetof(sh4_context_t, fpul), VALUE_F32); - Value *dpv = b.FExt(spv, VALUE_F64); - int n = i.Rn & 0xe; - b.StoreFPR(n, dpv); -} - -// LDS Rm,FPSCR -EMITTER(LDSFPSCR) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreFPSCR(rm); -} - -// LDS Rm,FPUL -EMITTER(LDSFPUL) { - Value *rm = b.LoadGPR(i.Rm, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, fpul), rm); -} - -// LDS.L @Rm+,FPSCR -EMITTER(LDSMFPSCR) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreFPSCR(v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// LDS.L @Rm+,FPUL -EMITTER(LDSMFPUL) { - Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - Value *v = b.LoadGuest(addr, VALUE_I32); - b.StoreContext(offsetof(sh4_context_t, fpul), v); - addr = b.Add(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rm, addr); -} - -// STS FPSCR,Rn -EMITTER(STSFPSCR) { - Value *fpscr = b.LoadFPSCR(); - b.StoreGPR(i.Rn, fpscr); -} - -// STS FPUL,Rn -EMITTER(STSFPUL) { - Value *fpul = b.LoadContext(offsetof(sh4_context_t, fpul), VALUE_I32); - b.StoreGPR(i.Rn, fpul); -} - -// STS.L FPSCR,@-Rn -EMITTER(STSMFPSCR) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - addr = b.Sub(addr, b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - b.StoreGuest(addr, b.LoadFPSCR()); -} - -// STS.L FPUL,@-Rn -EMITTER(STSMFPUL) { - Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - addr = b.Sub(addr, 
b.AllocConstant(4)); - b.StoreGPR(i.Rn, addr); - Value *fpul = b.LoadContext(offsetof(sh4_context_t, fpul), VALUE_I32); - b.StoreGuest(addr, fpul); -} - -// FIPR FVm,FVn PR=0 1111nnmm11101101 -EMITTER(FIPR) { - int m = i.Rm << 2; - int n = i.Rn << 2; - - Value *fvn = b.LoadFPR(n, VALUE_V128); - Value *fvm = b.LoadFPR(m, VALUE_V128); - Value *dp = b.VDot(fvn, fvm, VALUE_F32); - b.StoreFPR(n + 3, dp); -} - -// FSCA FPUL,DRn PR=0 1111nnn011111101 -EMITTER(FSCA) { - int n = i.Rn << 1; - - Value *fpul = b.LoadContext(offsetof(sh4_context_t, fpul), VALUE_I16); - fpul = b.ZExt(fpul, VALUE_I64); - - Value *fsca_table = b.AllocConstant(reinterpret_cast(s_fsca_table)); - Value *fsca_offset = b.Shl(fpul, 3); - Value *addr = b.Add(fsca_table, fsca_offset); - - b.StoreFPR(n, b.LoadHost(addr, VALUE_F32)); - b.StoreFPR(n + 1, - b.LoadHost(b.Add(addr, b.AllocConstant(INT64_C(4))), VALUE_F32)); -} - -// FTRV XMTRX,FVn PR=0 1111nn0111111101 -EMITTER(FTRV) { - int n = i.Rn << 2; - - Value *col0 = b.LoadXFR(0, VALUE_V128); - Value *row0 = b.VBroadcast(b.LoadFPR(n + 0, VALUE_F32)); - Value *result = b.VMul(col0, row0, VALUE_F32); - - Value *col1 = b.LoadXFR(4, VALUE_V128); - Value *row1 = b.VBroadcast(b.LoadFPR(n + 1, VALUE_F32)); - result = b.VAdd(result, b.VMul(col1, row1, VALUE_F32), VALUE_F32); - - Value *col2 = b.LoadXFR(8, VALUE_V128); - Value *row2 = b.VBroadcast(b.LoadFPR(n + 2, VALUE_F32)); - result = b.VAdd(result, b.VMul(col2, row2, VALUE_F32), VALUE_F32); - - Value *col3 = b.LoadXFR(12, VALUE_V128); - Value *row3 = b.VBroadcast(b.LoadFPR(n + 3, VALUE_F32)); - result = b.VAdd(result, b.VMul(col3, row3, VALUE_F32), VALUE_F32); - - b.StoreFPR(n, result); -} - -// FRCHG 1111101111111101 -EMITTER(FRCHG) { - Value *fpscr = b.LoadFPSCR(); - Value *v = b.Xor(fpscr, b.AllocConstant(FR)); - b.StoreFPSCR(v); -} - -// FSCHG 1111001111111101 -EMITTER(FSCHG) { - Value *fpscr = b.LoadFPSCR(); - Value *v = b.Xor(fpscr, b.AllocConstant(SZ)); - b.StoreFPSCR(v); -} diff --git 
a/src/jit/frontend/sh4/sh4_builder.h b/src/jit/frontend/sh4/sh4_builder.h deleted file mode 100644 index 19d1b919..00000000 --- a/src/jit/frontend/sh4/sh4_builder.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef SH4_BUILDER_H -#define SH4_BUILDER_H - -#include "jit/frontend/sh4/sh4_context.h" -#include "jit/frontend/sh4/sh4_disassembler.h" -#include "jit/frontend/sh4/sh4_frontend.h" -#include "jit/ir/ir_builder.h" - -namespace re { -namespace jit { -namespace frontend { -namespace sh4 { - -class SH4Builder : public ir::IRBuilder { - public: - SH4Builder(Arena &arena); - - int flags() { - return flags_; - } - - void Emit(uint32_t guest_addr, uint8_t *guest_ptr, int size, int flags); - - ir::Instr *LoadGuest(ir::Value *addr, ir::ValueType type); - void StoreGuest(ir::Value *addr, ir::Value *v); - ir::Value *LoadGPR(int n, ir::ValueType type); - void StoreGPR(int n, ir::Value *v); - ir::Value *LoadFPR(int n, ir::ValueType type); - void StoreFPR(int n, ir::Value *v); - ir::Value *LoadXFR(int n, ir::ValueType type); - void StoreXFR(int n, ir::Value *v); - ir::Value *LoadSR(); - void StoreSR(ir::Value *v); - ir::Value *LoadT(); - void StoreT(ir::Value *v); - ir::Value *LoadGBR(); - void StoreGBR(ir::Value *v); - ir::Value *LoadFPSCR(); - void StoreFPSCR(ir::Value *v); - ir::Value *LoadPR(); - void StorePR(ir::Value *v); - - void InvalidInstruction(uint32_t guest_addr); - void EmitDelayInstr(); - - private: - Instr delay_instr_; - int flags_; -}; -} -} -} -} - -#endif diff --git a/src/jit/frontend/sh4/sh4_disassembler.cc b/src/jit/frontend/sh4/sh4_disasm.c similarity index 52% rename from src/jit/frontend/sh4/sh4_disassembler.cc rename to src/jit/frontend/sh4/sh4_disasm.c index df65ae21..ccc9de0c 100644 --- a/src/jit/frontend/sh4/sh4_disassembler.cc +++ b/src/jit/frontend/sh4/sh4_disasm.c @@ -1,13 +1,9 @@ #include "core/assert.h" -#include "core/memory.h" #include "core/string.h" -#include "jit/frontend/sh4/sh4_disassembler.h" +#include "jit/frontend/sh4/sh4_disasm.h" -using 
namespace re; -using namespace re::jit::frontend::sh4; - -struct InstrType { - Op op; +typedef struct { + sh4_op_t op; const char *desc; const char *sig; int cycles; @@ -17,20 +13,20 @@ struct InstrType { uint16_t disp_mask, disp_shift; uint16_t rm_mask, rm_shift; uint16_t rn_mask, rn_shift; -}; +} sh4_opdef_t; -static InstrType s_instrs[NUM_OPCODES] = { - {OP_INVALID, nullptr, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -#define SH4_INSTR(name, desc, sig, cycles, flags) \ - { OP_##name, desc, #sig, cycles, flags, 0, 0, 0, 0, 0, 0, 0, 0, 0 } \ +static sh4_opdef_t s_opdefs[NUM_SH4_OPS] = { + {SH4_OP_INVALID, NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +#define SH4_INSTR(name, desc, sig, cycles, flags) \ + { SH4_OP_##name, desc, #sig, cycles, flags, 0, 0, 0, 0, 0, 0, 0, 0, 0 } \ , #include "jit/frontend/sh4/sh4_instr.inc" #undef SH4_INSTR }; -static InstrType *s_instr_lookup[UINT16_MAX] = {}; +static sh4_opdef_t *s_opdef_lookup[UINT16_MAX] = {}; -static void GetArgMask(const char *instr_code, char c, uint16_t *mask, - uint16_t *shift) { +static void sh4_arg_mask(const char *instr_code, char c, uint16_t *mask, + uint16_t *shift) { size_t len = strlen(instr_code); if (mask) *mask = 0; @@ -41,12 +37,12 @@ static void GetArgMask(const char *instr_code, char c, uint16_t *mask, if (mask) *mask |= (1 << (len - i - 1)); if (shift) - *shift = static_cast(len - i - 1); + *shift = (uint16_t)(len - i - 1); } } } -static void InitInstrTables() { +static void sh4_init_opdefs() { static bool initialized = false; if (initialized) { @@ -57,14 +53,14 @@ static void InitInstrTables() { // finalize type information by extracting argument encoding information // from signatures - for (int i = 1 /* skip OP_INVALID */; i < NUM_OPCODES; i++) { - InstrType *type = &s_instrs[i]; + for (int i = 1 /* skip SH4_OP_INVALID */; i < NUM_SH4_OPS; i++) { + sh4_opdef_t *def = &s_opdefs[i]; - GetArgMask(type->sig, 'i', &type->imm_mask, &type->imm_shift); - GetArgMask(type->sig, 'd', &type->disp_mask, 
&type->disp_shift); - GetArgMask(type->sig, 'm', &type->rm_mask, &type->rm_shift); - GetArgMask(type->sig, 'n', &type->rn_mask, &type->rn_shift); - GetArgMask(type->sig, 0, &type->opcode_mask, NULL); + sh4_arg_mask(def->sig, 'i', &def->imm_mask, &def->imm_shift); + sh4_arg_mask(def->sig, 'd', &def->disp_mask, &def->disp_shift); + sh4_arg_mask(def->sig, 'm', &def->rm_mask, &def->rm_shift); + sh4_arg_mask(def->sig, 'n', &def->rn_mask, &def->rn_shift); + sh4_arg_mask(def->sig, 0, &def->opcode_mask, NULL); } // initialize lookup table @@ -74,13 +70,13 @@ static void InitInstrTables() { for (int z = 0; z < 0x10; z++) { uint16_t value = w + x + y + z; - for (int i = 1 /* skip OP_INVALID */; i < NUM_OPCODES; i++) { - InstrType *type = &s_instrs[i]; - uint16_t arg_mask = type->imm_mask | type->disp_mask | - type->rm_mask | type->rn_mask; + for (int i = 1 /* skip SH4_OP_INVALID */; i < NUM_SH4_OPS; i++) { + sh4_opdef_t *def = &s_opdefs[i]; + uint16_t arg_mask = + def->imm_mask | def->disp_mask | def->rm_mask | def->rn_mask; - if ((value & ~arg_mask) == type->opcode_mask) { - s_instr_lookup[value] = type; + if ((value & ~arg_mask) == def->opcode_mask) { + s_opdef_lookup[value] = def; break; } } @@ -90,30 +86,32 @@ static void InitInstrTables() { } } -bool SH4Disassembler::Disasm(Instr *i) { - InitInstrTables(); +bool sh4_disasm(sh4_instr_t *i) { + sh4_init_opdefs(); - InstrType *type = s_instr_lookup[i->opcode]; + sh4_opdef_t *def = s_opdef_lookup[i->opcode]; - if (!type) { - i->op = OP_INVALID; + if (!def) { + i->op = SH4_OP_INVALID; return false; } - i->op = type->op; - i->cycles = type->cycles; - i->flags = type->flags; - i->Rm = (i->opcode & type->rm_mask) >> type->rm_shift; - i->Rn = (i->opcode & type->rn_mask) >> type->rn_shift; - i->disp = (i->opcode & type->disp_mask) >> type->disp_shift; - i->imm = (i->opcode & type->imm_mask) >> type->imm_shift; + i->op = def->op; + i->cycles = def->cycles; + i->flags = def->flags; + i->Rm = (i->opcode & def->rm_mask) >> 
def->rm_shift; + i->Rn = (i->opcode & def->rn_mask) >> def->rn_shift; + i->disp = (i->opcode & def->disp_mask) >> def->disp_shift; + i->imm = (i->opcode & def->imm_mask) >> def->imm_shift; return true; } -void SH4Disassembler::Format(const Instr &i, char *buffer, size_t buffer_size) { - if (i.op == OP_INVALID) { - snprintf(buffer, buffer_size, "%08x .word 0x%04x", i.addr, i.opcode); +void sh4_format(const sh4_instr_t *i, char *buffer, size_t buffer_size) { + sh4_init_opdefs(); + + if (i->op == SH4_OP_INVALID) { + snprintf(buffer, buffer_size, "%08x .word 0x%04x", i->addr, i->opcode); return; } @@ -123,7 +121,7 @@ void SH4Disassembler::Format(const Instr &i, char *buffer, size_t buffer_size) { uint32_t pcmask; // copy initial formatted description - snprintf(buffer, buffer_size, "%08x %s", i.addr, s_instrs[i.op].desc); + snprintf(buffer, buffer_size, "%08x %s", i->addr, s_opdefs[i->op].desc); // used by mov operators with displacements if (strnstr(buffer, ".b", buffer_size)) { @@ -141,70 +139,70 @@ void SH4Disassembler::Format(const Instr &i, char *buffer, size_t buffer_size) { } // (disp:4,rn) - value_len = snprintf(value, sizeof(value), "(0x%x,rn)", i.disp * movsize); + value_len = snprintf(value, sizeof(value), "(0x%x,rn)", i->disp * movsize); CHECK_EQ(strnrep(buffer, buffer_size, "(disp:4,rn)", 11, value, value_len), 0); // (disp:4,rm) - value_len = snprintf(value, sizeof(value), "(0x%x,rm)", i.disp * movsize); + value_len = snprintf(value, sizeof(value), "(0x%x,rm)", i->disp * movsize); CHECK_EQ(strnrep(buffer, buffer_size, "(disp:4,rm)", 11, value, value_len), 0); // (disp:8,gbr) - value_len = snprintf(value, sizeof(value), "(0x%x,gbr)", i.disp * movsize); + value_len = snprintf(value, sizeof(value), "(0x%x,gbr)", i->disp * movsize); CHECK_EQ(strnrep(buffer, buffer_size, "(disp:8,gbr)", 12, value, value_len), 0); // (disp:8,pc) value_len = snprintf(value, sizeof(value), "(0x%08x)", - (i.disp * movsize) + (i.addr & pcmask) + 4); + (i->disp * movsize) + (i->addr 
& pcmask) + 4); CHECK_EQ(strnrep(buffer, buffer_size, "(disp:8,pc)", 11, value, value_len), 0); // disp:8 value_len = snprintf(value, sizeof(value), "0x%08x", - ((int8_t)i.disp * 2) + i.addr + 4); + ((int8_t)i->disp * 2) + i->addr + 4); CHECK_EQ(strnrep(buffer, buffer_size, "disp:8", 6, value, value_len), 0); // disp:12 value_len = snprintf(value, sizeof(value), "0x%08x", - ((((int32_t)(i.disp & 0xfff) << 20) >> 20) * 2) + i.addr + 4); + ((((int32_t)(i->disp & 0xfff) << 20) >> 20) * 2) + i->addr + 4); CHECK_EQ(strnrep(buffer, buffer_size, "disp:12", 7, value, value_len), 0); // drm - value_len = snprintf(value, sizeof(value), "dr%d", i.Rm); + value_len = snprintf(value, sizeof(value), "dr%d", i->Rm); CHECK_EQ(strnrep(buffer, buffer_size, "drm", 3, value, value_len), 0); // drn - value_len = snprintf(value, sizeof(value), "dr%d", i.Rn); + value_len = snprintf(value, sizeof(value), "dr%d", i->Rn); CHECK_EQ(strnrep(buffer, buffer_size, "drn", 3, value, value_len), 0); // frm - value_len = snprintf(value, sizeof(value), "fr%d", i.Rm); + value_len = snprintf(value, sizeof(value), "fr%d", i->Rm); CHECK_EQ(strnrep(buffer, buffer_size, "frm", 3, value, value_len), 0); // frn - value_len = snprintf(value, sizeof(value), "fr%d", i.Rn); + value_len = snprintf(value, sizeof(value), "fr%d", i->Rn); CHECK_EQ(strnrep(buffer, buffer_size, "frn", 3, value, value_len), 0); // fvm - value_len = snprintf(value, sizeof(value), "fv%d", i.Rm); + value_len = snprintf(value, sizeof(value), "fv%d", i->Rm); CHECK_EQ(strnrep(buffer, buffer_size, "fvm", 3, value, value_len), 0); // fvn - value_len = snprintf(value, sizeof(value), "fv%d", i.Rn); + value_len = snprintf(value, sizeof(value), "fv%d", i->Rn); CHECK_EQ(strnrep(buffer, buffer_size, "fvn", 3, value, value_len), 0); // rm - value_len = snprintf(value, sizeof(value), "r%d", i.Rm); + value_len = snprintf(value, sizeof(value), "r%d", i->Rm); CHECK_EQ(strnrep(buffer, buffer_size, "rm", 2, value, value_len), 0); // rn - value_len = 
snprintf(value, sizeof(value), "r%d", i.Rn); + value_len = snprintf(value, sizeof(value), "r%d", i->Rn); CHECK_EQ(strnrep(buffer, buffer_size, "rn", 2, value, value_len), 0); // #imm8 - value_len = snprintf(value, sizeof(value), "0x%02x", i.imm); + value_len = snprintf(value, sizeof(value), "0x%02x", i->imm); CHECK_EQ(strnrep(buffer, buffer_size, "#imm8", 5, value, value_len), 0); } diff --git a/src/jit/frontend/sh4/sh4_disasm.h b/src/jit/frontend/sh4/sh4_disasm.h new file mode 100644 index 00000000..112eb6e8 --- /dev/null +++ b/src/jit/frontend/sh4/sh4_disasm.h @@ -0,0 +1,49 @@ +#ifndef SH4_DISASSEMBLER_H +#define SH4_DISASSEMBLER_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + SH4_OP_INVALID, +#define SH4_INSTR(name, desc, instr_code, cycles, flags) SH4_OP_##name, +#include "jit/frontend/sh4/sh4_instr.inc" +#undef SH4_INSTR + NUM_SH4_OPS, +} sh4_op_t; + +typedef enum { + SH4_FLAG_BRANCH = 0x1, + SH4_FLAG_CONDITIONAL = 0x2, + SH4_FLAG_DELAYED = 0x4, + SH4_FLAG_SET_T = 0x8, + SH4_FLAG_SET_FPSCR = 0x10, + SH4_FLAG_SET_SR = 0x20, +} sh4_flag_t; + +typedef struct { + uint32_t addr; + uint16_t opcode; + + sh4_op_t op; + int cycles; + int flags; + uint16_t Rm; + uint16_t Rn; + uint16_t disp; + uint16_t imm; +} sh4_instr_t; + +bool sh4_disasm(sh4_instr_t *i); +void sh4_format(const sh4_instr_t *i, char *buffer, size_t buffer_size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/jit/frontend/sh4/sh4_disassembler.h b/src/jit/frontend/sh4/sh4_disassembler.h deleted file mode 100644 index ff28732d..00000000 --- a/src/jit/frontend/sh4/sh4_disassembler.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef SH4_DISASSEMBLER_H -#define SH4_DISASSEMBLER_H - -#include -#include - -namespace re { -namespace jit { -namespace frontend { -namespace sh4 { - -enum Op { - OP_INVALID, -#define SH4_INSTR(name, desc, instr_code, cycles, flags) OP_##name, -#include "jit/frontend/sh4/sh4_instr.inc" -#undef SH4_INSTR - NUM_OPCODES, -}; - -enum 
OpFlag { - OP_FLAG_BRANCH = 0x1, - OP_FLAG_CONDITIONAL = 0x2, - OP_FLAG_DELAYED = 0x4, - OP_FLAG_SET_T = 0x8, - OP_FLAG_SET_FPSCR = 0x10, - OP_FLAG_SET_SR = 0x20, -}; - -struct Instr { - uint32_t addr; - uint16_t opcode; - - Op op; - int cycles; - int flags; - uint16_t Rm; - uint16_t Rn; - uint16_t disp; - uint16_t imm; -}; - -class SH4Disassembler { - public: - static bool Disasm(Instr *i); - static void Format(const Instr &i, char *buffer, size_t buffer_size); -}; -} -} -} -} - -#endif diff --git a/src/jit/frontend/sh4/sh4_frontend.c b/src/jit/frontend/sh4/sh4_frontend.c new file mode 100644 index 00000000..13081444 --- /dev/null +++ b/src/jit/frontend/sh4/sh4_frontend.c @@ -0,0 +1,66 @@ +#include "jit/frontend/sh4/sh4_analyze.h" +#include "jit/frontend/sh4/sh4_disasm.h" +#include "jit/frontend/sh4/sh4_frontend.h" +#include "jit/frontend/sh4/sh4_translate.h" +#include "jit/frontend/frontend.h" +#include "jit/ir/ir.h" + +typedef struct sh4_frontend_s { jit_frontend_t base; } sh4_frontend_t; + +static void sh4_frontend_translate_code(sh4_frontend_t *frontend, + uint32_t guest_addr, uint8_t *guest_ptr, + int flags, int *size, ir_t *ir) { + // get the block size + sh4_analyze_block(guest_addr, guest_ptr, flags, size); + + // emit IR for the SH4 code + sh4_translate(guest_addr, guest_ptr, *size, flags, ir); +} + +static void sh4_frontend_dump_code(sh4_frontend_t *frontend, + uint32_t guest_addr, uint8_t *guest_ptr, + int size) { + char buffer[128]; + + int i = 0; + + while (i < size) { + sh4_instr_t instr = {}; + instr.addr = guest_addr + i; + instr.opcode = *(uint16_t *)(guest_ptr + i); + sh4_disasm(&instr); + + sh4_format(&instr, buffer, sizeof(buffer)); + LOG_INFO(buffer); + + i += 2; + + if (instr.flags & SH4_FLAG_DELAYED) { + sh4_instr_t delay = {}; + delay.addr = guest_addr + i; + delay.opcode = *(uint16_t *)(guest_ptr + i); + sh4_disasm(&delay); + + sh4_format(&delay, buffer, sizeof(buffer)); + LOG_INFO(buffer); + + i += 2; + } + } +} + +jit_frontend_t 
*sh4_frontend_create() { + sh4_frontend_t *frontend = calloc(1, sizeof(sh4_frontend_t)); + + frontend->base.translate_code = + (jit_frontend_translate_code)&sh4_frontend_translate_code; + frontend->base.dump_code = (jit_frontend_dump_code)&sh4_frontend_dump_code; + + return (jit_frontend_t *)frontend; +} + +void sh4_frontend_destroy(jit_frontend_t *jit_frontend) { + sh4_frontend_t *frontend = (sh4_frontend_t *)jit_frontend; + + free(frontend); +} diff --git a/src/jit/frontend/sh4/sh4_frontend.cc b/src/jit/frontend/sh4/sh4_frontend.cc deleted file mode 100644 index b0de3e3b..00000000 --- a/src/jit/frontend/sh4/sh4_frontend.cc +++ /dev/null @@ -1,54 +0,0 @@ -#include "core/memory.h" -#include "jit/frontend/sh4/sh4_analyzer.h" -#include "jit/frontend/sh4/sh4_builder.h" -#include "jit/frontend/sh4/sh4_frontend.h" - -using namespace re::jit; -using namespace re::jit::frontend::sh4; -using namespace re::jit::ir; - -SH4Frontend::SH4Frontend() : arena_(4096) {} - -IRBuilder &SH4Frontend::TranslateCode(uint32_t guest_addr, uint8_t *guest_ptr, - int flags, int *size) { - // get the block size - SH4Analyzer::AnalyzeBlock(guest_addr, guest_ptr, flags, size); - - // emit IR for the SH4 code - arena_.Reset(); - SH4Builder *builder = arena_.Alloc(); - new (builder) SH4Builder(arena_); - builder->Emit(guest_addr, guest_ptr, *size, flags); - - return *builder; -} - -void SH4Frontend::DumpCode(uint32_t guest_addr, uint8_t *guest_ptr, int size) { - char buffer[128]; - - int i = 0; - - while (i < size) { - Instr instr; - instr.addr = guest_addr + i; - instr.opcode = load(guest_ptr + i); - SH4Disassembler::Disasm(&instr); - - SH4Disassembler::Format(instr, buffer, sizeof(buffer)); - LOG_INFO(buffer); - - i += 2; - - if (instr.flags & OP_FLAG_DELAYED) { - Instr delay; - delay.addr = guest_addr + i; - delay.opcode = load(guest_ptr + i); - SH4Disassembler::Disasm(&delay); - - SH4Disassembler::Format(delay, buffer, sizeof(buffer)); - LOG_INFO(buffer); - - i += 2; - } - } -} diff --git 
a/src/jit/frontend/sh4/sh4_frontend.h b/src/jit/frontend/sh4/sh4_frontend.h index 41c5cdbd..e757e96a 100644 --- a/src/jit/frontend/sh4/sh4_frontend.h +++ b/src/jit/frontend/sh4/sh4_frontend.h @@ -1,35 +1,17 @@ #ifndef SH4_FRONTEND_H #define SH4_FRONTEND_H -#include "core/arena.h" -#include "jit/frontend/frontend.h" +#ifdef __cplusplus +extern "C" { +#endif -namespace re { -namespace jit { -namespace frontend { -namespace sh4 { +struct jit_frontend_s; -enum SH4BlockFlags { - SH4_SLOWMEM = 0x1, - SH4_DOUBLE_PR = 0x2, - SH4_DOUBLE_SZ = 0x4, - SH4_SINGLE_INSTR = 0x8, -}; +struct jit_frontend_s *sh4_frontend_create(); +void sh4_frontend_destroy(struct jit_frontend_s *frontend); -class SH4Frontend : public Frontend { - public: - SH4Frontend(); - - ir::IRBuilder &TranslateCode(uint32_t guest_addr, uint8_t *guest_ptr, - int flags, int *size); - void DumpCode(uint32_t guest_addr, uint8_t *guest_ptr, int size); - - private: - Arena arena_; -}; -} -} -} +#ifdef __cplusplus } +#endif #endif diff --git a/src/jit/frontend/sh4/sh4_instr.inc b/src/jit/frontend/sh4/sh4_instr.inc index f54f5f98..791dd3ac 100644 --- a/src/jit/frontend/sh4/sh4_instr.inc +++ b/src/jit/frontend/sh4/sh4_instr.inc @@ -47,23 +47,23 @@ SH4_INSTR(XTRCT, "xtrct rm, rn", 0010nnnnmmmm1101, 1, 0) // arithmetric operation instructions SH4_INSTR(ADD, "add rm, rn", 0011nnnnmmmm1100, 1, 0) SH4_INSTR(ADDI, "add #imm8, rn", 0111nnnniiiiiiii, 1, 0) -SH4_INSTR(ADDC, "addc rm, rn", 0011nnnnmmmm1110, 1, OP_FLAG_SET_T) -SH4_INSTR(ADDV, "addv rm, rn", 0011nnnnmmmm1111, 1, OP_FLAG_SET_T) -SH4_INSTR(CMPEQI, "cmp/eq #imm8, r0", 10001000iiiiiiii, 1, OP_FLAG_SET_T) -SH4_INSTR(CMPEQ, "cmp/eq rm, rn", 0011nnnnmmmm0000, 1, OP_FLAG_SET_T) -SH4_INSTR(CMPHS, "cmp/hs rm, rn", 0011nnnnmmmm0010, 1, OP_FLAG_SET_T) -SH4_INSTR(CMPGE, "cmp/ge rm, rn", 0011nnnnmmmm0011, 1, OP_FLAG_SET_T) -SH4_INSTR(CMPHI, "cmp/hi rm, rn", 0011nnnnmmmm0110, 1, OP_FLAG_SET_T) -SH4_INSTR(CMPGT, "cmp/gt rm, rn", 0011nnnnmmmm0111, 1, OP_FLAG_SET_T) 
-SH4_INSTR(CMPPZ, "cmp/pz rn", 0100nnnn00010001, 1, OP_FLAG_SET_T) -SH4_INSTR(CMPPL, "cmp/pl rn", 0100nnnn00010101, 1, OP_FLAG_SET_T) -SH4_INSTR(CMPSTR, "cmp/str rm, rn", 0010nnnnmmmm1100, 1, OP_FLAG_SET_T) -SH4_INSTR(DIV0S, "div0s rm, rn", 0010nnnnmmmm0111, 1, OP_FLAG_SET_T) -SH4_INSTR(DIV0U, "div0u", 0000000000011001, 1, OP_FLAG_SET_T) -SH4_INSTR(DIV1, "div1 rm, rn", 0011nnnnmmmm0100, 1, OP_FLAG_SET_T) +SH4_INSTR(ADDC, "addc rm, rn", 0011nnnnmmmm1110, 1, SH4_FLAG_SET_T) +SH4_INSTR(ADDV, "addv rm, rn", 0011nnnnmmmm1111, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPEQI, "cmp/eq #imm8, r0", 10001000iiiiiiii, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPEQ, "cmp/eq rm, rn", 0011nnnnmmmm0000, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPHS, "cmp/hs rm, rn", 0011nnnnmmmm0010, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPGE, "cmp/ge rm, rn", 0011nnnnmmmm0011, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPHI, "cmp/hi rm, rn", 0011nnnnmmmm0110, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPGT, "cmp/gt rm, rn", 0011nnnnmmmm0111, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPPZ, "cmp/pz rn", 0100nnnn00010001, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPPL, "cmp/pl rn", 0100nnnn00010101, 1, SH4_FLAG_SET_T) +SH4_INSTR(CMPSTR, "cmp/str rm, rn", 0010nnnnmmmm1100, 1, SH4_FLAG_SET_T) +SH4_INSTR(DIV0S, "div0s rm, rn", 0010nnnnmmmm0111, 1, SH4_FLAG_SET_T) +SH4_INSTR(DIV0U, "div0u", 0000000000011001, 1, SH4_FLAG_SET_T) +SH4_INSTR(DIV1, "div1 rm, rn", 0011nnnnmmmm0100, 1, SH4_FLAG_SET_T) SH4_INSTR(DMULS, "dmuls.l rm, rn", 0011nnnnmmmm1101, 2, 0) SH4_INSTR(DMULU, "dmulu.l rm, rn", 0011nnnnmmmm0101, 2, 0) -SH4_INSTR(DT, "dt rn", 0100nnnn00010000, 1, OP_FLAG_SET_T) +SH4_INSTR(DT, "dt rn", 0100nnnn00010000, 1, SH4_FLAG_SET_T) SH4_INSTR(EXTSB, "exts.b rm, rn", 0110nnnnmmmm1110, 1, 0) SH4_INSTR(EXTSW, "exts.w rm, rn", 0110nnnnmmmm1111, 1, 0) SH4_INSTR(EXTUB, "extu.b rm, rn", 0110nnnnmmmm1100, 1, 0) @@ -74,10 +74,10 @@ SH4_INSTR(MULL, "mul.l rm, rn", 0000nnnnmmmm0111, 2, 0) SH4_INSTR(MULS, "muls rm, rn", 0010nnnnmmmm1111, 2, 0) SH4_INSTR(MULU, "mulu rm, rn", 0010nnnnmmmm1110, 2, 0) 
SH4_INSTR(NEG, "neg rm, rn", 0110nnnnmmmm1011, 1, 0) -SH4_INSTR(NEGC, "negc rm, rn", 0110nnnnmmmm1010, 1, OP_FLAG_SET_T) +SH4_INSTR(NEGC, "negc rm, rn", 0110nnnnmmmm1010, 1, SH4_FLAG_SET_T) SH4_INSTR(SUB, "sub rm, rn", 0011nnnnmmmm1000, 1, 0) -SH4_INSTR(SUBC, "subc rm, rn", 0011nnnnmmmm1010, 1, OP_FLAG_SET_T) -SH4_INSTR(SUBV, "subv rm, rn", 0011nnnnmmmm1011, 1, OP_FLAG_SET_T) +SH4_INSTR(SUBC, "subc rm, rn", 0011nnnnmmmm1010, 1, SH4_FLAG_SET_T) +SH4_INSTR(SUBV, "subv rm, rn", 0011nnnnmmmm1011, 1, SH4_FLAG_SET_T) // logic operation instructions @@ -88,26 +88,26 @@ SH4_INSTR(NOT, "not rm, rn", 0110nnnnmmmm0111, 1, 0) SH4_INSTR(OR, "or rm, rn", 0010nnnnmmmm1011, 1, 0) SH4_INSTR(ORI, "or #imm8, r0", 11001011iiiiiiii, 1, 0) SH4_INSTR(ORB, "or.b #imm8, @(r0,gbr)", 11001111iiiiiiii, 4, 0) -SH4_INSTR(TAS, "tas.b @rn", 0100nnnn00011011, 5, OP_FLAG_SET_T) -SH4_INSTR(TST, "tst rm, rn", 0010nnnnmmmm1000, 1, OP_FLAG_SET_T) -SH4_INSTR(TSTI, "tst #imm8, r0", 11001000iiiiiiii, 1, OP_FLAG_SET_T) -SH4_INSTR(TSTB, "tst.b #imm8, @(r0,gbr)", 11001100iiiiiiii, 3, OP_FLAG_SET_T) +SH4_INSTR(TAS, "tas.b @rn", 0100nnnn00011011, 5, SH4_FLAG_SET_T) +SH4_INSTR(TST, "tst rm, rn", 0010nnnnmmmm1000, 1, SH4_FLAG_SET_T) +SH4_INSTR(TSTI, "tst #imm8, r0", 11001000iiiiiiii, 1, SH4_FLAG_SET_T) +SH4_INSTR(TSTB, "tst.b #imm8, @(r0,gbr)", 11001100iiiiiiii, 3, SH4_FLAG_SET_T) SH4_INSTR(XOR, "xor rm, rn", 0010nnnnmmmm1010, 1, 0) SH4_INSTR(XORI, "xor #imm8, r0", 11001010iiiiiiii, 1, 0) SH4_INSTR(XORB, "xor.b #imm8, @(r0,gbr)", 11001110iiiiiiii, 4, 0) // shift instructions -SH4_INSTR(ROTL, "rotl rn", 0100nnnn00000100, 1, OP_FLAG_SET_T) -SH4_INSTR(ROTR, "rotr rn", 0100nnnn00000101, 1, OP_FLAG_SET_T) -SH4_INSTR(ROTCL, "rotcl rn", 0100nnnn00100100, 1, OP_FLAG_SET_T) -SH4_INSTR(ROTCR, "rotcr rn", 0100nnnn00100101, 1, OP_FLAG_SET_T) +SH4_INSTR(ROTL, "rotl rn", 0100nnnn00000100, 1, SH4_FLAG_SET_T) +SH4_INSTR(ROTR, "rotr rn", 0100nnnn00000101, 1, SH4_FLAG_SET_T) +SH4_INSTR(ROTCL, "rotcl rn", 0100nnnn00100100, 1, 
SH4_FLAG_SET_T) +SH4_INSTR(ROTCR, "rotcr rn", 0100nnnn00100101, 1, SH4_FLAG_SET_T) SH4_INSTR(SHAD, "shad rm, rn", 0100nnnnmmmm1100, 1, 0) -SH4_INSTR(SHAL, "shal rn", 0100nnnn00100000, 1, OP_FLAG_SET_T) -SH4_INSTR(SHAR, "shar rn", 0100nnnn00100001, 1, OP_FLAG_SET_T) +SH4_INSTR(SHAL, "shal rn", 0100nnnn00100000, 1, SH4_FLAG_SET_T) +SH4_INSTR(SHAR, "shar rn", 0100nnnn00100001, 1, SH4_FLAG_SET_T) SH4_INSTR(SHLD, "shld rm, rn", 0100nnnnmmmm1101, 1, 0) -SH4_INSTR(SHLL, "shll rn", 0100nnnn00000000, 1, OP_FLAG_SET_T) -SH4_INSTR(SHLR, "shlr rn", 0100nnnn00000001, 1, OP_FLAG_SET_T) +SH4_INSTR(SHLL, "shll rn", 0100nnnn00000000, 1, SH4_FLAG_SET_T) +SH4_INSTR(SHLR, "shlr rn", 0100nnnn00000001, 1, SH4_FLAG_SET_T) SH4_INSTR(SHLL2, "shll2 rn", 0100nnnn00001000, 1, 0) SH4_INSTR(SHLR2, "shlr2 rn", 0100nnnn00001001, 1, 0) SH4_INSTR(SHLL8, "shll8 rn", 0100nnnn00011000, 1, 0) @@ -119,31 +119,31 @@ SH4_INSTR(SHLR16, "shlr16 rn", 0100nnnn00101001, 1, 0) // branch instructions // can we sign extend bdisp12 in sh4_instr code, not inside of sh4_builder // then, we can reuse some more of these disp* types -SH4_INSTR(BF, "bf disp:8", 10001011dddddddd, 1, OP_FLAG_CONDITIONAL | OP_FLAG_BRANCH) -SH4_INSTR(BFS, "bfs disp:8", 10001111dddddddd, 1, OP_FLAG_CONDITIONAL | OP_FLAG_BRANCH | OP_FLAG_DELAYED) -SH4_INSTR(BT, "bt disp:8", 10001001dddddddd, 1, OP_FLAG_CONDITIONAL | OP_FLAG_BRANCH) -SH4_INSTR(BTS, "bts disp:8", 10001101dddddddd, 1, OP_FLAG_CONDITIONAL | OP_FLAG_BRANCH | OP_FLAG_DELAYED) -SH4_INSTR(BRA, "bra disp:12", 1010dddddddddddd, 1, OP_FLAG_BRANCH | OP_FLAG_DELAYED) -SH4_INSTR(BRAF, "braf rn", 0000nnnn00100011, 2, OP_FLAG_BRANCH | OP_FLAG_DELAYED) -SH4_INSTR(BSR, "bsr disp:12", 1011dddddddddddd, 1, OP_FLAG_BRANCH | OP_FLAG_DELAYED) -SH4_INSTR(BSRF, "bsrf rn", 0000nnnn00000011, 2, OP_FLAG_BRANCH | OP_FLAG_DELAYED) -SH4_INSTR(JMP, "jmp @rm", 0100nnnn00101011, 2, OP_FLAG_BRANCH | OP_FLAG_DELAYED) -SH4_INSTR(JSR, "jsr @rn", 0100nnnn00001011, 2, OP_FLAG_BRANCH | OP_FLAG_DELAYED) 
-SH4_INSTR(RTS, "rts", 0000000000001011, 2, OP_FLAG_BRANCH | OP_FLAG_DELAYED) +SH4_INSTR(BF, "bf disp:8", 10001011dddddddd, 1, SH4_FLAG_CONDITIONAL | SH4_FLAG_BRANCH) +SH4_INSTR(BFS, "bfs disp:8", 10001111dddddddd, 1, SH4_FLAG_CONDITIONAL | SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) +SH4_INSTR(BT, "bt disp:8", 10001001dddddddd, 1, SH4_FLAG_CONDITIONAL | SH4_FLAG_BRANCH) +SH4_INSTR(BTS, "bts disp:8", 10001101dddddddd, 1, SH4_FLAG_CONDITIONAL | SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) +SH4_INSTR(BRA, "bra disp:12", 1010dddddddddddd, 1, SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) +SH4_INSTR(BRAF, "braf rn", 0000nnnn00100011, 2, SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) +SH4_INSTR(BSR, "bsr disp:12", 1011dddddddddddd, 1, SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) +SH4_INSTR(BSRF, "bsrf rn", 0000nnnn00000011, 2, SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) +SH4_INSTR(JMP, "jmp @rm", 0100nnnn00101011, 2, SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) +SH4_INSTR(JSR, "jsr @rn", 0100nnnn00001011, 2, SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) +SH4_INSTR(RTS, "rts", 0000000000001011, 2, SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) // system control instructions SH4_INSTR(CLRMAC, "clrmac", 0000000000101000, 1, 0) SH4_INSTR(CLRS, "clrs", 0000000001001000, 1, 0) -SH4_INSTR(CLRT, "clrt", 0000000000001000, 1, OP_FLAG_SET_T) -SH4_INSTR(LDCSR, "ldc rm, sr", 0100mmmm00001110, 4, OP_FLAG_SET_SR) +SH4_INSTR(CLRT, "clrt", 0000000000001000, 1, SH4_FLAG_SET_T) +SH4_INSTR(LDCSR, "ldc rm, sr", 0100mmmm00001110, 4, SH4_FLAG_SET_SR) SH4_INSTR(LDCGBR, "ldc rm, gbr", 0100mmmm00011110, 3, 0) SH4_INSTR(LDCVBR, "ldc rm, vbr", 0100mmmm00101110, 1, 0) SH4_INSTR(LDCSSR, "ldc rm, ssr", 0100mmmm00111110, 1, 0) SH4_INSTR(LDCSPC, "ldc rm, spc", 0100mmmm01001110, 1, 0) SH4_INSTR(LDCDBR, "ldc rm, dbr", 0100mmmm11111010, 1, 0) SH4_INSTR(LDCRBANK, "ldc.l rm, rn_bank", 0100mmmm1nnn1110, 1, 0) -SH4_INSTR(LDCMSR, "ldc.l @rm+, sr", 0100mmmm00000111, 4, OP_FLAG_SET_SR) +SH4_INSTR(LDCMSR, "ldc.l @rm+, sr", 0100mmmm00000111, 4, SH4_FLAG_SET_SR) SH4_INSTR(LDCMGBR, "ldc.l @rm+, gbr", 
0100mmmm00010111, 3, 0) SH4_INSTR(LDCMVBR, "ldc.l @rm+, vbr", 0100mmmm00100111, 1, 0) SH4_INSTR(LDCMSSR, "ldc.l @rm+, ssr", 0100mmmm00110111, 1, 0) @@ -162,9 +162,9 @@ SH4_INSTR(OCBI, "ocbi", 0000nnnn10010011, 1, 0) SH4_INSTR(OCBP, "ocbp", 0000nnnn10100011, 1, 0) SH4_INSTR(OCBWB, "ocbwb", 0000nnnn10110011, 1, 0) SH4_INSTR(PREF, "pref @rn", 0000nnnn10000011, 1, 0) -SH4_INSTR(RTE, "rte", 0000000000101011, 5, OP_FLAG_BRANCH | OP_FLAG_DELAYED) +SH4_INSTR(RTE, "rte", 0000000000101011, 5, SH4_FLAG_BRANCH | SH4_FLAG_DELAYED) SH4_INSTR(SETS, "sets", 0000000001011000, 1, 0) -SH4_INSTR(SETT, "sett", 0000000000011000, 1, OP_FLAG_SET_T) +SH4_INSTR(SETT, "sett", 0000000000011000, 1, SH4_FLAG_SET_T) SH4_INSTR(SLEEP, "sleep", 0000000000011011, 4, 0) SH4_INSTR(STCSR, "stc sr, rn", 0000nnnn00000010, 2, 0) SH4_INSTR(STCGBR, "stc gbr, rn", 0000nnnn00010010, 2, 0) @@ -188,7 +188,7 @@ SH4_INSTR(STSPR, "sts pr, rn", 0000nnnn00101010, 2, 0) SH4_INSTR(STSMMACH, "sts.l mach, @-rn", 0100nnnn00000010, 1, 0) SH4_INSTR(STSMMACL, "sts.l macl, @-rn", 0100nnnn00010010, 1, 0) SH4_INSTR(STSMPR, "sts.l pr, @-rn", 0100nnnn00100010, 2, 0) -SH4_INSTR(TRAPA, "trapa #imm8", 11000011iiiiiiii, 7, OP_FLAG_BRANCH) +SH4_INSTR(TRAPA, "trapa #imm8", 11000011iiiiiiii, 7, SH4_FLAG_BRANCH) // floating-point single and double precision instructions @@ -206,8 +206,8 @@ SH4_INSTR(FSTS, "fsts fpul, frn", 1111nnnn00001101, 1, 0) SH4_INSTR(FABS, "fabs frn", 1111nnnn01011101, 1, 0) SH4_INSTR(FSRRA, "fsrra frn", 1111nnnn01111101, 1, 0) SH4_INSTR(FADD, "fadd frm, frn", 1111nnnnmmmm0000, 1, 0) -SH4_INSTR(FCMPEQ, "fcmp/eq frm, frn", 1111nnnnmmmm0100, 2, OP_FLAG_SET_T) -SH4_INSTR(FCMPGT, "fcmp/gt frm, frn", 1111nnnnmmmm0101, 2, OP_FLAG_SET_T) +SH4_INSTR(FCMPEQ, "fcmp/eq frm, frn", 1111nnnnmmmm0100, 2, SH4_FLAG_SET_T) +SH4_INSTR(FCMPGT, "fcmp/gt frm, frn", 1111nnnnmmmm0101, 2, SH4_FLAG_SET_T) SH4_INSTR(FDIV, "fdiv frm, frn", 1111nnnnmmmm0011, 1, 0) SH4_INSTR(FLOAT, "float fpul, frn", 1111nnnn00101101, 1, 0) SH4_INSTR(FMAC, 
"fmac fr0, frm, frn", 1111nnnnmmmm1110, 1, 0) @@ -225,9 +225,9 @@ SH4_INSTR(FCNVSD, "fcnvsd fpul, drn", 1111nnnn10101101, 1, 0) // floating-point control instructions -SH4_INSTR(LDSFPSCR, "lds rm, fpscr", 0100mmmm01101010, 1, OP_FLAG_SET_FPSCR) +SH4_INSTR(LDSFPSCR, "lds rm, fpscr", 0100mmmm01101010, 1, SH4_FLAG_SET_FPSCR) SH4_INSTR(LDSFPUL, "lds rm, fpul", 0100mmmm01011010, 1, 0) -SH4_INSTR(LDSMFPSCR, "lds.l @rm+, fpscr", 0100mmmm01100110, 1, OP_FLAG_SET_FPSCR) +SH4_INSTR(LDSMFPSCR, "lds.l @rm+, fpscr", 0100mmmm01100110, 1, SH4_FLAG_SET_FPSCR) SH4_INSTR(LDSMFPUL, "lds.l @rm+, fpul", 0100mmmm01010110, 1, 0) SH4_INSTR(STSFPSCR, "sts fpscr, rn", 0000nnnn01101010, 1, 0) SH4_INSTR(STSFPUL, "sts fpul, rn", 0000nnnn01011010, 1, 0) @@ -240,5 +240,5 @@ SH4_INSTR(STSMFPUL, "sts.l fpul, @-rn", 0100nnnn01010010, 1, 0) SH4_INSTR(FIPR, "fipr fvm, fvn", 1111nnmm11101101, 1, 0) SH4_INSTR(FSCA, "fsca fpul, drn", 1111nnn011111101, 1, 0) SH4_INSTR(FTRV, "ftrv xmtrx, fvn", 1111nn0111111101, 1, 0) -SH4_INSTR(FRCHG, "frchg", 1111101111111101, 1, OP_FLAG_SET_FPSCR) -SH4_INSTR(FSCHG, "fschg", 1111001111111101, 1, OP_FLAG_SET_FPSCR) +SH4_INSTR(FRCHG, "frchg", 1111101111111101, 1, SH4_FLAG_SET_FPSCR) +SH4_INSTR(FSCHG, "fschg", 1111001111111101, 1, SH4_FLAG_SET_FPSCR) diff --git a/src/jit/frontend/sh4/sh4_translate.c b/src/jit/frontend/sh4/sh4_translate.c new file mode 100644 index 00000000..9fffec94 --- /dev/null +++ b/src/jit/frontend/sh4/sh4_translate.c @@ -0,0 +1,2229 @@ +#include "core/assert.h" +#include "core/profiler.h" +#include "jit/frontend/sh4/sh4_analyze.h" +#include "jit/frontend/sh4/sh4_context.h" +#include "jit/frontend/sh4/sh4_disasm.h" +#include "jit/frontend/sh4/sh4_frontend.h" +#include "jit/frontend/sh4/sh4_translate.h" +#include "jit/ir/ir.h" + +// +// fsca estimate lookup table +// +static uint32_t s_fsca_table[0x20000] = { +#include "jit/frontend/sh4/sh4_fsca.inc" +}; + +// +// callbacks for translating each sh4 op +// +typedef void (*emit_cb)(ir_t *, int, const 
sh4_instr_t *, const sh4_instr_t *); + +#define EMITTER(name) \ + void sh4_emit_OP_##name(ir_t *ir, int flags, const sh4_instr_t *i, \ + const sh4_instr_t *delay) + +#define SH4_INSTR(name, desc, instr_code, cycles, flags) static EMITTER(name); +#include "jit/frontend/sh4/sh4_instr.inc" +#undef SH4_INSTR + +static emit_cb emit_callbacks[NUM_SH4_OPS] = { + NULL, // SH4_OP_INVALID +#define SH4_INSTR(name, desc, instr_code, cycles, flags) &sh4_emit_OP_##name, +#include "jit/frontend/sh4/sh4_instr.inc" +#undef SH4_INSTR +}; + +// helper functions for accessing the sh4 context, macros are used to cut +// down on copy and paste +#define load_guest(addr, type) \ + ((flags & SH4_SLOWMEM) ? ir_load_slow(ir, addr, type) \ + : ir_load_fast(ir, addr, type)) + +#define store_guest(addr, v) \ + do { \ + ((flags & SH4_SLOWMEM) ? ir_store_slow(ir, addr, v) \ + : ir_store_fast(ir, addr, v)); \ + } while (0) + +#define load_gpr(n, type) \ + ir_load_context(ir, offsetof(sh4_context_t, r[n]), type) + +#define store_gpr(n, v) \ + do { \ + CHECK_EQ(v->type, VALUE_I32); \ + ir_store_context(ir, offsetof(sh4_context_t, r[n]), v); \ + } while (0) + +// swizzle 32-bit fp loads, see notes in sh4_context.h +#define swizzle_fpr(n, type) (ir_type_size(type) == 4 ? 
(n ^ 1) : n) + +#define load_fpr(n, type) \ + ({ \ + int tmp = swizzle_fpr(n, type); \ + ir_load_context(ir, offsetof(sh4_context_t, fr[tmp]), type); \ + }) + +#define store_fpr(n, v) \ + do { \ + int tmp = swizzle_fpr(n, v->type); \ + ir_store_context(ir, offsetof(sh4_context_t, fr[tmp]), v); \ + } while (0) + +#define load_xfr(n, type) \ + ({ \ + int tmp = swizzle_fpr(n, type); \ + ir_load_context(ir, offsetof(sh4_context_t, xf[tmp]), type); \ + }) + +#define store_xfr(n, v) \ + do { \ + int tmp = swizzle_fpr(n, v->type); \ + ir_store_context(ir, offsetof(sh4_context_t, xf[tmp]), v); \ + } while (0) + +#define load_sr() (ir_load_context(ir, offsetof(sh4_context_t, sr), VALUE_I32)) + +#define store_sr(v) \ + do { \ + CHECK_EQ(v->type, VALUE_I32); \ + ir_value_t *sr_updated = \ + ir_load_context(ir, offsetof(sh4_context_t, SRUpdated), VALUE_I64); \ + ir_value_t *old_sr = load_sr(); \ + ir_store_context(ir, offsetof(sh4_context_t, sr), v); \ + ir_call_external_2(ir, sr_updated, ir_zext(ir, old_sr, VALUE_I64)); \ + } while (0) + +#define load_t() ir_and(ir, load_sr(), ir_alloc_i32(ir, T)) + +#define store_t(v) \ + do { \ + ir_value_t *sr = load_sr(); \ + ir_value_t *sr_t = ir_or(ir, sr, ir_alloc_i32(ir, T)); \ + ir_value_t *sr_not = ir_and(ir, sr, ir_alloc_i32(ir, ~T)); \ + store_sr(ir_select(ir, v, sr_t, sr_not)); \ + } while (0) + +#define load_gbr() ir_load_context(ir, offsetof(sh4_context_t, gbr), VALUE_I32) + +#define store_gbr(v) \ + do { \ + ir_store_context(ir, offsetof(sh4_context_t, gbr), v); \ + } while (0) + +#define load_fpscr() \ + ({ \ + ir_value_t *v = \ + ir_load_context(ir, offsetof(sh4_context_t, fpscr), VALUE_I32); \ + ir_and(ir, v, ir_alloc_i32(ir, 0x003fffff)); \ + }) + +#define store_fpscr(v) \ + do { \ + CHECK_EQ(v->type, VALUE_I32); \ + v = ir_and(ir, v, ir_alloc_i32(ir, 0x003fffff)); \ + \ + ir_value_t *fpscr_updated = \ + ir_load_context(ir, offsetof(sh4_context_t, FPSCRUpdated), VALUE_I64); \ + ir_value_t *old_fpscr = load_fpscr(); \ + 
ir_store_context(ir, offsetof(sh4_context_t, fpscr), v); \ + ir_call_external_2(ir, fpscr_updated, ir_zext(ir, old_fpscr, VALUE_I64)); \ + } while (0) + +#define load_pr() ir_load_context(ir, offsetof(sh4_context_t, pr), VALUE_I32) + +#define store_pr(v) \ + do { \ + CHECK_EQ(v->type, VALUE_I32); \ + ir_store_context(ir, offsetof(sh4_context_t, pr), v); \ + } while (0) + +#define emit_delay_instr() sh4_emit_instr(ir, flags, delay, NULL) + +static void sh4_invalid_instr(ir_t *ir, uint32_t guest_addr) { + ir_value_t *invalid_instruction = ir_load_context( + ir, offsetof(sh4_context_t, InvalidInstruction), VALUE_I64); + ir_call_external_2(ir, invalid_instruction, + ir_alloc_i64(ir, (int64_t)guest_addr)); +} + +static void sh4_emit_instr(ir_t *ir, int flags, const sh4_instr_t *instr, + const sh4_instr_t *delay) { + (emit_callbacks[instr->op])(ir, flags, instr, delay); +} + +// MOV #imm,Rn +EMITTER(MOVI) { + ir_value_t *v = ir_alloc_i32(ir, (int32_t)(int8_t)i->imm); + store_gpr(i->Rn, v); +} + +// MOV.W @(disp,PC),Rn +EMITTER(MOVWLPC) { + uint32_t addr = (i->disp * 2) + i->addr + 4; + ir_value_t *v = load_guest(ir_alloc_i32(ir, addr), VALUE_I16); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.L @(disp,PC),Rn +EMITTER(MOVLLPC) { + uint32_t addr = (i->disp * 4) + (i->addr & ~3) + 4; + ir_value_t *v = load_guest(ir_alloc_i32(ir, addr), VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV Rm,Rn +EMITTER(MOV) { + ir_value_t *v = load_gpr(i->Rm, VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.B Rm,@Rn +EMITTER(MOVBS) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = load_gpr(i->Rm, VALUE_I8); + store_guest(addr, v); +} + +// MOV.W Rm,@Rn +EMITTER(MOVWS) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = load_gpr(i->Rm, VALUE_I16); + store_guest(addr, v); +} + +// MOV.L Rm,@Rn +EMITTER(MOVLS) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = load_gpr(i->Rm, VALUE_I32); + store_guest(addr, v); +} + +// 
MOV.B @Rm,Rn +EMITTER(MOVBL) { + ir_value_t *v = load_guest(load_gpr(i->Rm, VALUE_I32), VALUE_I8); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.W @Rm,Rn +EMITTER(MOVWL) { + ir_value_t *v = load_guest(load_gpr(i->Rm, VALUE_I32), VALUE_I16); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.L @Rm,Rn +EMITTER(MOVLL) { + ir_value_t *v = load_guest(load_gpr(i->Rm, VALUE_I32), VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.B Rm,@-Rn +EMITTER(MOVBM) { + // decrease Rn by 1 + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + addr = ir_sub(ir, addr, ir_alloc_i32(ir, 1)); + store_gpr(i->Rn, addr); + + // store Rm at (Rn) + ir_value_t *v = load_gpr(i->Rm, VALUE_I8); + store_guest(addr, v); +} + +// MOV.W Rm,@-Rn +EMITTER(MOVWM) { + // decrease Rn by 2 + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + addr = ir_sub(ir, addr, ir_alloc_i32(ir, 2)); + store_gpr(i->Rn, addr); + + // store Rm at (Rn) + ir_value_t *v = load_gpr(i->Rm, VALUE_I16); + store_guest(addr, v); +} + +// MOV.L Rm,@-Rn +EMITTER(MOVLM) { + // decrease Rn by 4 + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + addr = ir_sub(ir, addr, ir_alloc_i32(ir, 4)); + store_gpr(i->Rn, addr); + + // store Rm at (Rn) + ir_value_t *v = load_gpr(i->Rm, VALUE_I32); + store_guest(addr, v); +} + +// MOV.B @Rm+,Rn +EMITTER(MOVBP) { + // store (Rm) at Rn + ir_value_t *addr = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = load_guest(addr, VALUE_I8); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(i->Rn, v); + + // increase Rm by 1, but only when it is not also the destination + if (i->Rm == i->Rn) return; + addr = ir_add(ir, addr, ir_alloc_i32(ir, 1)); + store_gpr(i->Rm, addr); +} + +// MOV.W @Rm+,Rn +EMITTER(MOVWP) { + // store (Rm) at Rn + ir_value_t *addr = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = load_guest(addr, VALUE_I16); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(i->Rn, v); + + // increase Rm by 2, but only when it is not also the destination + if (i->Rm == i->Rn) return; 
+ addr = ir_add(ir, addr, ir_alloc_i32(ir, 2)); + store_gpr(i->Rm, addr); +} + +// MOV.L @Rm+,Rn +EMITTER(MOVLP) { + // store (Rm) at Rn + ir_value_t *addr = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = load_guest(addr, VALUE_I32); + store_gpr(i->Rn, v); + + // increase Rm by 4, but only when it is not also the destination + if (i->Rm == i->Rn) return; + addr = ir_add(ir, addr, ir_alloc_i32(ir, 4)); + store_gpr(i->Rm, addr); +} + +// MOV.B R0,@(disp,Rn) +EMITTER(MOVBS0D) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp)); + ir_value_t *v = load_gpr(0, VALUE_I8); + store_guest(addr, v); +} + +// MOV.W R0,@(disp,Rn) +EMITTER(MOVWS0D) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp * 2)); + ir_value_t *v = load_gpr(0, VALUE_I16); + store_guest(addr, v); +} + +// MOV.L Rm,@(disp,Rn) +EMITTER(MOVLSMD) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp * 4)); + ir_value_t *v = load_gpr(i->Rm, VALUE_I32); + store_guest(addr, v); +} + +// MOV.B @(disp,Rm),R0 +EMITTER(MOVBLD0) { + ir_value_t *addr = load_gpr(i->Rm, VALUE_I32); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp)); + ir_value_t *v = load_guest(addr, VALUE_I8); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(0, v); +} + +// MOV.W @(disp,Rm),R0 +EMITTER(MOVWLD0) { + ir_value_t *addr = load_gpr(i->Rm, VALUE_I32); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp * 2)); + ir_value_t *v = load_guest(addr, VALUE_I16); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(0, v); +} + +// MOV.L @(disp,Rm),Rn +EMITTER(MOVLLDN) { + ir_value_t *addr = load_gpr(i->Rm, VALUE_I32); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp * 4)); + ir_value_t *v = load_guest(addr, VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.B Rm,@(R0,Rn) +EMITTER(MOVBS0) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gpr(i->Rn, VALUE_I32)); + ir_value_t *v = load_gpr(i->Rm, 
VALUE_I8); + store_guest(addr, v); +} + +// 
MOV.W Rm,@(R0,Rn) +EMITTER(MOVWS0) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gpr(i->Rn, VALUE_I32)); + ir_value_t *v = load_gpr(i->Rm, VALUE_I16); + store_guest(addr, v); +} + +// MOV.L Rm,@(R0,Rn) +EMITTER(MOVLS0) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gpr(i->Rn, VALUE_I32)); + ir_value_t *v = load_gpr(i->Rm, VALUE_I32); + store_guest(addr, v); +} + +// MOV.B @(R0,Rm),Rn +EMITTER(MOVBL0) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gpr(i->Rm, VALUE_I32)); + ir_value_t *v = ir_sext(ir, load_guest(addr, VALUE_I8), VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.W @(R0,Rm),Rn +EMITTER(MOVWL0) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gpr(i->Rm, VALUE_I32)); + ir_value_t *v = load_guest(addr, VALUE_I16); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.L @(R0,Rm),Rn +EMITTER(MOVLL0) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gpr(i->Rm, VALUE_I32)); + ir_value_t *v = load_guest(addr, VALUE_I32); + store_gpr(i->Rn, v); +} + +// MOV.B R0,@(disp,GBR) +EMITTER(MOVBS0G) { + ir_value_t *addr = load_gbr(); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp)); + ir_value_t *v = load_gpr(0, VALUE_I8); + store_guest(addr, v); +} + +// MOV.W R0,@(disp,GBR) +EMITTER(MOVWS0G) { + ir_value_t *addr = load_gbr(); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp * 2)); + ir_value_t *v = load_gpr(0, VALUE_I16); + store_guest(addr, v); +} + +// MOV.L R0,@(disp,GBR) +EMITTER(MOVLS0G) { + ir_value_t *addr = load_gbr(); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp * 4)); + ir_value_t *v = load_gpr(0, VALUE_I32); + store_guest(addr, v); +} + +// MOV.B @(disp,GBR),R0 +EMITTER(MOVBLG0) { + ir_value_t *addr = load_gbr(); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp)); + ir_value_t *v = load_guest(addr, VALUE_I8); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(0, v); +} + +// MOV.W 
@(disp,GBR),R0 +EMITTER(MOVWLG0) { + ir_value_t *addr = load_gbr(); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp * 2)); + ir_value_t *v = load_guest(addr, VALUE_I16); + v = ir_sext(ir, v, VALUE_I32); + store_gpr(0, v); +} + +// MOV.L @(disp,GBR),R0 +EMITTER(MOVLLG0) { + ir_value_t *addr = load_gbr(); + addr = ir_add(ir, addr, ir_alloc_i32(ir, i->disp * 4)); + ir_value_t *v = load_guest(addr, VALUE_I32); + store_gpr(0, v); +} + +// MOVA (disp,PC),R0 +EMITTER(MOVA) { + uint32_t addr = (i->disp * 4) + (i->addr & ~3) + 4; + store_gpr(0, ir_alloc_i32(ir, addr)); +} + +// MOVT Rn +EMITTER(MOVT) { + store_gpr(i->Rn, load_t()); +} + +// SWAP.B Rm,Rn +EMITTER(SWAPB) { + const int nbits = 8; + ir_value_t *v = load_gpr(i->Rm, VALUE_I32); + ir_value_t *mask = ir_alloc_i32(ir, (1u << nbits) - 1); + ir_value_t *tmp = ir_and(ir, ir_xor(ir, v, ir_lshri(ir, v, nbits)), mask); + ir_value_t *res = ir_xor(ir, v, ir_or(ir, tmp, ir_shli(ir, tmp, nbits))); + store_gpr(i->Rn, res); +} + +// SWAP.W Rm,Rn +EMITTER(SWAPW) { + const int nbits = 16; + ir_value_t *v = load_gpr(i->Rm, VALUE_I32); + ir_value_t *mask = ir_alloc_i32(ir, (1u << nbits) - 1); + ir_value_t *tmp = ir_and(ir, ir_xor(ir, v, ir_lshri(ir, v, nbits)), mask); + ir_value_t *res = ir_xor(ir, v, ir_or(ir, tmp, ir_shli(ir, tmp, nbits))); + store_gpr(i->Rn, res); +} + +// XTRCT Rm,Rn +EMITTER(XTRCT) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + rn = ir_lshri(ir, ir_and(ir, rn, ir_alloc_i32(ir, 0xffff0000)), 16); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + rm = ir_shli(ir, ir_and(ir, rm, ir_alloc_i32(ir, 0x0000ffff)), 16); + store_gpr(i->Rn, ir_or(ir, rn, rm)); +} + +// code cycles t-bit +// 0011 nnnn mmmm 1100 1 - +// ADD Rm,Rn +EMITTER(ADD) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_add(ir, rn, rm); + store_gpr(i->Rn, v); +} + +// code cycles t-bit +// 0111 nnnn iiii iiii 1 - +// ADD #imm,Rn +EMITTER(ADDI) { + ir_value_t *rn = 
load_gpr(i->Rn, VALUE_I32); + ir_value_t *imm = ir_alloc_i32(ir, (int32_t)(int8_t)i->imm); + ir_value_t *v = ir_add(ir, rn, imm); + store_gpr(i->Rn, v); +} + +// code cycles t-bit +// 0011 nnnn mmmm 1110 1 carry +// ADDC Rm,Rn +EMITTER(ADDC) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_add(ir, rn, rm); + v = ir_add(ir, v, load_t()); + store_gpr(i->Rn, v); + + // carry out of bit 31 (Hacker's Delight); shift it down so only that bit reaches T + ir_value_t *and_rnrm = ir_and(ir, rn, rm); + ir_value_t *or_rnrm = ir_or(ir, rn, rm); + ir_value_t *not_v = ir_not(ir, v); + ir_value_t *carry = ir_and(ir, or_rnrm, not_v); + carry = ir_lshri(ir, ir_or(ir, and_rnrm, carry), 31); + store_t(carry); +} + +// code cycles t-bit +// 0011 nnnn mmmm 1111 1 overflow +// ADDV Rm,Rn +EMITTER(ADDV) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_add(ir, rn, rm); + store_gpr(i->Rn, v); + + // compute overflow flag, taken from Hacker's Delight + ir_value_t *xor_vrn = ir_xor(ir, v, rn); + ir_value_t *xor_vrm = ir_xor(ir, v, rm); + ir_value_t *overflow = ir_lshri(ir, ir_and(ir, xor_vrn, xor_vrm), 31); + store_t(overflow); +} + +// code cycles t-bit +// 1000 1000 iiii iiii 1 comparison result +// CMP/EQ #imm,R0 +EMITTER(CMPEQI) { + ir_value_t *imm = ir_alloc_i32(ir, (int32_t)(int8_t)i->imm); + ir_value_t *r0 = load_gpr(0, VALUE_I32); + store_t(ir_cmp_eq(ir, r0, imm)); +} + +// code cycles t-bit +// 0011 nnnn mmmm 0000 1 comparison result +// CMP/EQ Rm,Rn +EMITTER(CMPEQ) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + store_t(ir_cmp_eq(ir, rn, rm)); +} + +// code cycles t-bit +// 0011 nnnn mmmm 0010 1 comparison result +// CMP/HS Rm,Rn +EMITTER(CMPHS) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + store_t(ir_cmp_uge(ir, rn, rm)); +} + +// code cycles t-bit +// 
0011 nnnn mmmm 0011 1 comparison result +// 
CMP/GE Rm,Rn +EMITTER(CMPGE) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + store_t(ir_cmp_sge(ir, rn, rm)); +} + +// code cycles t-bit +// 0011 nnnn mmmm 0110 1 comparison result +// CMP/HI Rm,Rn +EMITTER(CMPHI) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + store_t(ir_cmp_ugt(ir, rn, rm)); +} + +// code cycles t-bit +// 0011 nnnn mmmm 0111 1 comparison result +// CMP/GT Rm,Rn +EMITTER(CMPGT) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + store_t(ir_cmp_sgt(ir, rn, rm)); +} + +// code cycles t-bit +// 0100 nnnn 0001 0001 1 comparison result +// CMP/PZ Rn +EMITTER(CMPPZ) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + store_t(ir_cmp_sge(ir, rn, ir_alloc_i32(ir, 0))); +} + +// code cycles t-bit +// 0100 nnnn 0001 0101 1 comparison result +// CMP/PL Rn +EMITTER(CMPPL) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + store_t(ir_cmp_sgt(ir, rn, ir_alloc_i32(ir, 0))); +} + +// code cycles t-bit +// 0010 nnnn mmmm 1100 1 comparison result +// CMP/STR Rm,Rn +EMITTER(CMPSTR) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *diff = ir_xor(ir, rn, rm); + + // if any diff is zero, the bytes match + ir_value_t *b4_eq = ir_cmp_eq( + ir, ir_and(ir, diff, ir_alloc_i32(ir, 0xff000000)), ir_alloc_i32(ir, 0)); + ir_value_t *b3_eq = ir_cmp_eq( + ir, ir_and(ir, diff, ir_alloc_i32(ir, 0x00ff0000)), ir_alloc_i32(ir, 0)); + ir_value_t *b2_eq = ir_cmp_eq( + ir, ir_and(ir, diff, ir_alloc_i32(ir, 0x0000ff00)), ir_alloc_i32(ir, 0)); + ir_value_t *b1_eq = ir_cmp_eq( + ir, ir_and(ir, diff, ir_alloc_i32(ir, 0x000000ff)), ir_alloc_i32(ir, 0)); + + store_t(ir_or(ir, ir_or(ir, ir_or(ir, b1_eq, b2_eq), b3_eq), b4_eq)); +} + +// code cycles t-bit +// 0010 nnnn mmmm 0111 1 calculation result +// DIV0S Rm,Rn +EMITTER(DIV0S) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t 
*rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *qm = ir_xor(ir, rn, rm); + + // update Q == M flag + ir_store_context(ir, offsetof(sh4_context_t, sr_qm), ir_not(ir, qm)); + + // msb of Q ^ M -> T + store_t(ir_lshri(ir, qm, 31)); +} + +// code cycles t-bit +// 0000 0000 0001 1001 1 0 +// DIV0U +EMITTER(DIV0U) { // + ir_store_context(ir, offsetof(sh4_context_t, sr_qm), + ir_alloc_i32(ir, 0x80000000)); + + store_sr(ir_and(ir, load_sr(), ir_alloc_i32(ir, ~T))); +} + +// code cycles t-bit +// 0011 nnnn mmmm 0100 1 calculation result +// DIV1 Rm,Rn +EMITTER(DIV1) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + + // if Q == M, r0 = ~Rm and C = 1; else, r0 = Rm and C = 0 + ir_value_t *qm = ir_ashri( + ir, ir_load_context(ir, offsetof(sh4_context_t, sr_qm), VALUE_I32), 31); + ir_value_t *r0 = ir_xor(ir, rm, qm); + ir_value_t *carry = ir_lshri(ir, qm, 31); + + // initialize output bit as (Q == M) ^ Rn + qm = ir_xor(ir, qm, rn); + + // shift Rn left by 1 and add T + rn = ir_shli(ir, rn, 1); + rn = ir_or(ir, rn, load_t()); + + // add or subtract Rm based on r0 and C + ir_value_t *rd = ir_add(ir, rn, r0); + rd = ir_add(ir, rd, carry); + store_gpr(i->Rn, rd); + + // if C is cleared, invert output bit + ir_value_t *and_rnr0 = ir_and(ir, rn, r0); + ir_value_t *or_rnr0 = ir_or(ir, rn, r0); + ir_value_t *not_rd = ir_not(ir, rd); + carry = ir_and(ir, or_rnr0, not_rd); + carry = ir_or(ir, and_rnr0, carry); + carry = ir_lshri(ir, carry, 31); + qm = ir_select(ir, carry, qm, ir_not(ir, qm)); + ir_store_context(ir, offsetof(sh4_context_t, sr_qm), qm); + + // set T to output bit (which happens to be Q == M) + store_t(ir_lshri(ir, qm, 31)); +} + +// DMULS.L Rm,Rn +EMITTER(DMULS) { + ir_value_t *rn = ir_sext(ir, load_gpr(i->Rn, VALUE_I32), VALUE_I64); + ir_value_t *rm = ir_sext(ir, load_gpr(i->Rm, VALUE_I32), VALUE_I64); + + ir_value_t *p = ir_smul(ir, rm, rn); + ir_value_t *low = ir_trunc(ir, p, VALUE_I32); + ir_value_t *high = ir_trunc(ir, 
ir_lshri(ir, p, 32), VALUE_I32); + + ir_store_context(ir, offsetof(sh4_context_t, macl), low); + ir_store_context(ir, offsetof(sh4_context_t, mach), high); +} + +// DMULU.L Rm,Rn +EMITTER(DMULU) { + ir_value_t *rn = ir_zext(ir, load_gpr(i->Rn, VALUE_I32), VALUE_I64); + ir_value_t *rm = ir_zext(ir, load_gpr(i->Rm, VALUE_I32), VALUE_I64); + + ir_value_t *p = ir_umul(ir, rm, rn); + ir_value_t *low = ir_trunc(ir, p, VALUE_I32); + ir_value_t *high = ir_trunc(ir, ir_lshri(ir, p, 32), VALUE_I32); + + ir_store_context(ir, offsetof(sh4_context_t, macl), low); + ir_store_context(ir, offsetof(sh4_context_t, mach), high); +} + +// DT Rn +EMITTER(DT) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = ir_sub(ir, rn, ir_alloc_i32(ir, 1)); + store_gpr(i->Rn, v); + store_t(ir_cmp_eq(ir, v, ir_alloc_i32(ir, 0))); +} + +// EXTS.B Rm,Rn +EMITTER(EXTSB) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I8); + ir_value_t *v = ir_sext(ir, rm, VALUE_I32); + store_gpr(i->Rn, v); +} + +// EXTS.W Rm,Rn +EMITTER(EXTSW) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I16); + ir_value_t *v = ir_sext(ir, rm, VALUE_I32); + store_gpr(i->Rn, v); +} + +// EXTU.B Rm,Rn +EMITTER(EXTUB) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I8); + ir_value_t *v = ir_zext(ir, rm, VALUE_I32); + store_gpr(i->Rn, v); +} + +// EXTU.W Rm,Rn +EMITTER(EXTUW) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I16); + ir_value_t *v = ir_zext(ir, rm, VALUE_I32); + store_gpr(i->Rn, v); +} + +// MAC.L @Rm+,@Rn+ +EMITTER(MACL) { + LOG_FATAL("MACL not implemented"); +} + +// MAC.W @Rm+,@Rn+ +EMITTER(MACW) { + LOG_FATAL("MACW not implemented"); +} + +// MUL.L Rm,Rn +EMITTER(MULL) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_smul(ir, rn, rm); + ir_store_context(ir, offsetof(sh4_context_t, macl), v); +} + +// MULS Rm,Rn +EMITTER(MULS) { + ir_value_t *rn = ir_sext(ir, load_gpr(i->Rn, VALUE_I16), VALUE_I32); + ir_value_t *rm = ir_sext(ir, load_gpr(i->Rm, VALUE_I16), 
VALUE_I32); + ir_value_t *v = ir_smul(ir, rn, rm); + ir_store_context(ir, offsetof(sh4_context_t, macl), v); +} + +// MULU Rm,Rn +EMITTER(MULU) { + ir_value_t *rn = ir_zext(ir, load_gpr(i->Rn, VALUE_I16), VALUE_I32); + ir_value_t *rm = ir_zext(ir, load_gpr(i->Rm, VALUE_I16), VALUE_I32); + ir_value_t *v = ir_umul(ir, rn, rm); + ir_store_context(ir, offsetof(sh4_context_t, macl), v); +} + +// NEG Rm,Rn +EMITTER(NEG) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_neg(ir, rm); + store_gpr(i->Rn, v); +} + +// NEGC Rm,Rn +EMITTER(NEGC) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *t = load_t(); + ir_value_t *v = ir_sub(ir, ir_neg(ir, rm), t); + store_gpr(i->Rn, v); + ir_value_t *carry = ir_or(ir, t, rm); + store_t(carry); +} + +// SUB Rm,Rn +EMITTER(SUB) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_sub(ir, rn, rm); + store_gpr(i->Rn, v); +} + +// SUBC Rm,Rn +EMITTER(SUBC) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_sub(ir, rn, rm); + v = ir_sub(ir, v, load_t()); + store_gpr(i->Rn, v); + + // borrow out of bit 31 (Hacker's Delight); shift it down so only that bit reaches T + ir_value_t *l = ir_and(ir, ir_not(ir, rn), rm); + ir_value_t *r = ir_and(ir, ir_or(ir, ir_not(ir, rn), rm), v); + ir_value_t *carry = ir_lshri(ir, ir_or(ir, l, r), 31); + store_t(carry); +} + +// SUBV Rm,Rn +EMITTER(SUBV) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_sub(ir, rn, rm); + store_gpr(i->Rn, v); + + // compute overflow flag, taken from Hacker's Delight + ir_value_t *xor_rnrm = ir_xor(ir, rn, rm); + ir_value_t *xor_vrn = ir_xor(ir, v, rn); + ir_value_t *overflow = ir_lshri(ir, ir_and(ir, xor_rnrm, xor_vrn), 31); + store_t(overflow); +} + +// code cycles t-bit +// 0010 nnnn mmmm 1001 1 - +// AND Rm,Rn +EMITTER(AND) { + ir_value_t *rm = 
load_gpr(i->Rm, VALUE_I32); + ir_value_t *rn = 
load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = ir_and(ir, rn, rm); + store_gpr(i->Rn, v); +} + +// code cycles t-bit +// 1100 1001 iiii iiii 1 - +// AND #imm,R0 +EMITTER(ANDI) { + ir_value_t *r0 = load_gpr(0, VALUE_I32); + ir_value_t *imm = ir_alloc_i32(ir, i->imm); + ir_value_t *v = ir_and(ir, r0, imm); + store_gpr(0, v); +} + +// code cycles t-bit +// 1100 1101 iiii iiii 1 - +// AND.B #imm,@(R0,GBR) +EMITTER(ANDB) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gbr()); + ir_value_t *v = load_guest(addr, VALUE_I8); + v = ir_and(ir, v, ir_alloc_i8(ir, (int8_t)i->imm)); + store_guest(addr, v); +} + +// NOT Rm,Rn +EMITTER(NOT) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_not(ir, rm); + store_gpr(i->Rn, v); +} + +// OR Rm,Rn +EMITTER(OR) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_or(ir, rn, rm); + store_gpr(i->Rn, v); +} + +// OR #imm,R0 +EMITTER(ORI) { + ir_value_t *r0 = load_gpr(0, VALUE_I32); + ir_value_t *imm = ir_alloc_i32(ir, i->imm); + ir_value_t *v = ir_or(ir, r0, imm); + store_gpr(0, v); +} + +// OR.B #imm,@(R0,GBR) +EMITTER(ORB) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gbr()); + ir_value_t *v = load_guest(addr, VALUE_I8); + v = ir_or(ir, v, ir_alloc_i8(ir, (int8_t)i->imm)); + store_guest(addr, v); +} + +// TAS.B @Rn +EMITTER(TAS) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = load_guest(addr, VALUE_I8); + store_guest(addr, ir_or(ir, v, ir_alloc_i8(ir, 0x80))); + store_t(ir_cmp_eq(ir, v, ir_alloc_i8(ir, 0))); +} + +// TST Rm,Rn +EMITTER(TST) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_and(ir, rn, rm); + store_t(ir_cmp_eq(ir, v, ir_alloc_i32(ir, 0))); +} + +// TST #imm,R0 +EMITTER(TSTI) { + ir_value_t *r0 = load_gpr(0, VALUE_I32); + ir_value_t *imm = ir_alloc_i32(ir, i->imm); + ir_value_t *v = ir_and(ir, 
r0, imm); + store_t(ir_cmp_eq(ir, v, ir_alloc_i32(ir, 0))); +} + +// TST.B #imm,@(R0,GBR) +EMITTER(TSTB) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gbr()); + ir_value_t *data = load_guest(addr, VALUE_I8); + ir_value_t *imm = ir_alloc_i8(ir, (int8_t)i->imm); + ir_value_t *v = ir_and(ir, data, imm); + store_t(ir_cmp_eq(ir, v, ir_alloc_i8(ir, 0))); +} + +// XOR Rm,Rn +EMITTER(XOR) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_xor(ir, rn, rm); + store_gpr(i->Rn, v); +} + +// XOR #imm,R0 +EMITTER(XORI) { + ir_value_t *r0 = load_gpr(0, VALUE_I32); + ir_value_t *imm = ir_alloc_i32(ir, i->imm); + ir_value_t *v = ir_xor(ir, r0, imm); + store_gpr(0, v); +} + +// XOR.B #imm,@(R0,GBR) +EMITTER(XORB) { + ir_value_t *addr = load_gpr(0, VALUE_I32); + addr = ir_add(ir, addr, load_gbr()); + ir_value_t *data = load_guest(addr, VALUE_I8); + ir_value_t *imm = ir_alloc_i8(ir, (int8_t)i->imm); + ir_value_t *v = ir_xor(ir, data, imm); + store_guest(addr, v); +} + +// ROTL Rn +EMITTER(ROTL) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rn_msb = ir_and(ir, ir_lshri(ir, rn, 31), ir_alloc_i32(ir, 0x1)); + ir_value_t *v = ir_or(ir, ir_shli(ir, rn, 1), rn_msb); + store_gpr(i->Rn, v); + store_t(rn_msb); +} + +// ROTR Rn +EMITTER(ROTR) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rn_lsb = ir_and(ir, rn, ir_alloc_i32(ir, 0x1)); + ir_value_t *v = ir_shli(ir, rn_lsb, 31); + v = ir_or(ir, v, ir_lshri(ir, rn, 1)); + store_gpr(i->Rn, v); + store_t(rn_lsb); +} + +// ROTCL Rn +EMITTER(ROTCL) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rn_msb = ir_and(ir, ir_lshri(ir, rn, 31), ir_alloc_i32(ir, 0x1)); + ir_value_t *v = ir_shli(ir, rn, 1); + v = ir_or(ir, v, load_t()); + store_gpr(i->Rn, v); + store_t(rn_msb); +} + +// ROTCR Rn +EMITTER(ROTCR) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rn_lsb = ir_and(ir, rn, ir_alloc_i32(ir, 
0x1)); + ir_value_t *v = ir_shli(ir, load_t(), 31); + v = ir_or(ir, v, ir_lshri(ir, rn, 1)); + store_gpr(i->Rn, v); + store_t(rn_lsb); +} + +// SHAD Rm,Rn +EMITTER(SHAD) { + // when Rm >= 0, Rn << Rm + // when Rm < 0, Rn >> Rm + // when shifting right > 32, Rn = (Rn >= 0 ? 0 : -1) + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_ashd(ir, rn, rm); + store_gpr(i->Rn, v); +} + +// SHAL Rn (same as SHLL) +EMITTER(SHAL) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rn_msb = ir_and(ir, ir_lshri(ir, rn, 31), ir_alloc_i32(ir, 0x1)); + ir_value_t *v = ir_shli(ir, rn, 1); + store_gpr(i->Rn, v); + store_t(rn_msb); +} + +// SHAR Rn +EMITTER(SHAR) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rn_lsb = ir_and(ir, rn, ir_alloc_i32(ir, 0x1)); + ir_value_t *v = ir_ashri(ir, rn, 1); + store_gpr(i->Rn, v); + store_t(rn_lsb); +} + +// SHLD Rm,Rn +EMITTER(SHLD) { + // when Rm >= 0, Rn << Rm + // when Rm < 0, Rn >> Rm + // when shifting right >= 32, Rn = 0 + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = ir_lshd(ir, rn, rm); + store_gpr(i->Rn, v); +} + +// SHLL Rn (same as SHAL) +EMITTER(SHLL) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rn_msb = ir_and(ir, ir_lshri(ir, rn, 31), ir_alloc_i32(ir, 0x1)); + ir_value_t *v = ir_shli(ir, rn, 1); + store_gpr(i->Rn, v); + store_t(rn_msb); +} + +// SHLR Rn +EMITTER(SHLR) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *rn_lsb = ir_and(ir, rn, ir_alloc_i32(ir, 0x1)); + ir_value_t *v = ir_lshri(ir, rn, 1); + store_gpr(i->Rn, v); + store_t(rn_lsb); +} + +// SHLL2 Rn +EMITTER(SHLL2) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = ir_shli(ir, rn, 2); + store_gpr(i->Rn, v); +} + +// SHLR2 Rn +EMITTER(SHLR2) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = ir_lshri(ir, rn, 2); + store_gpr(i->Rn, v); +} + +// SHLL8 Rn 
+EMITTER(SHLL8) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = ir_shli(ir, rn, 8); + store_gpr(i->Rn, v); +} + +// SHLR8 Rn +EMITTER(SHLR8) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = ir_lshri(ir, rn, 8); + store_gpr(i->Rn, v); +} + +// SHLL16 Rn +EMITTER(SHLL16) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = ir_shli(ir, rn, 16); + store_gpr(i->Rn, v); +} + +// SHLR16 Rn +EMITTER(SHLR16) { + ir_value_t *rn = load_gpr(i->Rn, VALUE_I32); + ir_value_t *v = ir_lshri(ir, rn, 16); + store_gpr(i->Rn, v); +} + +// code cycles t-bit +// 1000 1011 dddd dddd 3/1 - +// BF disp +EMITTER(BF) { + uint32_t dest_addr = ((int8_t)i->disp * 2) + i->addr + 4; + ir_value_t *cond = load_t(); + ir_branch_cond(ir, cond, ir_alloc_i32(ir, i->addr + 2), + ir_alloc_i32(ir, dest_addr)); +} + +// code cycles t-bit +// 1000 1111 dddd dddd 3/1 - +// BFS disp +EMITTER(BFS) { + ir_value_t *cond = load_t(); + emit_delay_instr(); + uint32_t dest_addr = ((int8_t)i->disp * 2) + i->addr + 4; + ir_branch_cond(ir, cond, ir_alloc_i32(ir, i->addr + 4), + ir_alloc_i32(ir, dest_addr)); +} + +// code cycles t-bit +// 1000 1001 dddd dddd 3/1 - +// BT disp +EMITTER(BT) { + uint32_t dest_addr = ((int8_t)i->disp * 2) + i->addr + 4; + ir_value_t *cond = load_t(); + ir_branch_cond(ir, cond, ir_alloc_i32(ir, dest_addr), + ir_alloc_i32(ir, i->addr + 2)); +} + +// code cycles t-bit +// 1000 1101 dddd dddd 2/1 - +// BTS disp +EMITTER(BTS) { + ir_value_t *cond = load_t(); + emit_delay_instr(); + uint32_t dest_addr = ((int8_t)i->disp * 2) + i->addr + 4; + ir_branch_cond(ir, cond, ir_alloc_i32(ir, dest_addr), + ir_alloc_i32(ir, i->addr + 4)); +} + +// code cycles t-bit +// 1010 dddd dddd dddd 2 - +// BRA disp +EMITTER(BRA) { + emit_delay_instr(); + int32_t disp = ((i->disp & 0xfff) << 20) >> + 20; // 12-bit displacement must be sign extended + uint32_t dest_addr = (disp * 2) + i->addr + 4; + ir_branch(ir, ir_alloc_i32(ir, dest_addr)); +} + +// code cycles t-bit 
+// 0000 mmmm 0010 0011 2 -
+// BRAF Rn
+EMITTER(BRAF) {
+  // Rn is read before the delay slot is emitted
+  ir_value_t *rn = load_gpr(i->Rn, VALUE_I32);
+  emit_delay_instr();
+  ir_value_t *dest_addr = ir_add(ir, ir_alloc_i32(ir, i->addr + 4), rn);
+  ir_branch(ir, dest_addr);
+}
+
+// code cycles t-bit
+// 1011 dddd dddd dddd 2 -
+// BSR disp
+EMITTER(BSR) {
+  emit_delay_instr();
+  // 12-bit displacement must be sign extended. flip the sign bit and subtract
+  // its weight instead of shifting left into the sign bit of an int, which is
+  // undefined, and which wouldn't sign extend at all for an unsigned operand
+  int32_t disp = (int32_t)(i->disp & 0xfff);
+  disp = (disp ^ 0x800) - 0x800;
+  uint32_t ret_addr = i->addr + 4;
+  uint32_t dest_addr = ret_addr + disp * 2;
+  store_pr(ir_alloc_i32(ir, ret_addr));
+  ir_branch(ir, ir_alloc_i32(ir, dest_addr));
+}
+
+// code cycles t-bit
+// 0000 mmmm 0000 0011 2 -
+// BSRF Rn
+EMITTER(BSRF) {
+  // Rn is read before the delay slot is emitted
+  ir_value_t *rn = load_gpr(i->Rn, VALUE_I32);
+  emit_delay_instr();
+  ir_value_t *ret_addr = ir_alloc_i32(ir, i->addr + 4);
+  ir_value_t *dest_addr = ir_add(ir, rn, ret_addr);
+  store_pr(ret_addr);
+  ir_branch(ir, dest_addr);
+}
+
+// JMP @Rn
+EMITTER(JMP) {
+  // the target is read before the delay slot is emitted
+  ir_value_t *dest_addr = load_gpr(i->Rn, VALUE_I32);
+  emit_delay_instr();
+  ir_branch(ir, dest_addr);
+}
+
+// JSR @Rn
+EMITTER(JSR) {
+  // the target is read before the delay slot is emitted
+  ir_value_t *dest_addr = load_gpr(i->Rn, VALUE_I32);
+  emit_delay_instr();
+  ir_value_t *ret_addr = ir_alloc_i32(ir, i->addr + 4);
+  store_pr(ret_addr);
+  ir_branch(ir, dest_addr);
+}
+
+// RTS
+EMITTER(RTS) {
+  // PR is read before the delay slot is emitted
+  ir_value_t *dest_addr = load_pr();
+  emit_delay_instr();
+  ir_branch(ir, dest_addr);
+}
+
+// code cycles t-bit
+// 0000 0000 0010 1000 1 -
+// CLRMAC
+EMITTER(CLRMAC) {
+  ir_store_context(ir, offsetof(sh4_context_t, mach), ir_alloc_i32(ir, 0));
+  ir_store_context(ir, offsetof(sh4_context_t, macl), ir_alloc_i32(ir, 0));
+}
+
+// CLRS
+EMITTER(CLRS) {
+  // clear the S bit through store_sr rather than a raw context store
+  ir_value_t *sr = load_sr();
+  sr = ir_and(ir, sr, ir_alloc_i32(ir, ~S));
+  store_sr(sr);
+}
+
+// code cycles t-bit
+// 0000 0000 0000 1000 1 -
+// CLRT
+EMITTER(CLRT) {
+  store_t(ir_alloc_i32(ir, 0));
+}
+
+// LDC Rm,SR
+EMITTER(LDCSR) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  store_sr(rm);
+}
+
+// LDC Rm,GBR
+EMITTER(LDCGBR) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  store_gbr(rm);
+}
+
+// LDC Rm,VBR
+EMITTER(LDCVBR) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, vbr), rm);
+}
+
+// LDC Rm,SSR
+EMITTER(LDCSSR) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, ssr), rm);
+}
+
+// LDC Rm,SPC
+EMITTER(LDCSPC) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, spc), rm);
+}
+
+// LDC Rm,DBR
+EMITTER(LDCDBR) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, dbr), rm);
+}
+
+// LDC Rm,Rn_BANK
+EMITTER(LDCRBANK) {
+  // only the low 3 bits select the banked register, stored 4 bytes apart in
+  // the context's ralt array
+  int reg = i->Rn & 0x7;
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, ralt) + reg * 4, rm);
+}
+
+// LDC.L @Rm+,SR
+EMITTER(LDCMSR) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  store_sr(v);
+  // reload Rm, sr store could have swapped banks
+  addr = load_gpr(i->Rm, VALUE_I32);
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// LDC.L @Rm+,GBR
+EMITTER(LDCMGBR) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  store_gbr(v);
+  // post-increment Rm past the loaded word
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// LDC.L @Rm+,VBR
+EMITTER(LDCMVBR) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, vbr), v);
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// LDC.L @Rm+,SSR
+EMITTER(LDCMSSR) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, ssr), v);
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// LDC.L @Rm+,SPC
+EMITTER(LDCMSPC) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, spc), v);
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// LDC.L @Rm+,DBR
+EMITTER(LDCMDBR) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, dbr), v);
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// LDC.L @Rm+,Rn_BANK
+EMITTER(LDCMRBANK) {
+  int reg = i->Rn & 0x7;
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  // unlike the other LDC.L forms, Rm is incremented before the guest load is
+  // emitted; the load still uses the original address
+  store_gpr(i->Rm, ir_add(ir, addr, ir_alloc_i32(ir, 4)));
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, ralt) + reg * 4, v);
+}
+
+// LDS Rm,MACH
+EMITTER(LDSMACH) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, mach), rm);
+}
+
+// LDS Rm,MACL
+EMITTER(LDSMACL) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, macl), rm);
+}
+
+// LDS Rm,PR
+EMITTER(LDSPR) {
+  ir_value_t *rm = load_gpr(i->Rm, VALUE_I32);
+  store_pr(rm);
+}
+
+// LDS.L @Rm+,MACH
+EMITTER(LDSMMACH) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, mach), v);
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// LDS.L @Rm+,MACL
+EMITTER(LDSMMACL) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, macl), v);
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// LDS.L @Rm+,PR
+EMITTER(LDSMPR) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+  ir_value_t *v = load_guest(addr, VALUE_I32);
+  store_pr(v);
+  addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+  store_gpr(i->Rm, addr);
+}
+
+// MOVCA.L R0,@Rn
+EMITTER(MOVCAL) {
+  // emitted as a plain 32-bit store of R0 to @Rn
+  ir_value_t *addr = load_gpr(i->Rn, VALUE_I32);
+  ir_value_t *r0 = load_gpr(0, VALUE_I32);
+  store_guest(addr, r0);
+}
+
+// NOP
+EMITTER(NOP) {}
+
+// OCBI
+EMITTER(OCBI) {}
+
+// OCBP
+EMITTER(OCBP) {}
+
+// OCBWB
+EMITTER(OCBWB) {}
+
+// PREF @Rn
+EMITTER(PREF) {
+  // calls the handler stored in the context's Prefetch slot, passing Rn zero
+  // extended to 64 bits
+  ir_value_t *prefetch =
+      ir_load_context(ir, offsetof(sh4_context_t, Prefetch), VALUE_I64);
+  ir_value_t *addr = ir_zext(ir, load_gpr(i->Rn, VALUE_I32), VALUE_I64);
+  ir_call_external_2(ir, prefetch, addr);
+}
+
+// RTE
+EMITTER(RTE) {
+  // SPC and SSR are both read up front, SR is restored before the delay slot
+  // is emitted
+  ir_value_t *spc =
+      ir_load_context(ir, offsetof(sh4_context_t, spc), VALUE_I32);
+  ir_value_t *ssr =
+      ir_load_context(ir, offsetof(sh4_context_t, ssr), VALUE_I32);
+  store_sr(ssr);
+  emit_delay_instr();
+  ir_branch(ir, spc);
+}
+
+// SETS
+EMITTER(SETS) {
+  store_sr(ir_or(ir, load_sr(), ir_alloc_i32(ir, S)));
+}
+
+// SETT
+EMITTER(SETT) {
+  store_t(ir_alloc_i32(ir, 1));
+}
+
+// SLEEP
+EMITTER(SLEEP) {
+  LOG_FATAL("SLEEP not implemented");
+}
+
+// STC SR,Rn
+EMITTER(STCSR) {
+  ir_value_t *v = load_sr();
+  store_gpr(i->Rn, v);
+}
+
+// STC GBR,Rn
+EMITTER(STCGBR) {
+  ir_value_t *v = load_gbr();
+  store_gpr(i->Rn, v);
+}
+
+// STC VBR,Rn
+EMITTER(STCVBR) {
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, vbr), VALUE_I32);
+  store_gpr(i->Rn, v);
+}
+
+// STC SSR,Rn
+EMITTER(STCSSR) {
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, ssr), VALUE_I32);
+  store_gpr(i->Rn, v);
+}
+
+// STC SPC,Rn
+EMITTER(STCSPC) {
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, spc), VALUE_I32);
+  store_gpr(i->Rn, v);
+}
+
+// STC SGR,Rn
+EMITTER(STCSGR) {
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, sgr), VALUE_I32);
+  store_gpr(i->Rn, v);
+}
+
+// STC DBR,Rn
+EMITTER(STCDBR) {
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, dbr), VALUE_I32);
+  store_gpr(i->Rn, v);
+}
+
+// STC Rm_BANK,Rn
+EMITTER(STCRBANK) {
+  // only the low 3 bits select the banked register
+  int reg = i->Rm & 0x7;
+  ir_value_t *v =
+      ir_load_context(ir, offsetof(sh4_context_t, ralt) + reg * 4, VALUE_I32);
+  store_gpr(i->Rn, v);
+}
+
+// STC.L SR,@-Rn
+EMITTER(STCMSR) {
+  // pre-decrement Rn by 4, write it back, then store SR at the new address
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+  ir_value_t *v = load_sr();
+  store_guest(addr, v);
+}
+
+// STC.L GBR,@-Rn
+EMITTER(STCMGBR) {
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+  ir_value_t *v = load_gbr();
+  store_guest(addr, v);
+}
+
+// STC.L VBR,@-Rn
+EMITTER(STCMVBR) {
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, vbr), VALUE_I32);
+  store_guest(addr, v);
+}
+
+// STC.L SSR,@-Rn
+EMITTER(STCMSSR) {
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, ssr), VALUE_I32);
+  store_guest(addr, v);
+}
+
+// STC.L SPC,@-Rn
+EMITTER(STCMSPC) {
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, spc), VALUE_I32);
+  store_guest(addr, v);
+}
+
+// STC.L SGR,@-Rn
+EMITTER(STCMSGR) {
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, sgr), VALUE_I32);
+  store_guest(addr, v);
+}
+
+// STC.L DBR,@-Rn
+EMITTER(STCMDBR) {
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, dbr), VALUE_I32);
+  store_guest(addr, v);
+}
+
+// STC.L Rm_BANK,@-Rn
+EMITTER(STCMRBANK) {
+  // only the low 3 bits select the banked register
+  int reg = i->Rm & 0x7;
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+  ir_value_t *v =
+      ir_load_context(ir, offsetof(sh4_context_t, ralt) + reg * 4, VALUE_I32);
+  store_guest(addr, v);
+}
+
+// STS MACH,Rn
+EMITTER(STSMACH) {
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, mach), VALUE_I32);
+  store_gpr(i->Rn, v);
+}
+
+// STS MACL,Rn
+EMITTER(STSMACL) {
+  ir_value_t *v = ir_load_context(ir, offsetof(sh4_context_t, macl), VALUE_I32);
+  store_gpr(i->Rn, v);
+}
+
+// STS PR,Rn
+EMITTER(STSPR) {
+  ir_value_t *v = load_pr();
+  store_gpr(i->Rn, v);
+}
+
+// STS.L MACH,@-Rn
+EMITTER(STSMMACH) {
+  // pre-decrement Rn by 4, write it back, then store MACH at the new address
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+
+  ir_value_t *mach =
+      ir_load_context(ir, offsetof(sh4_context_t, mach), VALUE_I32);
+  store_guest(addr, mach);
+}
+
+// STS.L MACL,@-Rn
+EMITTER(STSMMACL) {
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+
+  ir_value_t *macl =
+      ir_load_context(ir, offsetof(sh4_context_t, macl), VALUE_I32);
+  store_guest(addr, macl);
+}
+
+// STS.L PR,@-Rn
+EMITTER(STSMPR) {
+  ir_value_t *addr =
+      ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+  store_gpr(i->Rn, addr);
+
+  ir_value_t *pr = load_pr();
+  store_guest(addr, pr);
+}
+
+// TRAPA #imm
+EMITTER(TRAPA) {
+  LOG_FATAL("TRAPA not implemented");
+}
+
+// FLDI0 FRn 1111nnnn10001101
+EMITTER(FLDI0) {
+  // the constant is stored as its raw 32-bit pattern (0.0f is all zero bits)
+  store_fpr(i->Rn, ir_alloc_i32(ir, 0));
+}
+
+// FLDI1 FRn 1111nnnn10011101
+EMITTER(FLDI1) {
+  // 0x3F800000 is the IEEE-754 single precision bit pattern of 1.0f
+  store_fpr(i->Rn, ir_alloc_i32(ir, 0x3F800000));
+}
+
+// FMOV FRm,FRn 1111nnnnmmmm1100
+// FMOV DRm,DRn 1111nnn0mmm01100
+// FMOV XDm,DRn 1111nnn0mmm11100
+// FMOV DRm,XDn 1111nnn1mmm01100
+// FMOV XDm,XDn 1111nnn1mmm11100
+EMITTER(FMOV) {
+  // with SH4_DOUBLE_SZ set a 64-bit pair is moved, and an odd register number
+  // selects the load_xfr / store_xfr accessors with the low bit masked off
+  if (flags & SH4_DOUBLE_SZ) {
+    if (i->Rm & 1) {
+      ir_value_t *rm = load_xfr(i->Rm & 0xe, VALUE_I64);
+      if (i->Rn & 1) {
+        store_xfr(i->Rn & 0xe, rm);
+      } else {
+        store_fpr(i->Rn, rm);
+      }
+    } else {
+      ir_value_t *rm = load_fpr(i->Rm, VALUE_I64);
+      if (i->Rn & 1) {
+        store_xfr(i->Rn & 0xe, rm);
+      } else {
+        store_fpr(i->Rn, rm);
+      }
+    }
+  } else {
+    store_fpr(i->Rn, load_fpr(i->Rm, VALUE_I32));
+  }
+}
+
+// FMOV.S @Rm,FRn 1111nnnnmmmm1000
+// FMOV @Rm,DRn 1111nnn0mmmm1000
+// FMOV @Rm,XDn 1111nnn1mmmm1000
+EMITTER(FMOV_LOAD) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+
+  if (flags & SH4_DOUBLE_SZ) {
+    // the 64-bit form is emitted as two 32-bit loads from addr and addr + 4
+    ir_value_t *v_low = load_guest(addr, VALUE_I32);
+    ir_value_t *v_high =
+        load_guest(ir_add(ir, addr, ir_alloc_i32(ir, 4)), VALUE_I32);
+    if (i->Rn & 1) {
+      store_xfr(i->Rn & 0xe, v_low);
+      store_xfr(i->Rn, v_high);
+    } else {
+      store_fpr(i->Rn, v_low);
+      store_fpr(i->Rn | 0x1, v_high);
+    }
+  } else {
+    store_fpr(i->Rn, load_guest(addr, VALUE_I32));
+  }
+}
+
+// FMOV.S @(R0,Rm),FRn 1111nnnnmmmm0110
+// FMOV @(R0,Rm),DRn 1111nnn0mmmm0110
+// FMOV @(R0,Rm),XDn 1111nnn1mmmm0110
+EMITTER(FMOV_INDEX_LOAD) {
+  // same as FMOV_LOAD, but the address is R0 + Rm
+  ir_value_t *addr =
+      ir_add(ir, load_gpr(0, VALUE_I32), load_gpr(i->Rm, VALUE_I32));
+
+  if (flags & SH4_DOUBLE_SZ) {
+    ir_value_t *v_low = load_guest(addr, VALUE_I32);
+    ir_value_t *v_high =
+        load_guest(ir_add(ir, addr, ir_alloc_i32(ir, 4)), VALUE_I32);
+    if (i->Rn & 1) {
+      store_xfr(i->Rn & 0xe, v_low);
+      store_xfr(i->Rn, v_high);
+    } else {
+      store_fpr(i->Rn, v_low);
+      store_fpr(i->Rn | 0x1, v_high);
+    }
+  } else {
+    store_fpr(i->Rn, load_guest(addr, VALUE_I32));
+  }
+}
+
+// FMOV.S FRm,@Rn 1111nnnnmmmm1010
+// FMOV DRm,@Rn 1111nnnnmmm01010
+// FMOV XDm,@Rn 1111nnnnmmm11010
+EMITTER(FMOV_STORE) {
+  ir_value_t *addr = load_gpr(i->Rn, VALUE_I32);
+
+  if (flags & SH4_DOUBLE_SZ) {
+    // the 64-bit form is emitted as two 32-bit stores to addr and addr + 4
+    ir_value_t *addr_low = addr;
+    ir_value_t *addr_high = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+    if (i->Rm & 1) {
+      store_guest(addr_low, load_xfr(i->Rm & 0xe, VALUE_I32));
+      store_guest(addr_high, load_xfr(i->Rm, VALUE_I32));
+    } else {
+      store_guest(addr_low, load_fpr(i->Rm, VALUE_I32));
+      store_guest(addr_high, load_fpr(i->Rm | 0x1, VALUE_I32));
+    }
+  } else {
+    store_guest(addr, load_fpr(i->Rm, VALUE_I32));
+  }
+}
+
+// FMOV.S FRm,@(R0,Rn) 1111nnnnmmmm0111
+// FMOV DRm,@(R0,Rn) 1111nnnnmmm00111
+// FMOV XDm,@(R0,Rn) 1111nnnnmmm10111
+EMITTER(FMOV_INDEX_STORE) {
+  // same as FMOV_STORE, but the address is R0 + Rn
+  ir_value_t *addr =
+      ir_add(ir, load_gpr(0, VALUE_I32), load_gpr(i->Rn, VALUE_I32));
+
+  if (flags & SH4_DOUBLE_SZ) {
+    ir_value_t *addr_low = addr;
+    ir_value_t *addr_high = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+    if (i->Rm & 1) {
+      store_guest(addr_low, load_xfr(i->Rm & 0xe, VALUE_I32));
+      store_guest(addr_high, load_xfr(i->Rm, VALUE_I32));
+    } else {
+      store_guest(addr_low, load_fpr(i->Rm, VALUE_I32));
+      store_guest(addr_high, load_fpr(i->Rm | 0x1, VALUE_I32));
+    }
+  } else {
+    store_guest(addr, load_fpr(i->Rm, VALUE_I32));
+  }
+}
+
+// FMOV.S FRm,@-Rn 1111nnnnmmmm1011
+// FMOV DRm,@-Rn 1111nnnnmmm01011
+// FMOV XDm,@-Rn 1111nnnnmmm11011
+EMITTER(FMOV_SAVE) {
+  // Rn is pre-decremented by the transfer size (8 or 4) and written back
+  // before the stores are emitted
+  if (flags & SH4_DOUBLE_SZ) {
+    ir_value_t *addr =
+        ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 8));
+    store_gpr(i->Rn, addr);
+
+    ir_value_t *addr_low = addr;
+    ir_value_t *addr_high = ir_add(ir, addr, ir_alloc_i32(ir, 4));
+
+    if (i->Rm & 1) {
+      store_guest(addr_low, load_xfr(i->Rm & 0xe, VALUE_I32));
+      store_guest(addr_high, load_xfr(i->Rm, VALUE_I32));
+    } else {
+      store_guest(addr_low, load_fpr(i->Rm, VALUE_I32));
+      store_guest(addr_high, load_fpr(i->Rm | 0x1, VALUE_I32));
+    }
+  } else {
+    ir_value_t *addr =
+        ir_sub(ir, load_gpr(i->Rn, VALUE_I32), ir_alloc_i32(ir, 4));
+    store_gpr(i->Rn, addr);
+    store_guest(addr, load_fpr(i->Rm, VALUE_I32));
+  }
+}
+
+// FMOV.S @Rm+,FRn 1111nnnnmmmm1001
+// FMOV @Rm+,DRn 1111nnn0mmmm1001
+// FMOV @Rm+,XDn 1111nnn1mmmm1001
+EMITTER(FMOV_RESTORE) {
+  ir_value_t *addr = load_gpr(i->Rm, VALUE_I32);
+
+  // Rm is post-incremented by the transfer size (8 or 4) after the loads
+  if (flags & SH4_DOUBLE_SZ) {
+    ir_value_t *v_low = load_guest(addr, VALUE_I32);
+    ir_value_t *v_high =
+        load_guest(ir_add(ir, addr, ir_alloc_i32(ir, 4)), VALUE_I32);
+    if (i->Rn & 1) {
+      store_xfr(i->Rn & 0xe, v_low);
+      store_xfr(i->Rn, v_high);
+    } else {
+      store_fpr(i->Rn, v_low);
+      store_fpr(i->Rn | 0x1, v_high);
+    }
+    store_gpr(i->Rm, ir_add(ir, addr, ir_alloc_i32(ir, 8)));
+  } else {
+    store_fpr(i->Rn, load_guest(addr, VALUE_I32));
+    store_gpr(i->Rm, ir_add(ir, addr, ir_alloc_i32(ir, 4)));
+  }
+}
+
+// FLDS FRm,FPUL 1111mmmm00011101
+EMITTER(FLDS) {
+  // note, the local is named rn but holds FRm
+  ir_value_t *rn = load_fpr(i->Rm, VALUE_I32);
+  ir_store_context(ir, offsetof(sh4_context_t, fpul), rn);
+}
+
+// FSTS FPUL,FRn 1111nnnn00001101
+EMITTER(FSTS) {
+  ir_value_t *fpul =
+      ir_load_context(ir, offsetof(sh4_context_t, fpul), VALUE_I32);
+  store_fpr(i->Rn, fpul);
+}
+
+// FABS FRn PR=0 1111nnnn01011101
+// FABS DRn PR=1 1111nnn001011101
+EMITTER(FABS) {
+  // with SH4_DOUBLE_PR set the operand is the 64-bit register at the even
+  // index
+  if (flags & SH4_DOUBLE_PR) {
+    int n = i->Rn & 0xe;
+    ir_value_t *v = ir_fabs(ir, load_fpr(n, VALUE_F64));
+    store_fpr(n, v);
+  } else {
+    ir_value_t *v = ir_fabs(ir, load_fpr(i->Rn, VALUE_F32));
+    store_fpr(i->Rn, v);
+  }
+}
+
+// FSRRA FRn PR=0 1111nnnn01111101
+EMITTER(FSRRA) {
+  // FRn = 1.0f / sqrt(FRn)
+  ir_value_t *frn = load_fpr(i->Rn, VALUE_F32);
+  ir_value_t *v = ir_fdiv(ir, ir_alloc_f32(ir, 1.0f), ir_sqrt(ir, frn));
+  store_fpr(i->Rn, v);
+}
+
+// FADD FRm,FRn PR=0 1111nnnnmmmm0000
+// FADD DRm,DRn PR=1 1111nnn0mmm00000
+EMITTER(FADD) {
+  if (flags & SH4_DOUBLE_PR) {
+    int n = i->Rn & 0xe;
+    int m = i->Rm & 0xe;
+    ir_value_t *drn = load_fpr(n, VALUE_F64);
+    ir_value_t *drm = load_fpr(m, VALUE_F64);
+    ir_value_t *v = ir_fadd(ir, drn, drm);
+    store_fpr(n, v);
+  } else {
+    ir_value_t *frn = load_fpr(i->Rn, VALUE_F32);
+    ir_value_t *frm = load_fpr(i->Rm, VALUE_F32);
+    ir_value_t *v = ir_fadd(ir, frn, frm);
+    store_fpr(i->Rn, v);
+  }
+}
+
+// FCMP/EQ FRm,FRn PR=0 1111nnnnmmmm0100
+// FCMP/EQ DRm,DRn PR=1 1111nnn0mmm00100
+EMITTER(FCMPEQ) {
+  // the comparison result is written to T
+  if (flags & SH4_DOUBLE_PR) {
+    int n = i->Rn & 0xe;
+    int m = i->Rm & 0xe;
+    ir_value_t *drn = load_fpr(n, VALUE_F64);
+    ir_value_t *drm = load_fpr(m, VALUE_F64);
+    ir_value_t *v = ir_fcmp_eq(ir, drn, drm);
+    store_t(v);
+  } else {
+    ir_value_t *frn = load_fpr(i->Rn, VALUE_F32);
+    ir_value_t *frm = load_fpr(i->Rm, VALUE_F32);
+    ir_value_t *v = ir_fcmp_eq(ir, frn, frm);
+    store_t(v);
+  }
+}
+
+// FCMP/GT FRm,FRn PR=0 1111nnnnmmmm0101
+// FCMP/GT DRm,DRn PR=1 1111nnn0mmm00101
+EMITTER(FCMPGT) { + if (flags & SH4_DOUBLE_PR) { + int n = i->Rn & 0xe; + int m = i->Rm & 0xe; + ir_value_t *drn = load_fpr(n, VALUE_F64); + ir_value_t *drm = load_fpr(m, VALUE_F64); + ir_value_t *v = ir_fcmp_gt(ir, drn, drm); + store_t(v); + } else { + ir_value_t *frn = load_fpr(i->Rn, VALUE_F32); + ir_value_t *frm = load_fpr(i->Rm, VALUE_F32); + ir_value_t *v = ir_fcmp_gt(ir, frn, frm); + store_t(v); + } +} + +// FDIV FRm,FRn PR=0 1111nnnnmmmm0011 +// FDIV DRm,DRn PR=1 1111nnn0mmm00011 +EMITTER(FDIV) { + if (flags & SH4_DOUBLE_PR) { + int n = i->Rn & 0xe; + int m = i->Rm & 0xe; + ir_value_t *drn = load_fpr(n, VALUE_F64); + ir_value_t *drm = load_fpr(m, VALUE_F64); + ir_value_t *v = ir_fdiv(ir, drn, drm); + store_fpr(n, v); + } else { + ir_value_t *frn = load_fpr(i->Rn, VALUE_F32); + ir_value_t *frm = load_fpr(i->Rm, VALUE_F32); + ir_value_t *v = ir_fdiv(ir, frn, frm); + store_fpr(i->Rn, v); + } +} + +// FLOAT FPUL,FRn PR=0 1111nnnn00101101 +// FLOAT FPUL,DRn PR=1 1111nnn000101101 +EMITTER(FLOAT) { + ir_value_t *fpul = + ir_load_context(ir, offsetof(sh4_context_t, fpul), VALUE_I32); + + if (flags & SH4_DOUBLE_PR) { + int n = i->Rn & 0xe; + ir_value_t *v = ir_itof(ir, ir_sext(ir, fpul, VALUE_I64), VALUE_F64); + store_fpr(n, v); + } else { + ir_value_t *v = ir_itof(ir, fpul, VALUE_F32); + store_fpr(i->Rn, v); + } +} + +// FMAC FR0,FRm,FRn PR=0 1111nnnnmmmm1110 +EMITTER(FMAC) { + CHECK(!(flags & SH4_DOUBLE_PR)); + + ir_value_t *frn = load_fpr(i->Rn, VALUE_F32); + ir_value_t *frm = load_fpr(i->Rm, VALUE_F32); + ir_value_t *fr0 = load_fpr(0, VALUE_F32); + ir_value_t *v = ir_fadd(ir, ir_fmul(ir, fr0, frm), frn); + store_fpr(i->Rn, v); +} + +// FMUL FRm,FRn PR=0 1111nnnnmmmm0010 +// FMUL DRm,DRn PR=1 1111nnn0mmm00010 +EMITTER(FMUL) { + if (flags & SH4_DOUBLE_PR) { + int n = i->Rn & 0xe; + int m = i->Rm & 0xe; + ir_value_t *drn = load_fpr(n, VALUE_F64); + ir_value_t *drm = load_fpr(m, VALUE_F64); + ir_value_t *v = ir_fmul(ir, drn, drm); + store_fpr(n, v); + } else { + 
ir_value_t *frn = load_fpr(i->Rn, VALUE_F32); + ir_value_t *frm = load_fpr(i->Rm, VALUE_F32); + ir_value_t *v = ir_fmul(ir, frn, frm); + store_fpr(i->Rn, v); + } +} + +// FNEG FRn PR=0 1111nnnn01001101 +// FNEG DRn PR=1 1111nnn001001101 +EMITTER(FNEG) { + if (flags & SH4_DOUBLE_PR) { + int n = i->Rn & 0xe; + ir_value_t *drn = load_fpr(n, VALUE_F64); + ir_value_t *v = ir_fneg(ir, drn); + store_fpr(n, v); + } else { + ir_value_t *frn = load_fpr(i->Rn, VALUE_F32); + ir_value_t *v = ir_fneg(ir, frn); + store_fpr(i->Rn, v); + } +} + +// FSQRT FRn PR=0 1111nnnn01101101 +// FSQRT DRn PR=1 1111nnnn01101101 +EMITTER(FSQRT) { + if (flags & SH4_DOUBLE_PR) { + int n = i->Rn & 0xe; + ir_value_t *drn = load_fpr(n, VALUE_F64); + ir_value_t *v = ir_sqrt(ir, drn); + store_fpr(n, v); + } else { + ir_value_t *frn = load_fpr(i->Rn, VALUE_F32); + ir_value_t *v = ir_sqrt(ir, frn); + store_fpr(i->Rn, v); + } +} + +// FSUB FRm,FRn PR=0 1111nnnnmmmm0001 +// FSUB DRm,DRn PR=1 1111nnn0mmm00001 +EMITTER(FSUB) { + if (flags & SH4_DOUBLE_PR) { + int n = i->Rn & 0xe; + int m = i->Rm & 0xe; + ir_value_t *drn = load_fpr(n, VALUE_F64); + ir_value_t *drm = load_fpr(m, VALUE_F64); + ir_value_t *v = ir_fsub(ir, drn, drm); + store_fpr(n, v); + } else { + ir_value_t *frn = load_fpr(i->Rn, VALUE_F32); + ir_value_t *frm = load_fpr(i->Rm, VALUE_F32); + ir_value_t *v = ir_fsub(ir, frn, frm); + store_fpr(i->Rn, v); + } +} + +// FTRC FRm,FPUL PR=0 1111mmmm00111101 +// FTRC DRm,FPUL PR=1 1111mmm000111101 +EMITTER(FTRC) { + if (flags & SH4_DOUBLE_PR) { + int m = i->Rm & 0xe; + ir_value_t *drm = load_fpr(m, VALUE_F64); + ir_value_t *dpv = ir_trunc(ir, ir_ftoi(ir, drm, VALUE_I64), VALUE_I32); + ir_store_context(ir, offsetof(sh4_context_t, fpul), dpv); + } else { + ir_value_t *frm = load_fpr(i->Rm, VALUE_F32); + ir_value_t *spv = ir_ftoi(ir, frm, VALUE_I32); + ir_store_context(ir, offsetof(sh4_context_t, fpul), spv); + } +} + +// FCNVDS DRm,FPUL PR=1 1111mmm010111101 +EMITTER(FCNVDS) { + CHECK(flags & 
SH4_DOUBLE_PR); + + // TODO rounding modes? + + int m = i->Rm & 0xe; + ir_value_t *dpv = load_fpr(m, VALUE_F64); + ir_value_t *spv = ir_ftrunc(ir, dpv, VALUE_F32); + ir_store_context(ir, offsetof(sh4_context_t, fpul), spv); +} + +// FCNVSD FPUL, DRn PR=1 1111nnn010101101 +EMITTER(FCNVSD) { + CHECK(flags & SH4_DOUBLE_PR); + + // TODO rounding modes? + + ir_value_t *spv = + ir_load_context(ir, offsetof(sh4_context_t, fpul), VALUE_F32); + ir_value_t *dpv = ir_fext(ir, spv, VALUE_F64); + int n = i->Rn & 0xe; + store_fpr(n, dpv); +} + +// LDS Rm,FPSCR +EMITTER(LDSFPSCR) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + store_fpscr(rm); +} + +// LDS Rm,FPUL +EMITTER(LDSFPUL) { + ir_value_t *rm = load_gpr(i->Rm, VALUE_I32); + ir_store_context(ir, offsetof(sh4_context_t, fpul), rm); +} + +// LDS.L @Rm+,FPSCR +EMITTER(LDSMFPSCR) { + ir_value_t *addr = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = load_guest(addr, VALUE_I32); + store_fpscr(v); + addr = ir_add(ir, addr, ir_alloc_i32(ir, 4)); + store_gpr(i->Rm, addr); +} + +// LDS.L @Rm+,FPUL +EMITTER(LDSMFPUL) { + ir_value_t *addr = load_gpr(i->Rm, VALUE_I32); + ir_value_t *v = load_guest(addr, VALUE_I32); + ir_store_context(ir, offsetof(sh4_context_t, fpul), v); + addr = ir_add(ir, addr, ir_alloc_i32(ir, 4)); + store_gpr(i->Rm, addr); +} + +// STS FPSCR,Rn +EMITTER(STSFPSCR) { + ir_value_t *fpscr = load_fpscr(); + store_gpr(i->Rn, fpscr); +} + +// STS FPUL,Rn +EMITTER(STSFPUL) { + ir_value_t *fpul = + ir_load_context(ir, offsetof(sh4_context_t, fpul), VALUE_I32); + store_gpr(i->Rn, fpul); +} + +// STS.L FPSCR,@-Rn +EMITTER(STSMFPSCR) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + addr = ir_sub(ir, addr, ir_alloc_i32(ir, 4)); + store_gpr(i->Rn, addr); + store_guest(addr, load_fpscr()); +} + +// STS.L FPUL,@-Rn +EMITTER(STSMFPUL) { + ir_value_t *addr = load_gpr(i->Rn, VALUE_I32); + addr = ir_sub(ir, addr, ir_alloc_i32(ir, 4)); + store_gpr(i->Rn, addr); + ir_value_t *fpul = + ir_load_context(ir, offsetof(sh4_context_t, 
fpul), VALUE_I32); + store_guest(addr, fpul); +} + +// FIPR FVm,FVn PR=0 1111nnmm11101101 +EMITTER(FIPR) { + int m = i->Rm << 2; + int n = i->Rn << 2; + + ir_value_t *fvn = load_fpr(n, VALUE_V128); + ir_value_t *fvm = load_fpr(m, VALUE_V128); + ir_value_t *dp = ir_vdot(ir, fvn, fvm, VALUE_F32); + store_fpr(n + 3, dp); +} + +// FSCA FPUL,DRn PR=0 1111nnn011111101 +EMITTER(FSCA) { + int n = i->Rn << 1; + + ir_value_t *fpul = + ir_load_context(ir, offsetof(sh4_context_t, fpul), VALUE_I16); + fpul = ir_zext(ir, fpul, VALUE_I64); + + ir_value_t *fsca_table = ir_alloc_i64(ir, (int64_t)s_fsca_table); + ir_value_t *fsca_offset = ir_shli(ir, fpul, 3); + ir_value_t *addr = ir_add(ir, fsca_table, fsca_offset); + + store_fpr(n, ir_load_host(ir, addr, VALUE_F32)); + store_fpr(n + 1, + ir_load_host(ir, ir_add(ir, addr, ir_alloc_i64(ir, 4)), VALUE_F32)); +} + +// FTRV XMTRX,FVn PR=0 1111nn0111111101 +EMITTER(FTRV) { + int n = i->Rn << 2; + + ir_value_t *col0 = load_xfr(0, VALUE_V128); + ir_value_t *row0 = ir_vbroadcast(ir, load_fpr(n + 0, VALUE_F32)); + ir_value_t *result = ir_vmul(ir, col0, row0, VALUE_F32); + + ir_value_t *col1 = load_xfr(4, VALUE_V128); + ir_value_t *row1 = ir_vbroadcast(ir, load_fpr(n + 1, VALUE_F32)); + result = ir_vadd(ir, result, ir_vmul(ir, col1, row1, VALUE_F32), VALUE_F32); + + ir_value_t *col2 = load_xfr(8, VALUE_V128); + ir_value_t *row2 = ir_vbroadcast(ir, load_fpr(n + 2, VALUE_F32)); + result = ir_vadd(ir, result, ir_vmul(ir, col2, row2, VALUE_F32), VALUE_F32); + + ir_value_t *col3 = load_xfr(12, VALUE_V128); + ir_value_t *row3 = ir_vbroadcast(ir, load_fpr(n + 3, VALUE_F32)); + result = ir_vadd(ir, result, ir_vmul(ir, col3, row3, VALUE_F32), VALUE_F32); + + store_fpr(n, result); +} + +// FRCHG 1111101111111101 +EMITTER(FRCHG) { + ir_value_t *fpscr = load_fpscr(); + ir_value_t *v = ir_xor(ir, fpscr, ir_alloc_i32(ir, FR)); + store_fpscr(v); +} + +// FSCHG 1111001111111101 +EMITTER(FSCHG) { + ir_value_t *fpscr = load_fpscr(); + ir_value_t *v = 
ir_xor(ir, fpscr, ir_alloc_i32(ir, SZ)); + store_fpscr(v); +} + +void sh4_translate(uint32_t guest_addr, uint8_t *guest_ptr, int size, int flags, + ir_t *ir) { + // PROFILER_RUNTIME("SH4ir::Emit"); + sh4_instr_t delay_instr; + + int i = 0; + int guest_cycles = 0; + + while (i < size) { + sh4_instr_t instr = {}; + instr.addr = guest_addr + i; + instr.opcode = *(uint16_t *)(guest_ptr + i); + + if (!sh4_disasm(&instr)) { + sh4_invalid_instr(ir, instr.addr); + break; + } + + i += 2; + guest_cycles += instr.cycles; + + if (instr.flags & SH4_FLAG_DELAYED) { + delay_instr.addr = guest_addr + i; + delay_instr.opcode = *(uint16_t *)(guest_ptr + i); + + // instruction must be valid, breakpoints on delay instructions aren't + // currently supported + CHECK(sh4_disasm(&delay_instr)); + + // delay instruction itself should never have a delay instr + CHECK(!(delay_instr.flags & SH4_FLAG_DELAYED)); + + i += 2; + guest_cycles += delay_instr.cycles; + } + + sh4_emit_instr(ir, flags, &instr, &delay_instr); + } + + ir_instr_t *tail_instr = list_last_entry(&ir->instrs, ir_instr_t, it); + + // if the block was terminated before a branch instruction, emit a + // fallthrough branch to the next pc + if (tail_instr->op != OP_BRANCH && tail_instr->op != OP_BRANCH_COND) { + ir_branch(ir, ir_alloc_i32(ir, guest_addr + i)); + } + + // emit block epilog + ir->current_instr = list_prev_entry(tail_instr, it); + + // update remaining cycles + ir_value_t *num_cycles = + ir_load_context(ir, offsetof(sh4_context_t, num_cycles), VALUE_I32); + num_cycles = ir_sub(ir, num_cycles, ir_alloc_i32(ir, guest_cycles)); + ir_store_context(ir, offsetof(sh4_context_t, num_cycles), num_cycles); + + // update num instructions + ir_value_t *num_instrs = + ir_load_context(ir, offsetof(sh4_context_t, num_instrs), VALUE_I32); + num_instrs = ir_add(ir, num_instrs, ir_alloc_i32(ir, size >> 1)); + ir_store_context(ir, offsetof(sh4_context_t, num_instrs), num_instrs); +} diff --git a/src/jit/frontend/sh4/sh4_translate.h 
b/src/jit/frontend/sh4/sh4_translate.h new file mode 100644 index 00000000..277fdd41 --- /dev/null +++ b/src/jit/frontend/sh4/sh4_translate.h @@ -0,0 +1,17 @@ +#ifndef SH4_BUILDER_H +#define SH4_BUILDER_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct ir_s; + +void sh4_translate(uint32_t guest_addr, uint8_t *guest_ptr, int size, int flags, + struct ir_s *ir); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/jit/ir/ir.c b/src/jit/ir/ir.c new file mode 100644 index 00000000..eb24ac8c --- /dev/null +++ b/src/jit/ir/ir.c @@ -0,0 +1,696 @@ +#include "core/math.h" +#include "jit/ir/ir.h" + +const char *ir_op_names[NUM_OPS] = { +#define IR_OP(name) #name, +#include "jit/ir/ir_ops.inc" +}; + +static void *ir_calloc(ir_t *ir, int size) { + CHECK_LE(ir->used + size, ir->capacity); + uint8_t *ptr = ir->buffer + ir->used; + memset(ptr, 0, size); + ir->used += size; + return ptr; +} + +static ir_instr_t *ir_alloc_instr(ir_t *ir, ir_op_t op) { + ir_instr_t *instr = ir_calloc(ir, sizeof(ir_instr_t)); + + instr->op = op; + + // initialize use links + for (int i = 0; i < MAX_INSTR_ARGS; i++) { + ir_use_t *use = &instr->used[i]; + use->instr = instr; + use->parg = &instr->arg[i]; + } + + return instr; +} + +static void ir_add_use(ir_value_t *v, ir_use_t *use) { + list_add(&v->uses, &use->it); +} + +static void ir_remove_use(ir_value_t *v, ir_use_t *use) { + list_remove(&v->uses, &use->it); +} + +ir_instr_t *ir_append_instr(ir_t *ir, ir_op_t op, ir_type_t result_type) { + ir_instr_t *instr = ir_alloc_instr(ir, op); + + // allocate result if needed + if (result_type != VALUE_V) { + ir_value_t *result = ir_calloc(ir, sizeof(ir_value_t)); + result->type = result_type; + result->def = instr; + result->reg = NO_REGISTER; + instr->result = result; + } + + list_add_after_entry(&ir->instrs, ir->current_instr, it, instr); + + ir->current_instr = instr; + + return instr; +} + +void ir_remove_instr(ir_t *ir, ir_instr_t *instr) { + // remove arguments from the use lists of 
their values + for (int i = 0; i < MAX_INSTR_ARGS; i++) { + ir_value_t *value = instr->arg[i]; + + if (value) { + ir_remove_use(value, &instr->used[i]); + } + } + + list_remove(&ir->instrs, &instr->it); +} + +ir_value_t *ir_alloc_i8(ir_t *ir, int8_t c) { + ir_value_t *v = ir_calloc(ir, sizeof(ir_value_t)); + v->type = VALUE_I8; + v->i8 = c; + v->reg = NO_REGISTER; + return v; +} + +ir_value_t *ir_alloc_i16(ir_t *ir, int16_t c) { + ir_value_t *v = ir_calloc(ir, sizeof(ir_value_t)); + v->type = VALUE_I16; + v->i16 = c; + v->reg = NO_REGISTER; + return v; +} + +ir_value_t *ir_alloc_i32(ir_t *ir, int32_t c) { + ir_value_t *v = ir_calloc(ir, sizeof(ir_value_t)); + v->type = VALUE_I32; + v->i32 = c; + v->reg = NO_REGISTER; + return v; +} + +ir_value_t *ir_alloc_i64(ir_t *ir, int64_t c) { + ir_value_t *v = ir_calloc(ir, sizeof(ir_value_t)); + v->type = VALUE_I64; + v->i64 = c; + v->reg = NO_REGISTER; + return v; +} + +ir_value_t *ir_alloc_f32(ir_t *ir, float c) { + ir_value_t *v = ir_calloc(ir, sizeof(ir_value_t)); + v->type = VALUE_F32; + v->f32 = c; + v->reg = NO_REGISTER; + return v; +} + +ir_value_t *ir_alloc_f64(ir_t *ir, double c) { + ir_value_t *v = ir_calloc(ir, sizeof(ir_value_t)); + v->type = VALUE_F64; + v->f64 = c; + v->reg = NO_REGISTER; + return v; +} + +ir_local_t *ir_alloc_local(ir_t *ir, ir_type_t type) { + // align local to natural size + int type_size = ir_type_size(type); + ir->locals_size = align_up(ir->locals_size, type_size); + + ir_local_t *l = ir_calloc(ir, sizeof(ir_local_t)); + l->type = type; + l->offset = ir_alloc_i32(ir, ir->locals_size); + list_add(&ir->locals, &l->it); + + ir->locals_size += type_size; + + return l; +} + +void ir_set_arg(ir_t *ir, ir_instr_t *instr, int n, ir_value_t *v) { + ir_replace_use(&instr->used[n], v); +} + +void ir_set_arg0(ir_t *ir, ir_instr_t *instr, ir_value_t *v) { + ir_set_arg(ir, instr, 0, v); +} + +void ir_set_arg1(ir_t *ir, ir_instr_t *instr, ir_value_t *v) { + ir_set_arg(ir, instr, 1, v); +} + +void 
ir_set_arg2(ir_t *ir, ir_instr_t *instr, ir_value_t *v) { + ir_set_arg(ir, instr, 2, v); +} + +void ir_replace_use(ir_use_t *use, ir_value_t *other) { + if (*use->parg) { + ir_remove_use(*use->parg, use); + } + + *use->parg = other; + + if (*use->parg) { + ir_add_use(*use->parg, use); + } +} + +// replace all uses of v with other +void ir_replace_uses(ir_value_t *v, ir_value_t *other) { + CHECK_NE(v, other); + + list_for_each_entry_safe(use, &v->uses, ir_use_t, it) { + ir_replace_use(use, other); + } +} + +bool ir_is_constant(const ir_value_t *v) { + return !v->def; +} + +uint64_t ir_zext_constant(const ir_value_t *v) { + switch (v->type) { + case VALUE_I8: + return (uint8_t)v->i8; + case VALUE_I16: + return (uint16_t)v->i16; + case VALUE_I32: + return (uint32_t)v->i32; + case VALUE_I64: + return (uint64_t)v->i64; + default: + LOG_FATAL("Unexpected value type"); + break; + } +} + +ir_value_t *ir_load_host(ir_t *ir, ir_value_t *addr, ir_type_t type) { + CHECK_EQ(VALUE_I64, addr->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_LOAD_HOST, type); + ir_set_arg0(ir, instr, addr); + return instr->result; +} + +void ir_store_host(ir_t *ir, ir_value_t *addr, ir_value_t *v) { + CHECK_EQ(VALUE_I64, addr->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_STORE_HOST, VALUE_V); + ir_set_arg0(ir, instr, addr); + ir_set_arg1(ir, instr, v); +} + +ir_value_t *ir_load_fast(ir_t *ir, ir_value_t *addr, ir_type_t type) { + CHECK_EQ(VALUE_I32, addr->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_LOAD_FAST, type); + ir_set_arg0(ir, instr, addr); + return instr->result; +} + +void ir_store_fast(ir_t *ir, ir_value_t *addr, ir_value_t *v) { + CHECK_EQ(VALUE_I32, addr->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_STORE_FAST, VALUE_V); + ir_set_arg0(ir, instr, addr); + ir_set_arg1(ir, instr, v); +} + +ir_value_t *ir_load_slow(ir_t *ir, ir_value_t *addr, ir_type_t type) { + CHECK_EQ(VALUE_I32, addr->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_LOAD_SLOW, 
type); + ir_set_arg0(ir, instr, addr); + return instr->result; +} + +void ir_store_slow(ir_t *ir, ir_value_t *addr, ir_value_t *v) { + CHECK_EQ(VALUE_I32, addr->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_STORE_SLOW, VALUE_V); + ir_set_arg0(ir, instr, addr); + ir_set_arg1(ir, instr, v); +} + +ir_value_t *ir_load_context(ir_t *ir, size_t offset, ir_type_t type) { + ir_instr_t *instr = ir_append_instr(ir, OP_LOAD_CONTEXT, type); + ir_set_arg0(ir, instr, ir_alloc_i32(ir, offset)); + return instr->result; +} + +void ir_store_context(ir_t *ir, size_t offset, ir_value_t *v) { + ir_instr_t *instr = ir_append_instr(ir, OP_STORE_CONTEXT, VALUE_V); + ir_set_arg0(ir, instr, ir_alloc_i32(ir, offset)); + ir_set_arg1(ir, instr, v); +} + +ir_value_t *ir_load_local(ir_t *ir, ir_local_t *local) { + ir_instr_t *instr = ir_append_instr(ir, OP_LOAD_LOCAL, local->type); + ir_set_arg0(ir, instr, local->offset); + return instr->result; +} + +void ir_store_local(ir_t *ir, ir_local_t *local, ir_value_t *v) { + ir_instr_t *instr = ir_append_instr(ir, OP_STORE_LOCAL, VALUE_V); + ir_set_arg0(ir, instr, local->offset); + ir_set_arg1(ir, instr, v); +} + +ir_value_t *ir_ftoi(ir_t *ir, ir_value_t *v, ir_type_t dest_type) { + CHECK(ir_is_float(v->type) && is_is_int(dest_type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_FTOI, dest_type); + ir_set_arg0(ir, instr, v); + return instr->result; +} + +ir_value_t *ir_itof(ir_t *ir, ir_value_t *v, ir_type_t dest_type) { + CHECK(is_is_int(v->type) && ir_is_float(dest_type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_ITOF, dest_type); + ir_set_arg0(ir, instr, v); + return instr->result; +} + +ir_value_t *ir_sext(ir_t *ir, ir_value_t *v, ir_type_t dest_type) { + CHECK(is_is_int(v->type) && is_is_int(dest_type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_SEXT, dest_type); + ir_set_arg0(ir, instr, v); + return instr->result; +} + +ir_value_t *ir_zext(ir_t *ir, ir_value_t *v, ir_type_t dest_type) { + CHECK(is_is_int(v->type) && 
is_is_int(dest_type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_ZEXT, dest_type); + ir_set_arg0(ir, instr, v); + return instr->result; +} + +ir_value_t *ir_trunc(ir_t *ir, ir_value_t *v, ir_type_t dest_type) { + CHECK(is_is_int(v->type) && is_is_int(dest_type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_TRUNC, dest_type); + ir_set_arg0(ir, instr, v); + return instr->result; +} + +ir_value_t *ir_fext(ir_t *ir, ir_value_t *v, ir_type_t dest_type) { + CHECK(v->type == VALUE_F32 && dest_type == VALUE_F64); + + ir_instr_t *instr = ir_append_instr(ir, OP_FEXT, dest_type); + ir_set_arg0(ir, instr, v); + return instr->result; +} + +ir_value_t *ir_ftrunc(ir_t *ir, ir_value_t *v, ir_type_t dest_type) { + CHECK(v->type == VALUE_F64 && dest_type == VALUE_F32); + + ir_instr_t *instr = ir_append_instr(ir, OP_FTRUNC, dest_type); + ir_set_arg0(ir, instr, v); + return instr->result; +} + +ir_value_t *ir_select(ir_t *ir, ir_value_t *cond, ir_value_t *t, + ir_value_t *f) { + CHECK(is_is_int(cond->type) && is_is_int(t->type) && t->type == f->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_SELECT, t->type); + ir_set_arg0(ir, instr, t); + ir_set_arg1(ir, instr, f); + ir_set_arg2(ir, instr, cond); + return instr->result; +} + +static ir_value_t *ir_cmp(ir_t *ir, ir_value_t *a, ir_value_t *b, + ir_cmp_t type) { + CHECK(is_is_int(a->type) && a->type == b->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_CMP, VALUE_I8); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + ir_set_arg2(ir, instr, ir_alloc_i32(ir, type)); + return instr->result; +} + +ir_value_t *ir_cmp_eq(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_EQ); +} + +ir_value_t *ir_cmp_ne(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_NE); +} + +ir_value_t *ir_cmp_sge(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_SGE); +} + +ir_value_t *ir_cmp_sgt(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_SGT); +} 
+ +ir_value_t *ir_cmp_uge(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_UGE); +} + +ir_value_t *ir_cmp_ugt(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_UGT); +} + +ir_value_t *ir_cmp_sle(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_SLE); +} + +ir_value_t *ir_cmp_slt(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_SLT); +} + +ir_value_t *ir_cmp_ule(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_ULE); +} + +ir_value_t *ir_cmp_ult(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_cmp(ir, a, b, CMP_ULT); +} + +static ir_value_t *ir_fcmp(ir_t *ir, ir_value_t *a, ir_value_t *b, + ir_cmp_t type) { + CHECK(ir_is_float(a->type) && a->type == b->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_FCMP, VALUE_I8); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + ir_set_arg2(ir, instr, ir_alloc_i32(ir, type)); + return instr->result; +} + +ir_value_t *ir_fcmp_eq(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_fcmp(ir, a, b, CMP_EQ); +} + +ir_value_t *ir_fcmp_ne(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_fcmp(ir, a, b, CMP_NE); +} + +ir_value_t *ir_fcmp_ge(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_fcmp(ir, a, b, CMP_SGE); +} + +ir_value_t *ir_fcmp_gt(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_fcmp(ir, a, b, CMP_SGT); +} + +ir_value_t *ir_fcmp_le(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_fcmp(ir, a, b, CMP_SLE); +} + +ir_value_t *ir_fcmp_lt(ir_t *ir, ir_value_t *a, ir_value_t *b) { + return ir_fcmp(ir, a, b, CMP_SLT); +} + +ir_value_t *ir_add(ir_t *ir, ir_value_t *a, ir_value_t *b) { + CHECK(is_is_int(a->type) && a->type == b->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_ADD, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_sub(ir_t *ir, ir_value_t *a, ir_value_t *b) { + CHECK(is_is_int(a->type) && a->type == 
b->type);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_SUB, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, b);
+  return instr->result;
+}
+
+ir_value_t *ir_smul(ir_t *ir, ir_value_t *a, ir_value_t *b) {
+  CHECK(is_is_int(a->type) && a->type == b->type);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_SMUL, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, b);
+  return instr->result;
+}
+
+ir_value_t *ir_umul(ir_t *ir, ir_value_t *a, ir_value_t *b) {
+  // both operands must be integers of the same type
+  CHECK(is_is_int(a->type) && a->type == b->type);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_UMUL, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, b);
+  return instr->result;
+}
+
+ir_value_t *ir_div(ir_t *ir, ir_value_t *a, ir_value_t *b) {
+  CHECK(is_is_int(a->type) && a->type == b->type);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_DIV, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, b);
+  return instr->result;
+}
+
+ir_value_t *ir_neg(ir_t *ir, ir_value_t *a) {
+  CHECK(is_is_int(a->type));
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_NEG, a->type);
+  ir_set_arg0(ir, instr, a);
+  return instr->result;
+}
+
+ir_value_t *ir_abs(ir_t *ir, ir_value_t *a) {
+  CHECK(is_is_int(a->type));
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_ABS, a->type);
+  ir_set_arg0(ir, instr, a);
+  return instr->result;
+}
+
+ir_value_t *ir_fadd(ir_t *ir, ir_value_t *a, ir_value_t *b) {
+  CHECK(ir_is_float(a->type) && a->type == b->type);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_FADD, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, b);
+  return instr->result;
+}
+
+ir_value_t *ir_fsub(ir_t *ir, ir_value_t *a, ir_value_t *b) {
+  CHECK(ir_is_float(a->type) && a->type == b->type);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_FSUB, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, b);
+  return instr->result;
+}
+
+ir_value_t *ir_fmul(ir_t *ir, ir_value_t *a, ir_value_t *b) {
+  CHECK(ir_is_float(a->type) &&
a->type == b->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_FMUL, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_fdiv(ir_t *ir, ir_value_t *a, ir_value_t *b) { + CHECK(ir_is_float(a->type) && a->type == b->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_FDIV, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_fneg(ir_t *ir, ir_value_t *a) { + CHECK(ir_is_float(a->type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_FNEG, a->type); + ir_set_arg0(ir, instr, a); + return instr->result; +} + +ir_value_t *ir_fabs(ir_t *ir, ir_value_t *a) { + CHECK(ir_is_float(a->type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_FABS, a->type); + ir_set_arg0(ir, instr, a); + return instr->result; +} + +ir_value_t *ir_sqrt(ir_t *ir, ir_value_t *a) { + CHECK(ir_is_float(a->type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_SQRT, a->type); + ir_set_arg0(ir, instr, a); + return instr->result; +} + +ir_value_t *ir_vbroadcast(ir_t *ir, ir_value_t *a) { + CHECK(a->type == VALUE_F32); + + ir_instr_t *instr = ir_append_instr(ir, OP_VBROADCAST, VALUE_V128); + ir_set_arg0(ir, instr, a); + return instr->result; +} + +ir_value_t *ir_vadd(ir_t *ir, ir_value_t *a, ir_value_t *b, ir_type_t el_type) { + CHECK(ir_is_vector(a->type) && ir_is_vector(b->type)); + CHECK_EQ(el_type, VALUE_F32); + + ir_instr_t *instr = ir_append_instr(ir, OP_VADD, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_vdot(ir_t *ir, ir_value_t *a, ir_value_t *b, ir_type_t el_type) { + CHECK(ir_is_vector(a->type) && ir_is_vector(b->type)); + CHECK_EQ(el_type, VALUE_F32); + + ir_instr_t *instr = ir_append_instr(ir, OP_VDOT, el_type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_vmul(ir_t *ir, ir_value_t *a, ir_value_t *b, ir_type_t el_type) { + 
CHECK(ir_is_vector(a->type) && ir_is_vector(b->type)); + CHECK_EQ(el_type, VALUE_F32); + + ir_instr_t *instr = ir_append_instr(ir, OP_VMUL, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_and(ir_t *ir, ir_value_t *a, ir_value_t *b) { + CHECK(is_is_int(a->type) && a->type == b->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_AND, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_or(ir_t *ir, ir_value_t *a, ir_value_t *b) { + CHECK(is_is_int(a->type) && a->type == b->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_OR, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_xor(ir_t *ir, ir_value_t *a, ir_value_t *b) { + CHECK(is_is_int(a->type) && a->type == b->type); + + ir_instr_t *instr = ir_append_instr(ir, OP_XOR, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, b); + return instr->result; +} + +ir_value_t *ir_not(ir_t *ir, ir_value_t *a) { + CHECK(is_is_int(a->type)); + + ir_instr_t *instr = ir_append_instr(ir, OP_NOT, a->type); + ir_set_arg0(ir, instr, a); + return instr->result; +} + +ir_value_t *ir_shl(ir_t *ir, ir_value_t *a, ir_value_t *n) { + CHECK(is_is_int(a->type) && n->type == VALUE_I32); + + ir_instr_t *instr = ir_append_instr(ir, OP_SHL, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, n); + return instr->result; +} + +ir_value_t *ir_shli(ir_t *ir, ir_value_t *a, int n) { + return ir_shl(ir, a, ir_alloc_i32(ir, n)); +} + +ir_value_t *ir_ashr(ir_t *ir, ir_value_t *a, ir_value_t *n) { + CHECK(is_is_int(a->type) && n->type == VALUE_I32); + + ir_instr_t *instr = ir_append_instr(ir, OP_ASHR, a->type); + ir_set_arg0(ir, instr, a); + ir_set_arg1(ir, instr, n); + return instr->result; +} + +ir_value_t *ir_ashri(ir_t *ir, ir_value_t *a, int n) { + return ir_ashr(ir, a, ir_alloc_i32(ir, n)); +} + +ir_value_t *ir_lshr(ir_t *ir, 
ir_value_t *a, ir_value_t *n) {
+  CHECK(is_is_int(a->type) && n->type == VALUE_I32);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_LSHR, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, n);
+  return instr->result;
+}
+
+ir_value_t *ir_lshri(ir_t *ir, ir_value_t *a, int n) {
+  return ir_lshr(ir, a, ir_alloc_i32(ir, n));
+}
+
+ir_value_t *ir_ashd(ir_t *ir, ir_value_t *a, ir_value_t *n) {
+  CHECK(a->type == VALUE_I32 && n->type == VALUE_I32);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_ASHD, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, n);
+  return instr->result;
+}
+
+ir_value_t *ir_lshd(ir_t *ir, ir_value_t *a, ir_value_t *n) {
+  CHECK(a->type == VALUE_I32 && n->type == VALUE_I32);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_LSHD, a->type);
+  ir_set_arg0(ir, instr, a);
+  ir_set_arg1(ir, instr, n);
+  return instr->result;
+}
+
+void ir_branch(ir_t *ir, ir_value_t *dest) {
+  ir_instr_t *instr = ir_append_instr(ir, OP_BRANCH, VALUE_V);
+  ir_set_arg0(ir, instr, dest);
+}
+
+void ir_branch_cond(ir_t *ir, ir_value_t *cond, ir_value_t *true_addr,
+                    ir_value_t *false_addr) {
+  ir_instr_t *instr = ir_append_instr(ir, OP_BRANCH_COND, VALUE_V);
+  ir_set_arg0(ir, instr, cond);
+  ir_set_arg1(ir, instr, true_addr);
+  ir_set_arg2(ir, instr, false_addr);
+}
+
+void ir_call_external_1(ir_t *ir, ir_value_t *addr) {
+  CHECK_EQ(addr->type, VALUE_I64);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_CALL_EXTERNAL, VALUE_V);
+  ir_set_arg0(ir, instr, addr);
+}
+
+void ir_call_external_2(ir_t *ir, ir_value_t *addr, ir_value_t *arg0) {
+  CHECK_EQ(addr->type, VALUE_I64);
+  CHECK_EQ(arg0->type, VALUE_I64);
+
+  ir_instr_t *instr = ir_append_instr(ir, OP_CALL_EXTERNAL, VALUE_V);
+  ir_set_arg0(ir, instr, addr);
+  ir_set_arg1(ir, instr, arg0);
+}
diff --git a/src/jit/ir/ir.h b/src/jit/ir/ir.h
new file mode 100644
index 00000000..2fdc50b0
--- /dev/null
+++ b/src/jit/ir/ir.h
@@ -0,0 +1,314 @@
+#ifndef IR_BUILDER_H
+#define IR_BUILDER_H
+
+#include <stdio.h>
+#include "core/assert.h"
+#include "core/list.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+#define IR_OP(name) OP_##name,
+#include "jit/ir/ir_ops.inc"
+#undef IR_OP
+  NUM_OPS
+} ir_op_t;
+
+typedef enum {
+  VALUE_V,
+  VALUE_I8,
+  VALUE_I16,
+  VALUE_I32,
+  VALUE_I64,
+  VALUE_F32,
+  VALUE_F64,
+  VALUE_V128,
+  VALUE_NUM,
+} ir_type_t;
+
+typedef enum {
+  CMP_EQ,
+  CMP_NE,
+  CMP_SGE,
+  CMP_SGT,
+  CMP_UGE,
+  CMP_UGT,
+  CMP_SLE,
+  CMP_SLT,
+  CMP_ULE,
+  CMP_ULT
+} ir_cmp_t;
+
+struct ir_value_s;
+struct ir_instr_s;
+
+// maximum number of arguments an instruction can take. declared as an enum
+// constant rather than a `static const int` because it sizes the arrays in
+// ir_instr_t, and C requires an integer constant expression there
+enum { MAX_INSTR_ARGS = 3 };
+
+// use is a layer of indirection between an instruction and the values it uses
+// as arguments. this indirection makes it possible to maintain a list for each
+// value of the arguments that reference it
+typedef struct ir_use_s {
+  // the instruction that's using the value
+  struct ir_instr_s *instr;
+
+  // pointer to the argument that's using the value. this is used to substitute
+  // a new value for the argument in the case that the original value is
+  // removed (e.g. due to constant propagation)
+  struct ir_value_s **parg;
+
+  list_node_t it;
+} ir_use_t;
+
+typedef struct ir_value_s {
+  ir_type_t type;
+
+  union {
+    int8_t i8;
+    int16_t i16;
+    int32_t i32;
+    int64_t i64;
+    float f32;
+    double f64;
+  };
+
+  // instruction that defines this value (non-constant values)
+  struct ir_instr_s *def;
+
+  // instructions that use this value as an argument
+  list_t uses;
+
+  // host register allocated for this value
+  int reg;
+
+  // generic meta data used by optimization passes
+  intptr_t tag;
+} ir_value_t;
+
+typedef struct ir_instr_s {
+  ir_op_t op;
+
+  // values used by each argument. note, the argument / use is split into two
+  // separate members to ease reading the argument value (instr->arg[0] vs
+  // instr->arg[0].value)
+  ir_value_t *arg[MAX_INSTR_ARGS];
+  ir_use_t used[MAX_INSTR_ARGS];
+
+  // result of the instruction. note, instruction results don't consider
+  // themselves users of the value (eases register allocation logic)
+  ir_value_t *result;
+
+  // generic meta data used by optimization passes
+  intptr_t tag;
+
+  list_node_t it;
+} ir_instr_t;
+
+// locals are allocated for values that need to be spilled to the stack
+// during register allocation
+typedef struct ir_local_s {
+  ir_type_t type;
+  ir_value_t *offset;
+  list_node_t it;
+} ir_local_t;
+
+typedef struct ir_s {
+  uint8_t *buffer;
+  int capacity;
+  int used;
+
+  list_t instrs;
+  list_t locals;
+  int locals_size;
+
+  ir_instr_t *current_instr;
+} ir_t;
+
+extern const char *ir_op_names[NUM_OPS];
+
+// bitmasks of value types, one bit per ir_type_t. these are enum constants so
+// the combined masks below are valid constant expressions in C
+enum {
+  VALUE_I8_MASK = 1 << VALUE_I8,
+  VALUE_I16_MASK = 1 << VALUE_I16,
+  VALUE_I32_MASK = 1 << VALUE_I32,
+  VALUE_I64_MASK = 1 << VALUE_I64,
+  VALUE_F32_MASK = 1 << VALUE_F32,
+  VALUE_F64_MASK = 1 << VALUE_F64,
+  VALUE_V128_MASK = 1 << VALUE_V128,
+  VALUE_INT_MASK =
+      VALUE_I8_MASK | VALUE_I16_MASK | VALUE_I32_MASK | VALUE_I64_MASK,
+  VALUE_FLOAT_MASK = VALUE_F32_MASK | VALUE_F64_MASK,
+  VALUE_VECTOR_MASK = VALUE_V128_MASK,
+  VALUE_ALL_MASK = VALUE_INT_MASK | VALUE_FLOAT_MASK
+};
+
+static const int NO_REGISTER = -1;
+
+// size in bytes of a value of the given type. any other type (e.g. VALUE_V)
+static inline int ir_type_size(ir_type_t type) {
+  switch (type) {
+    case VALUE_I8:
+      return 1;
+    case VALUE_I16:
+      return 2;
+    case VALUE_I32:
+      return 4;
+    case VALUE_I64:
+      return 8;
+    case VALUE_F32:
+      return 4;
+    case VALUE_F64:
+      return 8;
+    case VALUE_V128:
+      return 16;
+    default:
+      LOG_FATAL("Unexpected value type");
+      break;
+  }
+}
+
+// true if type is one of the integer value types (I8, I16, I32, I64)
+static inline bool is_is_int(ir_type_t type) {
+  return type == VALUE_I8 || type == VALUE_I16 || type == VALUE_I32 ||
+         type == VALUE_I64;
+}
+
+// true if type is one of the floating point value types (F32, F64)
+static inline bool ir_is_float(ir_type_t type) {
+  return type == VALUE_F32 || type == VALUE_F64;
+}
+
+// true if type is the vector value type (V128)
+static inline bool ir_is_vector(ir_type_t type) {
+
return type == VALUE_V128; +} + +bool ir_read(FILE *input, struct ir_s *ir); +void ir_write(struct ir_s *ir, FILE *output); + +ir_instr_t *ir_append_instr(ir_t *ir, ir_op_t op, ir_type_t result_type); +void ir_remove_instr(ir_t *ir, ir_instr_t *instr); + +ir_value_t *ir_alloc_i8(ir_t *ir, int8_t c); +ir_value_t *ir_alloc_i16(ir_t *ir, int16_t c); +ir_value_t *ir_alloc_i32(ir_t *ir, int32_t c); +ir_value_t *ir_alloc_i64(ir_t *ir, int64_t c); +ir_value_t *ir_alloc_f32(ir_t *ir, float c); +ir_value_t *ir_alloc_f64(ir_t *ir, double c); +ir_local_t *ir_alloc_local(ir_t *ir, ir_type_t type); + +void ir_set_arg(ir_t *ir, ir_instr_t *instr, int n, ir_value_t *v); +void ir_set_arg0(ir_t *ir, ir_instr_t *instr, ir_value_t *v); +void ir_set_arg1(ir_t *ir, ir_instr_t *instr, ir_value_t *v); +void ir_set_arg2(ir_t *ir, ir_instr_t *instr, ir_value_t *v); + +void ir_replace_use(ir_use_t *use, ir_value_t *other); +void ir_replace_uses(ir_value_t *v, ir_value_t *other); + +bool ir_is_constant(const ir_value_t *v); +uint64_t ir_zext_constant(const ir_value_t *v); + +// direct access to host memory +ir_value_t *ir_load_host(ir_t *ir, ir_value_t *addr, ir_type_t type); +void ir_store_host(ir_t *ir, ir_value_t *addr, ir_value_t *v); + +// guest memory operations +ir_value_t *ir_load_fast(ir_t *ir, ir_value_t *addr, ir_type_t type); +void ir_store_fast(ir_t *ir, ir_value_t *addr, ir_value_t *v); + +ir_value_t *ir_load_slow(ir_t *ir, ir_value_t *addr, ir_type_t type); +void ir_store_slow(ir_t *ir, ir_value_t *addr, ir_value_t *v); + +// context operations +ir_value_t *ir_load_context(ir_t *ir, size_t offset, ir_type_t type); +void ir_store_context(ir_t *ir, size_t offset, ir_value_t *v); + +// local operations +ir_value_t *ir_load_local(ir_t *ir, ir_local_t *local); +void ir_store_local(ir_t *ir, ir_local_t *local, ir_value_t *v); + +// cast / conversion operations +ir_value_t *ir_ftoi(ir_t *ir, ir_value_t *v, ir_type_t dest_type); +ir_value_t *ir_itof(ir_t *ir, ir_value_t *v, ir_type_t 
dest_type); +ir_value_t *ir_sext(ir_t *ir, ir_value_t *v, ir_type_t dest_type); +ir_value_t *ir_zext(ir_t *ir, ir_value_t *v, ir_type_t dest_type); +ir_value_t *ir_trunc(ir_t *ir, ir_value_t *v, ir_type_t dest_type); +ir_value_t *ir_fext(ir_t *ir, ir_value_t *v, ir_type_t dest_type); +ir_value_t *ir_ftrunc(ir_t *ir, ir_value_t *v, ir_type_t dest_type); + +// conditionals +ir_value_t *ir_select(ir_t *ir, ir_value_t *cond, ir_value_t *t, ir_value_t *f); +ir_value_t *ir_cmp_eq(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_ne(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_sge(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_sgt(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_uge(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_ugt(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_sle(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_slt(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_ule(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_cmp_ult(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fcmp_eq(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fcmp_ne(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fcmp_ge(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fcmp_gt(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fcmp_le(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fcmp_lt(ir_t *ir, ir_value_t *a, ir_value_t *b); + +// integer math operators +ir_value_t *ir_add(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_sub(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_smul(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_umul(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_div(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_neg(ir_t *ir, ir_value_t *a); +ir_value_t *ir_abs(ir_t *ir, ir_value_t *a); + +// floating point math operators +ir_value_t *ir_fadd(ir_t *ir, ir_value_t 
*a, ir_value_t *b); +ir_value_t *ir_fsub(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fmul(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fdiv(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_fneg(ir_t *ir, ir_value_t *a); +ir_value_t *ir_fabs(ir_t *ir, ir_value_t *a); +ir_value_t *ir_sqrt(ir_t *ir, ir_value_t *a); + +// vector math operators +ir_value_t *ir_vbroadcast(ir_t *ir, ir_value_t *a); +ir_value_t *ir_vadd(ir_t *ir, ir_value_t *a, ir_value_t *b, ir_type_t el_type); +ir_value_t *ir_vdot(ir_t *ir, ir_value_t *a, ir_value_t *b, ir_type_t el_type); +ir_value_t *ir_vmul(ir_t *ir, ir_value_t *a, ir_value_t *b, ir_type_t el_type); + +// bitwise operations +ir_value_t *ir_and(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_or(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_xor(ir_t *ir, ir_value_t *a, ir_value_t *b); +ir_value_t *ir_not(ir_t *ir, ir_value_t *a); +ir_value_t *ir_shl(ir_t *ir, ir_value_t *a, ir_value_t *n); +ir_value_t *ir_shli(ir_t *ir, ir_value_t *a, int n); +ir_value_t *ir_ashr(ir_t *ir, ir_value_t *a, ir_value_t *n); +ir_value_t *ir_ashri(ir_t *ir, ir_value_t *a, int n); +ir_value_t *ir_lshr(ir_t *ir, ir_value_t *a, ir_value_t *n); +ir_value_t *ir_lshri(ir_t *ir, ir_value_t *a, int n); +ir_value_t *ir_ashd(ir_t *ir, ir_value_t *a, ir_value_t *n); +ir_value_t *ir_lshd(ir_t *ir, ir_value_t *a, ir_value_t *n); + +// branches +void ir_branch(ir_t *ir, ir_value_t *dest); +void ir_branch_cond(ir_t *ir, ir_value_t *cond, ir_value_t *true_addr, + ir_value_t *false_addr); + +// calls +void ir_call_external_1(ir_t *ir, ir_value_t *addr); +void ir_call_external_2(ir_t *ir, ir_value_t *addr, ir_value_t *arg0); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/jit/ir/ir_builder.cc b/src/jit/ir/ir_builder.cc deleted file mode 100644 index 01e61201..00000000 --- a/src/jit/ir/ir_builder.cc +++ /dev/null @@ -1,702 +0,0 @@ -#include -#include "core/math.h" -#include "core/memory.h" -#include 
"jit/ir/ir_builder.h" -#include "jit/ir/ir_writer.h" - -using namespace re::jit; -using namespace re::jit::ir; - -const char *re::jit::ir::Opnames[NUM_OPS] = { -#define IR_OP(name) #name, -#include "jit/ir/ir_ops.inc" -}; - -// -// Value -// -Value::Value(ValueType ty) : type_(ty), constant_(false) {} -Value::Value(int8_t v) : type_(VALUE_I8), constant_(true), i8_(v) {} -Value::Value(int16_t v) : type_(VALUE_I16), constant_(true), i16_(v) {} -Value::Value(int32_t v) : type_(VALUE_I32), constant_(true), i32_(v) {} -Value::Value(int64_t v) : type_(VALUE_I64), constant_(true), i64_(v) {} -Value::Value(float v) : type_(VALUE_F32), constant_(true), f32_(v) {} -Value::Value(double v) : type_(VALUE_F64), constant_(true), f64_(v) {} - -uint64_t Value::GetZExtValue() const { - switch (type_) { - case VALUE_I8: - return static_cast(i8_); - case VALUE_I16: - return static_cast(i16_); - case VALUE_I32: - return static_cast(i32_); - case VALUE_I64: - return static_cast(i64_); - default: - LOG_FATAL("Unexpected value type"); - break; - } -} - -void Value::AddRef(Use *ref) { - refs_.Append(ref); -} - -void Value::RemoveRef(Use *ref) { - refs_.Remove(ref); -} - -void Value::ReplaceRefsWith(Value *other) { - CHECK_NE(this, other); - - // NOTE set_value will modify refs, be careful iterating - auto it = refs_.begin(); - while (it != refs_.end()) { - Use *ref = *(it++); - ref->set_value(other); - } -} - -// -// Use -// -Use::Use(Instr *instr) : instr_(instr), value_(nullptr) {} - -Use::~Use() { - if (value_) { - value_->RemoveRef(this); - } -} - -// -// Local -// -Local::Local(ValueType ty, Value *offset) : type_(ty), offset_(offset) {} - -// -// Instr -// -Instr::Instr(Op op, ValueType result_type) - : Value(result_type), op_(op), uses_{{this}, {this}, {this}}, tag_(0) {} - -Instr::~Instr() {} - -// -// IRBuilder -// -IRBuilder::IRBuilder(Arena &arena) - : arena_(arena), current_instr_(nullptr), locals_size_(0) {} - -void IRBuilder::Dump(std::ostream &output) const { - IRWriter 
writer; - writer.Print(*this, output); -} - -void IRBuilder::Dump() const { - Dump(std::cout); -} - -InsertPoint IRBuilder::GetInsertPoint() { - return {current_instr_}; -} - -void IRBuilder::SetInsertPoint(const InsertPoint &point) { - current_instr_ = point.instr; -} - -void IRBuilder::RemoveInstr(Instr *instr) { - instrs_.Remove(instr); - - // call destructor manually to release value references - instr->~Instr(); -} - -Instr *IRBuilder::LoadHost(Value *addr, ValueType type) { - CHECK_EQ(VALUE_I64, addr->type()); - - Instr *instr = AppendInstr(OP_LOAD_HOST, type); - instr->set_arg0(addr); - return instr; -} - -void IRBuilder::StoreHost(Value *addr, Value *v) { - CHECK_EQ(VALUE_I64, addr->type()); - - Instr *instr = AppendInstr(OP_STORE_HOST); - instr->set_arg0(addr); - instr->set_arg1(v); -} - -Instr *IRBuilder::LoadFast(Value *addr, ValueType type) { - CHECK_EQ(VALUE_I32, addr->type()); - - Instr *instr = AppendInstr(OP_LOAD_FAST, type); - instr->set_arg0(addr); - return instr; -} - -void IRBuilder::StoreFast(Value *addr, Value *v) { - CHECK_EQ(VALUE_I32, addr->type()); - - Instr *instr = AppendInstr(OP_STORE_FAST); - instr->set_arg0(addr); - instr->set_arg1(v); -} - -Instr *IRBuilder::LoadSlow(Value *addr, ValueType type) { - CHECK_EQ(VALUE_I32, addr->type()); - - Instr *instr = AppendInstr(OP_LOAD_SLOW, type); - instr->set_arg0(addr); - return instr; -} - -void IRBuilder::StoreSlow(Value *addr, Value *v) { - CHECK_EQ(VALUE_I32, addr->type()); - - Instr *instr = AppendInstr(OP_STORE_SLOW); - instr->set_arg0(addr); - instr->set_arg1(v); -} - -Instr *IRBuilder::LoadContext(size_t offset, ValueType type) { - Instr *instr = AppendInstr(OP_LOAD_CONTEXT, type); - instr->set_arg0(AllocConstant((int32_t)offset)); - return instr; -} - -void IRBuilder::StoreContext(size_t offset, Value *v) { - Instr *instr = AppendInstr(OP_STORE_CONTEXT); - instr->set_arg0(AllocConstant((int32_t)offset)); - instr->set_arg1(v); -} - -Instr *IRBuilder::LoadLocal(Local *local) { - Instr 
*instr = AppendInstr(OP_LOAD_LOCAL, local->type()); - instr->set_arg0(local->offset()); - return instr; -} - -void IRBuilder::StoreLocal(Local *local, Value *v) { - Instr *instr = AppendInstr(OP_STORE_LOCAL); - instr->set_arg0(local->offset()); - instr->set_arg1(v); -} - -Instr *IRBuilder::FToI(Value *v, ValueType dest_type) { - CHECK(IsFloatType(v->type()) && IsIntType(dest_type)); - - Instr *instr = AppendInstr(OP_FTOI, dest_type); - instr->set_arg0(v); - return instr; -} - -Instr *IRBuilder::IToF(Value *v, ValueType dest_type) { - CHECK(IsIntType(v->type()) && IsFloatType(dest_type)); - - Instr *instr = AppendInstr(OP_ITOF, dest_type); - instr->set_arg0(v); - return instr; -} - -Instr *IRBuilder::SExt(Value *v, ValueType dest_type) { - CHECK(IsIntType(v->type()) && IsIntType(dest_type)); - - Instr *instr = AppendInstr(OP_SEXT, dest_type); - instr->set_arg0(v); - return instr; -} - -Instr *IRBuilder::ZExt(Value *v, ValueType dest_type) { - CHECK(IsIntType(v->type()) && IsIntType(dest_type)); - - Instr *instr = AppendInstr(OP_ZEXT, dest_type); - instr->set_arg0(v); - return instr; -} - -Instr *IRBuilder::Trunc(Value *v, ValueType dest_type) { - CHECK(IsIntType(v->type()) && IsIntType(dest_type)); - - Instr *instr = AppendInstr(OP_TRUNC, dest_type); - instr->set_arg0(v); - return instr; -} - -Instr *IRBuilder::FExt(Value *v, ValueType dest_type) { - CHECK(v->type() == VALUE_F32 && dest_type == VALUE_F64); - - Instr *instr = AppendInstr(OP_FEXT, dest_type); - instr->set_arg0(v); - return instr; -} - -Instr *IRBuilder::FTrunc(Value *v, ValueType dest_type) { - CHECK(v->type() == VALUE_F64 && dest_type == VALUE_F32); - - Instr *instr = AppendInstr(OP_FTRUNC, dest_type); - instr->set_arg0(v); - return instr; -} - -Instr *IRBuilder::Select(Value *cond, Value *t, Value *f) { - CHECK(IsIntType(cond->type()) && IsIntType(t->type()) && - t->type() == f->type()); - - Instr *instr = AppendInstr(OP_SELECT, t->type()); - instr->set_arg0(t); - instr->set_arg1(f); - 
instr->set_arg2(cond); - return instr; -} - -Instr *IRBuilder::Cmp(Value *a, Value *b, CmpType type) { - CHECK(IsIntType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_CMP, VALUE_I8); - instr->set_arg0(a); - instr->set_arg1(b); - instr->set_arg2(AllocConstant(type)); - return instr; -} - -Instr *IRBuilder::CmpEQ(Value *a, Value *b) { - return Cmp(a, b, CMP_EQ); -} - -Instr *IRBuilder::CmpNE(Value *a, Value *b) { - return Cmp(a, b, CMP_NE); -} - -Instr *IRBuilder::CmpSGE(Value *a, Value *b) { - return Cmp(a, b, CMP_SGE); -} - -Instr *IRBuilder::CmpSGT(Value *a, Value *b) { - return Cmp(a, b, CMP_SGT); -} - -Instr *IRBuilder::CmpUGE(Value *a, Value *b) { - return Cmp(a, b, CMP_UGE); -} - -Instr *IRBuilder::CmpUGT(Value *a, Value *b) { - return Cmp(a, b, CMP_UGT); -} - -Instr *IRBuilder::CmpSLE(Value *a, Value *b) { - return Cmp(a, b, CMP_SLE); -} - -Instr *IRBuilder::CmpSLT(Value *a, Value *b) { - return Cmp(a, b, CMP_SLT); -} - -Instr *IRBuilder::CmpULE(Value *a, Value *b) { - return Cmp(a, b, CMP_ULE); -} - -Instr *IRBuilder::CmpULT(Value *a, Value *b) { - return Cmp(a, b, CMP_ULT); -} - -Instr *IRBuilder::FCmp(Value *a, Value *b, CmpType type) { - CHECK(IsFloatType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_FCMP, VALUE_I8); - instr->set_arg0(a); - instr->set_arg1(b); - instr->set_arg2(AllocConstant(type)); - return instr; -} - -Instr *IRBuilder::FCmpEQ(Value *a, Value *b) { - return FCmp(a, b, CMP_EQ); -} - -Instr *IRBuilder::FCmpNE(Value *a, Value *b) { - return FCmp(a, b, CMP_NE); -} - -Instr *IRBuilder::FCmpGE(Value *a, Value *b) { - return FCmp(a, b, CMP_SGE); -} - -Instr *IRBuilder::FCmpGT(Value *a, Value *b) { - return FCmp(a, b, CMP_SGT); -} - -Instr *IRBuilder::FCmpLE(Value *a, Value *b) { - return FCmp(a, b, CMP_SLE); -} - -Instr *IRBuilder::FCmpLT(Value *a, Value *b) { - return FCmp(a, b, CMP_SLT); -} - -Instr *IRBuilder::Add(Value *a, Value *b) { - CHECK(IsIntType(a->type()) && a->type() == 
b->type()); - - Instr *instr = AppendInstr(OP_ADD, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::Sub(Value *a, Value *b) { - CHECK(IsIntType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_SUB, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::SMul(Value *a, Value *b) { - CHECK(IsIntType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_SMUL, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::UMul(Value *a, Value *b) { - CHECK(IsIntType(a->type()) && a->type() == b->type()); - - CHECK(IsIntType(a->type())); - Instr *instr = AppendInstr(OP_UMUL, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::Div(Value *a, Value *b) { - CHECK(IsIntType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_DIV, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::Neg(Value *a) { - CHECK(IsIntType(a->type())); - - Instr *instr = AppendInstr(OP_NEG, a->type()); - instr->set_arg0(a); - return instr; -} - -Instr *IRBuilder::Abs(Value *a) { - CHECK(IsIntType(a->type())); - - Instr *instr = AppendInstr(OP_ABS, a->type()); - instr->set_arg0(a); - return instr; -} - -Instr *IRBuilder::FAdd(Value *a, Value *b) { - CHECK(IsFloatType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_FADD, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::FSub(Value *a, Value *b) { - CHECK(IsFloatType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_FSUB, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::FMul(Value *a, Value *b) { - CHECK(IsFloatType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_FMUL, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); 
- return instr; -} - -Instr *IRBuilder::FDiv(Value *a, Value *b) { - CHECK(IsFloatType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_FDIV, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::FNeg(Value *a) { - CHECK(IsFloatType(a->type())); - - Instr *instr = AppendInstr(OP_FNEG, a->type()); - instr->set_arg0(a); - return instr; -} - -Instr *IRBuilder::FAbs(Value *a) { - CHECK(IsFloatType(a->type())); - - Instr *instr = AppendInstr(OP_FABS, a->type()); - instr->set_arg0(a); - return instr; -} - -Instr *IRBuilder::Sqrt(Value *a) { - CHECK(IsFloatType(a->type())); - - Instr *instr = AppendInstr(OP_SQRT, a->type()); - instr->set_arg0(a); - return instr; -} - -Instr *IRBuilder::VBroadcast(Value *a) { - CHECK(a->type() == VALUE_F32); - - Instr *instr = AppendInstr(OP_VBROADCAST, VALUE_V128); - instr->set_arg0(a); - return instr; -} - -Instr *IRBuilder::VAdd(Value *a, Value *b, ValueType el_type) { - CHECK(IsVectorType(a->type()) && IsVectorType(b->type())); - CHECK_EQ(el_type, VALUE_F32); - - Instr *instr = AppendInstr(OP_VADD, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::VDot(Value *a, Value *b, ValueType el_type) { - CHECK(IsVectorType(a->type()) && IsVectorType(b->type())); - CHECK_EQ(el_type, VALUE_F32); - - Instr *instr = AppendInstr(OP_VDOT, el_type); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::VMul(Value *a, Value *b, ValueType el_type) { - CHECK(IsVectorType(a->type()) && IsVectorType(b->type())); - CHECK_EQ(el_type, VALUE_F32); - - Instr *instr = AppendInstr(OP_VMUL, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::And(Value *a, Value *b) { - CHECK(IsIntType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_AND, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::Or(Value *a, Value *b) { - 
CHECK(IsIntType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_OR, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::Xor(Value *a, Value *b) { - CHECK(IsIntType(a->type()) && a->type() == b->type()); - - Instr *instr = AppendInstr(OP_XOR, a->type()); - instr->set_arg0(a); - instr->set_arg1(b); - return instr; -} - -Instr *IRBuilder::Not(Value *a) { - CHECK(IsIntType(a->type())); - - Instr *instr = AppendInstr(OP_NOT, a->type()); - instr->set_arg0(a); - return instr; -} - -Instr *IRBuilder::Shl(Value *a, Value *n) { - CHECK(IsIntType(a->type()) && n->type() == VALUE_I32); - - Instr *instr = AppendInstr(OP_SHL, a->type()); - instr->set_arg0(a); - instr->set_arg1(n); - return instr; -} - -Instr *IRBuilder::Shl(Value *a, int n) { - return Shl(a, AllocConstant((int32_t)n)); -} - -Instr *IRBuilder::AShr(Value *a, Value *n) { - CHECK(IsIntType(a->type()) && n->type() == VALUE_I32); - - Instr *instr = AppendInstr(OP_ASHR, a->type()); - instr->set_arg0(a); - instr->set_arg1(n); - return instr; -} - -Instr *IRBuilder::AShr(Value *a, int n) { - return AShr(a, AllocConstant((int32_t)n)); -} - -Instr *IRBuilder::LShr(Value *a, Value *n) { - CHECK(IsIntType(a->type()) && n->type() == VALUE_I32); - - Instr *instr = AppendInstr(OP_LSHR, a->type()); - instr->set_arg0(a); - instr->set_arg1(n); - return instr; -} - -Instr *IRBuilder::LShr(Value *a, int n) { - return LShr(a, AllocConstant((int32_t)n)); -} - -Instr *IRBuilder::AShd(Value *a, Value *n) { - CHECK(a->type() == VALUE_I32 && n->type() == VALUE_I32); - - Instr *instr = AppendInstr(OP_ASHD, a->type()); - instr->set_arg0(a); - instr->set_arg1(n); - return instr; -} - -Instr *IRBuilder::LShd(Value *a, Value *n) { - CHECK(a->type() == VALUE_I32 && n->type() == VALUE_I32); - - Instr *instr = AppendInstr(OP_LSHD, a->type()); - instr->set_arg0(a); - instr->set_arg1(n); - return instr; -} - -void IRBuilder::Branch(Value *dest) { - Instr *instr = 
AppendInstr(OP_BRANCH); - instr->set_arg0(dest); -} - -void IRBuilder::BranchCond(Value *cond, Value *true_addr, Value *false_addr) { - Instr *instr = AppendInstr(OP_BRANCH_COND); - instr->set_arg0(cond); - instr->set_arg1(true_addr); - instr->set_arg2(false_addr); -} - -void IRBuilder::CallExternal1(Value *addr) { - CHECK_EQ(addr->type(), VALUE_I64); - - Instr *instr = AppendInstr(OP_CALL_EXTERNAL); - instr->set_arg0(addr); -} - -void IRBuilder::CallExternal2(Value *addr, Value *arg0) { - CHECK_EQ(addr->type(), VALUE_I64); - CHECK_EQ(arg0->type(), VALUE_I64); - - Instr *instr = AppendInstr(OP_CALL_EXTERNAL); - instr->set_arg0(addr); - instr->set_arg1(arg0); -} - -Value *IRBuilder::AllocConstant(uint8_t c) { - return AllocConstant((int8_t)c); -} - -Value *IRBuilder::AllocConstant(uint16_t c) { - return AllocConstant((int16_t)c); -} - -Value *IRBuilder::AllocConstant(uint32_t c) { - return AllocConstant((int32_t)c); -} - -Value *IRBuilder::AllocConstant(uint64_t c) { - return AllocConstant((int64_t)c); -} - -Value *IRBuilder::AllocConstant(int8_t c) { - Value *v = arena_.Alloc(); - new (v) Value(c); - return v; -} - -Value *IRBuilder::AllocConstant(int16_t c) { - Value *v = arena_.Alloc(); - new (v) Value(c); - return v; -} - -Value *IRBuilder::AllocConstant(int32_t c) { - Value *v = arena_.Alloc(); - new (v) Value(c); - return v; -} - -Value *IRBuilder::AllocConstant(int64_t c) { - Value *v = arena_.Alloc(); - new (v) Value(c); - return v; -} - -Value *IRBuilder::AllocConstant(float c) { - Value *v = arena_.Alloc(); - new (v) Value(c); - return v; -} - -Value *IRBuilder::AllocConstant(double c) { - Value *v = arena_.Alloc(); - new (v) Value(c); - return v; -} - -Local *IRBuilder::AllocLocal(ValueType type) { - // align local to natural size - int type_size = SizeForType(type); - locals_size_ = align_up(locals_size_, type_size); - - Local *l = arena_.Alloc(); - new (l) Local(type, AllocConstant(locals_size_)); - locals_.Append(l); - - locals_size_ += type_size; - - 
return l; -} - -Instr *IRBuilder::AllocInstr(Op op, ValueType result_type) { - Instr *instr = arena_.Alloc(); - new (instr) Instr(op, result_type); - return instr; -} - -Instr *IRBuilder::AppendInstr(Op op) { - Instr *instr = AllocInstr(op, VALUE_V); - instrs_.Insert(current_instr_, instr); - current_instr_ = instr; - return instr; -} - -Instr *IRBuilder::AppendInstr(Op op, ValueType result_type) { - Instr *instr = AllocInstr(op, result_type); - instrs_.Insert(current_instr_, instr); - current_instr_ = instr; - return instr; -} diff --git a/src/jit/ir/ir_builder.h b/src/jit/ir/ir_builder.h deleted file mode 100644 index c76e60a9..00000000 --- a/src/jit/ir/ir_builder.h +++ /dev/null @@ -1,550 +0,0 @@ -#ifndef IR_BUILDER_H -#define IR_BUILDER_H - -#include -#include -#include "core/arena.h" -#include "core/assert.h" -#include "core/intrusive_list.h" - -namespace re { -namespace jit { -namespace ir { - -enum Op { -#define IR_OP(name) OP_##name, -#include "jit/ir/ir_ops.inc" -#undef IR_OP - NUM_OPS -}; - -extern const char *Opnames[NUM_OPS]; - -// -// values -// -enum ValueType { - VALUE_V, - VALUE_I8, - VALUE_I16, - VALUE_I32, - VALUE_I64, - VALUE_F32, - VALUE_F64, - VALUE_V128, - VALUE_NUM, -}; - -enum { - VALUE_I8_MASK = 1 << VALUE_I8, - VALUE_I16_MASK = 1 << VALUE_I16, - VALUE_I32_MASK = 1 << VALUE_I32, - VALUE_I64_MASK = 1 << VALUE_I64, - VALUE_F32_MASK = 1 << VALUE_F32, - VALUE_F64_MASK = 1 << VALUE_F64, - VALUE_V128_MASK = 1 << VALUE_V128, - VALUE_INT_MASK = - VALUE_I8_MASK | VALUE_I16_MASK | VALUE_I32_MASK | VALUE_I64_MASK, - VALUE_FLOAT_MASK = VALUE_F32_MASK | VALUE_F64_MASK, - VALUE_VECTOR_MASK = VALUE_V128_MASK, - VALUE_ALL_MASK = VALUE_INT_MASK | VALUE_FLOAT_MASK, -}; - -enum { - NO_REGISTER = -1, -}; - -class Instr; -class Use; - -static inline bool IsIntType(ValueType type) { - return type == VALUE_I8 || type == VALUE_I16 || type == VALUE_I32 || - type == VALUE_I64; -} - -static inline bool IsFloatType(ValueType type) { - return type == VALUE_F32 || type 
== VALUE_F64; -} - -static inline bool IsVectorType(ValueType type) { - return type == VALUE_V128; -} - -static inline int SizeForType(ValueType type) { - switch (type) { - case VALUE_I8: - return 1; - case VALUE_I16: - return 2; - case VALUE_I32: - return 4; - case VALUE_I64: - return 8; - case VALUE_F32: - return 4; - case VALUE_F64: - return 8; - case VALUE_V128: - return 16; - default: - LOG_FATAL("Unexpected value type"); - break; - } -} - -class Value { - public: - Value(ValueType ty); - Value(int8_t v); - Value(int16_t v); - Value(int32_t v); - Value(int64_t v); - Value(float v); - Value(double v); - - ValueType type() const { - return type_; - } - - bool constant() const { - return constant_; - } - - // defined at the end of the file, Instr is only forward declared at this - // point, it can't be static_cast to - const Instr *def() const; - Instr *def(); - - int8_t i8() const { - DCHECK(constant_ && type_ == VALUE_I8); - return i8_; - } - int8_t i8() { - return static_cast(this)->i8(); - } - int16_t i16() const { - DCHECK(constant_ && type_ == VALUE_I16); - return i16_; - } - int16_t i16() { - return static_cast(this)->i16(); - } - int32_t i32() const { - DCHECK(constant_ && type_ == VALUE_I32); - return i32_; - } - int32_t i32() { - return static_cast(this)->i32(); - } - int64_t i64() const { - DCHECK(constant_ && type_ == VALUE_I64); - return i64_; - } - int64_t i64() { - return static_cast(this)->i64(); - } - float f32() const { - DCHECK(constant_ && type_ == VALUE_F32); - return f32_; - } - float f32() { - return static_cast(this)->f32(); - } - double f64() const { - DCHECK(constant_ && type_ == VALUE_F64); - return f64_; - } - double f64() { - return static_cast(this)->f64(); - } - - const IntrusiveList &uses() const { - return refs_; - } - IntrusiveList &uses() { - return refs_; - } - - int reg() const { - return reg_; - } - void set_reg(int reg) { - reg_ = reg; - } - - intptr_t tag() const { - return tag_; - } - void set_tag(intptr_t tag) { - tag_ = 
tag; - } - - uint64_t GetZExtValue() const; - - void AddRef(Use *ref); - void RemoveRef(Use *ref); - void ReplaceRefsWith(Value *other); - - private: - const ValueType type_; - const bool constant_; - const union { - int8_t i8_; - int16_t i16_; - int32_t i32_; - int64_t i64_; - float f32_; - double f64_; - }; - IntrusiveList refs_; - // initializing here so each constructor variation doesn't have to - int reg_{NO_REGISTER}; - intptr_t tag_{0}; -}; - -// Use is a layer of indirection between an instruction and a values it uses. -// Values maintain a list of all of their uses, making it possible to replace -// all uses of a value with a new value during optimizations -class Use : public IntrusiveListNode { - public: - Use(Instr *instr); - ~Use(); - - const Instr *instr() const { - return instr_; - } - Instr *instr() { - return instr_; - } - - const Value *value() const { - return value_; - } - Value *value() { - return value_; - } - void set_value(Value *v) { - if (value_) { - value_->RemoveRef(this); - } - value_ = v; - value_->AddRef(this); - } - - private: - Instr *instr_; - Value *value_; -}; - -// Templated structs to aid the interpreter / constant propagation handlers -template -struct ValueInfo; - -template <> -struct ValueInfo { - typedef void signed_type; - constexpr static void (Value::*fn)() = nullptr; -}; -template <> -struct ValueInfo { - typedef int8_t signed_type; - typedef uint8_t unsigned_type; - constexpr static int8_t (Value::*fn)() = &Value::i8; -}; -template <> -struct ValueInfo { - typedef int16_t signed_type; - typedef uint16_t unsigned_type; - constexpr static int16_t (Value::*fn)() = &Value::i16; -}; -template <> -struct ValueInfo { - typedef int32_t signed_type; - typedef uint32_t unsigned_type; - constexpr static int32_t (Value::*fn)() = &Value::i32; -}; -template <> -struct ValueInfo { - typedef int64_t signed_type; - typedef uint64_t unsigned_type; - constexpr static int64_t (Value::*fn)() = &Value::i64; -}; -template <> -struct ValueInfo 
{ - typedef float signed_type; - constexpr static float (Value::*fn)() = &Value::f32; -}; -template <> -struct ValueInfo { - typedef double signed_type; - constexpr static double (Value::*fn)() = &Value::f64; -}; - -// Locals are allocated for values that need to be spilled to the stack during -// register allocation. -class Local : public IntrusiveListNode { - public: - Local(ValueType ty, Value *offset); - - ValueType type() const { - return type_; - } - Value *offset() const { - return offset_; - } - - private: - ValueType type_; - Value *offset_; -}; - -// -// instructions -// -class Instr : public Value, public IntrusiveListNode { - public: - Instr(Op op, ValueType result_type); - ~Instr(); - - Op op() const { - return op_; - } - - const Value *arg0() const { - return arg(0); - } - Value *arg0() { - return arg(0); - } - void set_arg0(Value *v) { - set_arg(0, v); - } - - const Value *arg1() const { - return arg(1); - } - Value *arg1() { - return arg(1); - } - void set_arg1(Value *v) { - set_arg(1, v); - } - - const Value *arg2() const { - return arg(2); - } - Value *arg2() { - return arg(2); - } - void set_arg2(Value *v) { - set_arg(2, v); - } - - const Value *arg(int i) const { - CHECK_LT(i, 3); - return uses_[i].value(); - } - Value *arg(int i) { - CHECK_LT(i, 3); - return uses_[i].value(); - } - void set_arg(int i, Value *v) { - CHECK_LT(i, 3); - uses_[i].set_value(v); - } - - intptr_t tag() const { - return tag_; - } - void set_tag(intptr_t tag) { - tag_ = tag; - } - - private: - Op op_; - Use uses_[3]; - intptr_t tag_; -}; - -// -// IRBuilder -// -enum CmpType { - CMP_EQ, - CMP_NE, - CMP_SGE, - CMP_SGT, - CMP_UGE, - CMP_UGT, - CMP_SLE, - CMP_SLT, - CMP_ULE, - CMP_ULT -}; - -typedef void (*ExternalFn)(void *); - -struct InsertPoint { - Instr *instr; -}; - -class IRBuilder { - friend class IRReader; - - public: - IRBuilder(Arena &arena); - - const IntrusiveList &instrs() const { - return instrs_; - } - IntrusiveList &instrs() { - return instrs_; - } - - int 
locals_size() const { - return locals_size_; - } - - void Dump(std::ostream &output) const; - void Dump() const; - - InsertPoint GetInsertPoint(); - void SetInsertPoint(const InsertPoint &point); - - void RemoveInstr(Instr *instr); - - // direct access to host memory - Instr *LoadHost(Value *addr, ValueType type); - void StoreHost(Value *addr, Value *v); - - // guest memory operations - Instr *LoadFast(Value *addr, ValueType type); - void StoreFast(Value *addr, Value *v); - - Instr *LoadSlow(Value *addr, ValueType type); - void StoreSlow(Value *addr, Value *v); - - // context operations - Instr *LoadContext(size_t offset, ValueType type); - void StoreContext(size_t offset, Value *v); - - // local operations - Instr *LoadLocal(Local *local); - void StoreLocal(Local *local, Value *v); - - // cast / conversion operations - Instr *FToI(Value *v, ValueType dest_type); - Instr *IToF(Value *v, ValueType dest_type); - Instr *SExt(Value *v, ValueType dest_type); - Instr *ZExt(Value *v, ValueType dest_type); - Instr *Trunc(Value *v, ValueType dest_type); - Instr *FExt(Value *v, ValueType dest_type); - Instr *FTrunc(Value *v, ValueType dest_type); - - // conditionals - Instr *Select(Value *cond, Value *t, Value *f); - Instr *CmpEQ(Value *a, Value *b); - Instr *CmpNE(Value *a, Value *b); - Instr *CmpSGE(Value *a, Value *b); - Instr *CmpSGT(Value *a, Value *b); - Instr *CmpUGE(Value *a, Value *b); - Instr *CmpUGT(Value *a, Value *b); - Instr *CmpSLE(Value *a, Value *b); - Instr *CmpSLT(Value *a, Value *b); - Instr *CmpULE(Value *a, Value *b); - Instr *CmpULT(Value *a, Value *b); - Instr *FCmpEQ(Value *a, Value *b); - Instr *FCmpNE(Value *a, Value *b); - Instr *FCmpGE(Value *a, Value *b); - Instr *FCmpGT(Value *a, Value *b); - Instr *FCmpLE(Value *a, Value *b); - Instr *FCmpLT(Value *a, Value *b); - - // integer math operators - Instr *Add(Value *a, Value *b); - Instr *Sub(Value *a, Value *b); - Instr *SMul(Value *a, Value *b); - Instr *UMul(Value *a, Value *b); - Instr 
*Div(Value *a, Value *b); - Instr *Neg(Value *a); - Instr *Abs(Value *a); - - // floating point math operators - Instr *FAdd(Value *a, Value *b); - Instr *FSub(Value *a, Value *b); - Instr *FMul(Value *a, Value *b); - Instr *FDiv(Value *a, Value *b); - Instr *FNeg(Value *a); - Instr *FAbs(Value *a); - Instr *Sqrt(Value *a); - - // vector math operators - Instr *VBroadcast(Value *a); - Instr *VAdd(Value *a, Value *b, ValueType el_type); - Instr *VDot(Value *a, Value *b, ValueType el_type); - Instr *VMul(Value *a, Value *b, ValueType el_type); - - // bitwise operations - Instr *And(Value *a, Value *b); - Instr *Or(Value *a, Value *b); - Instr *Xor(Value *a, Value *b); - Instr *Not(Value *a); - Instr *Shl(Value *a, Value *n); - Instr *Shl(Value *a, int n); - Instr *AShr(Value *a, Value *n); - Instr *AShr(Value *a, int n); - Instr *LShr(Value *a, Value *n); - Instr *LShr(Value *a, int n); - Instr *AShd(Value *a, Value *n); - Instr *LShd(Value *a, Value *n); - - // branches - void Branch(Value *dest); - void BranchCond(Value *cond, Value *true_addr, Value *false_addr); - - // calls - void CallExternal1(Value *addr); - void CallExternal2(Value *addr, Value *arg0); - - // values - Value *AllocConstant(uint8_t c); - Value *AllocConstant(uint16_t c); - Value *AllocConstant(uint32_t c); - Value *AllocConstant(uint64_t c); - Value *AllocConstant(int8_t c); - Value *AllocConstant(int16_t c); - Value *AllocConstant(int32_t c); - Value *AllocConstant(int64_t c); - Value *AllocConstant(float c); - Value *AllocConstant(double c); - Local *AllocLocal(ValueType type); - - protected: - Instr *AllocInstr(Op op, ValueType result_type); - Instr *AppendInstr(Op op); - Instr *AppendInstr(Op op, ValueType result_type); - - Instr *Cmp(Value *a, Value *b, CmpType type); - Instr *FCmp(Value *a, Value *b, CmpType type); - - Arena &arena_; - IntrusiveList instrs_; - Instr *current_instr_; - IntrusiveList locals_; - int locals_size_; -}; - -inline const Instr *Value::def() const { - 
CHECK(!constant_); - return static_cast(this); -} - -inline Instr *Value::def() { - CHECK(!constant_); - return static_cast(this); -} -} -} -} - -#endif diff --git a/src/jit/ir/ir_read.c b/src/jit/ir/ir_read.c new file mode 100644 index 00000000..f87cd3e8 --- /dev/null +++ b/src/jit/ir/ir_read.c @@ -0,0 +1,347 @@ +#include "core/string.h" +#include "jit/ir/ir.h" + +typedef enum { + TOK_EOF, + TOK_EOL, + TOK_COMMA, + TOK_OPERATOR, + TOK_TYPE, + TOK_INTEGER, + TOK_IDENTIFIER, +} ir_token_t; + +typedef struct { + char s[128]; + uint64_t i; + ir_type_t ty; +} ir_lexeme_t; + +typedef struct { + FILE *input; + ir_token_t tok; + ir_lexeme_t val; +} ir_parser_t; + +static const char *s_typenames[] = {"", "i8", "i16", "i32", + "i64", "f32", "f64", "v128"}; +static const int s_num_typenames = sizeof(s_typenames) / sizeof(s_typenames[0]); + +static char ir_lex_get(ir_parser_t *p) { + return fgetc(p->input); +} + +static void ir_lex_unget(ir_parser_t *p, char c) { + ungetc(c, p->input); +} + +static void ir_lex_next(ir_parser_t *p) { + // skip past whitespace characters, except newlines + char next; + do { + next = ir_lex_get(p); + } while (isspace(next) && next != '\n'); + + // test for end of file + if (next == EOF) { + strncpy(p->val.s, "", sizeof(p->val.s)); + p->tok = TOK_EOF; + return; + } + + // test for newline + if (next == '\n') { + strncpy(p->val.s, "\n", sizeof(p->val.s)); + + // chomp adjacent newlines + while (next == '\n') { + next = ir_lex_get(p); + } + ir_lex_unget(p, next); + + p->tok = TOK_EOL; + return; + } + + // test for comma + if (next == ',') { + strncpy(p->val.s, ",", sizeof(p->val.s)); + p->tok = TOK_COMMA; + return; + } + + // test for assignment operator + if (next == '=') { + strncpy(p->val.s, "=", sizeof(p->val.s)); + p->tok = TOK_OPERATOR; + return; + } + + // test for type keyword + for (int i = 1; i < s_num_typenames; i++) { + const char *typename = s_typenames[i]; + const char *ptr = typename; + char tmp = next; + + // try to match + while 
(*ptr && *ptr == tmp) { + tmp = ir_lex_get(p); + ptr++; + } + + // if the typename matched, return + if (!*ptr) { + strncpy(p->val.s, typename, sizeof(p->val.s)); + p->val.ty = i; + p->tok = TOK_TYPE; + return; + } + + // no match, unget everything + if (*ptr && ptr != typename) { + ir_lex_unget(p, tmp); + ptr--; + } + + while (*ptr && ptr != typename) { + ir_lex_unget(p, *ptr); + ptr--; + } + } + + // test for hex literal + if (next == '0') { + next = ir_lex_get(p); + + if (next == 'x') { + next = ir_lex_get(p); + + // parse literal + p->val.i = 0; + while (isxdigit(next)) { + p->val.i <<= 4; + p->val.i |= xtoi(next); + next = ir_lex_get(p); + } + ir_lex_unget(p, next); + + p->tok = TOK_INTEGER; + return; + } else { + ir_lex_unget(p, next); + } + } + + // treat anything else as an identifier + char *ptr = p->val.s; + while (isalpha(next) || isdigit(next) || next == '%' || next == '_') { + *ptr++ = next; + next = ir_lex_get(p); + } + ir_lex_unget(p, next); + *ptr = 0; + + p->tok = TOK_IDENTIFIER; + return; +} + +bool ir_parse_type(ir_parser_t *p, ir_t *ir, ir_type_t *type) { + if (p->tok != TOK_TYPE) { + LOG_INFO("Unexpected token %d when parsing type", p->tok); + return false; + } + + // eat token + ir_lex_next(p); + + *type = p->val.ty; + + return true; +} + +bool ir_parse_op(ir_parser_t *p, ir_t *ir, ir_op_t *op) { + if (p->tok != TOK_IDENTIFIER) { + LOG_INFO("Unexpected token %d when parsing op", p->tok); + return false; + } + + const char *op_str = p->val.s; + + // match token against opnames + int i; + for (i = 0; i < NUM_OPS; i++) { + if (!strcasecmp(op_str, ir_op_names[i])) { + break; + } + } + + if (i == NUM_OPS) { + LOG_INFO("Unexpected op '%s'", op_str); + return false; + } + + // eat token + ir_lex_next(p); + + *op = (ir_op_t)i; + + return true; +} + +bool ir_parse_value(ir_parser_t *p, ir_t *ir, ir_value_t **value) { + // parse value type + ir_type_t type; + if (!ir_parse_type(p, ir, &type)) { + return false; + } + + // parse value + if (p->tok == 
TOK_IDENTIFIER) { + const char *ident = p->val.s; + + if (ident[0] != '%') { + return false; + } + + // lookup the slot slowly + int slot = atoi(&ident[1]); + + ir_instr_t *instr = list_first_entry(&ir->instrs, ir_instr_t, it); + while (instr) { + if (instr->tag == slot) { + break; + } + instr = list_next_entry(instr, it); + } + CHECK_NOTNULL(instr); + + *value = instr->result; + } else if (p->tok == TOK_INTEGER) { + switch (type) { + case VALUE_I8: { + uint8_t v = (uint8_t)p->val.i; + *value = ir_alloc_i8(ir, v); + } break; + case VALUE_I16: { + uint16_t v = (uint16_t)p->val.i; + *value = ir_alloc_i16(ir, v); + } break; + case VALUE_I32: { + uint32_t v = (uint32_t)p->val.i; + *value = ir_alloc_i32(ir, v); + } break; + case VALUE_I64: { + uint64_t v = (uint64_t)p->val.i; + *value = ir_alloc_i64(ir, v); + } break; + case VALUE_F32: { + uint32_t v = (uint32_t)p->val.i; + *value = ir_alloc_f32(ir, *(float *)&v); + } break; + case VALUE_F64: { + uint64_t v = (uint64_t)p->val.i; + *value = ir_alloc_f64(ir, *(double *)&v); + } break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + } else { + return false; + } + + // eat token + ir_lex_next(p); + + return true; +} + +bool ir_parse_operator(ir_parser_t *p, ir_t *ir) { + const char *op_str = p->val.s; + + if (strcmp(op_str, "=")) { + LOG_INFO("Unexpected operator '%s'", op_str); + return false; + } + + // eat token + ir_lex_next(p); + + // nothing to do, there's only one operator token + + return true; +} + +bool ir_parse_instr(ir_parser_t *p, ir_t *ir) { + int slot = -1; + ir_type_t type = VALUE_V; + ir_value_t *arg[3] = {}; + + // parse result type and slot number + if (p->tok == TOK_TYPE) { + if (!ir_parse_type(p, ir, &type)) { + return false; + } + + const char *ident = p->val.s; + if (ident[0] != '%') { + return false; + } + slot = atoi(&ident[1]); + ir_lex_next(p); + + if (!ir_parse_operator(p, ir)) { + return false; + } + } + + // parse op + ir_op_t op; + if (!ir_parse_op(p, ir, &op)) { + return 
false; + } + + // parse arguments + for (int i = 0; i < 3; i++) { + if (!ir_parse_value(p, ir, &arg[i])) { + return false; + } + + if (p->tok != TOK_COMMA) { + break; + } + + // eat comma and move onto the next argument + ir_lex_next(p); + } + + // create instruction + ir_instr_t *instr = ir_append_instr(ir, op, type); + + for (int i = 0; i < 3; i++) { + ir_set_arg(ir, instr, i, arg[i]); + } + + instr->tag = slot; + + return true; +} + +bool ir_read(FILE *input, ir_t *ir) { + ir_parser_t p = {}; + p.input = input; + + while (true) { + ir_lex_next(&p); + + if (p.tok == TOK_EOF) { + break; + } + + if (!ir_parse_instr(&p, ir)) { + return false; + } + } + + return true; +} diff --git a/src/jit/ir/ir_reader.cc b/src/jit/ir/ir_reader.cc deleted file mode 100644 index 6ba1d8cb..00000000 --- a/src/jit/ir/ir_reader.cc +++ /dev/null @@ -1,323 +0,0 @@ -#include "core/string.h" -#include "jit/ir/ir_reader.h" - -using namespace re; -using namespace re::jit; -using namespace re::jit::ir; - -struct IRType { - const char *name; - ValueType ty; -}; - -static IRType s_ir_types[] = { - {"i8", VALUE_I8}, {"i16", VALUE_I16}, {"i32", VALUE_I32}, - {"i64", VALUE_I64}, {"f32", VALUE_F32}, {"f64", VALUE_F64}, - {"v128", VALUE_V128}, -}; -static const int s_num_ir_types = sizeof(s_ir_types) / sizeof(s_ir_types[0]); - -IRLexer::IRLexer(std::istream &input) : input_(input) {} - -IRToken IRLexer::Next() { - // skip past whitespace characters, except newlines - char next; - do { - next = Get(); - } while (isspace(next) && next != '\n'); - - // test for end of file - if (next == EOF) { - strncpy(val_.s, "", sizeof(val_.s)); - return (tok_ = TOK_EOF); - } - - // test for newline - if (next == '\n') { - strncpy(val_.s, "\n", sizeof(val_.s)); - - // chomp adjacent newlines - while (next == '\n') { - next = Get(); - } - Unget(); - - return (tok_ = TOK_EOL); - } - - // test for comma - if (next == ',') { - strncpy(val_.s, ",", sizeof(val_.s)); - return (tok_ = TOK_COMMA); - } - - // test for 
assignment operator - if (next == '=') { - strncpy(val_.s, "=", sizeof(val_.s)); - return (tok_ = TOK_OPERATOR); - } - - // test for type keyword - for (int i = 0; i < s_num_ir_types; i++) { - IRType &ir_type = s_ir_types[i]; - const char *ptr = ir_type.name; - char tmp = next; - - // try to match - while (*ptr && *ptr == tmp) { - tmp = Get(); - ptr++; - } - - // if we had a match, return - if (!*ptr) { - strncpy(val_.s, ir_type.name, sizeof(val_.s)); - val_.ty = ir_type.ty; - return (tok_ = TOK_TYPE); - } - - // no match, undo - while (*ptr && ptr != ir_type.name) { - Unget(); - ptr--; - } - } - - // test for hex literal - if (next == '0') { - next = Get(); - - if (next == 'x') { - next = Get(); - - // parse literal - val_.i = 0; - while (isxdigit(next)) { - val_.i <<= 4; - val_.i |= xtoi(next); - next = Get(); - } - Unget(); - - return (tok_ = TOK_INTEGER); - } else { - Unget(); - } - } - - // treat anything else as an identifier - char *ptr = val_.s; - while (isalpha(next) || isdigit(next) || next == '%' || next == '_') { - *ptr++ = next; - next = Get(); - } - Unget(); - *ptr = 0; - - return (tok_ = TOK_IDENTIFIER); -} - -char IRLexer::Get() { - return input_.get(); -} - -void IRLexer::Unget() { - input_.unget(); -} - -bool IRReader::Parse(std::istream &input, IRBuilder &builder) { - IRLexer lex(input); - - while (true) { - IRToken tok = lex.Next(); - - if (tok == TOK_EOF) { - break; - } - - if (!ParseInstruction(lex, builder)) { - return false; - } - } - - return true; -} - -bool IRReader::ParseType(IRLexer &lex, IRBuilder &builder, ValueType *type) { - if (lex.tok() != TOK_TYPE) { - LOG_INFO("Unexpected token %d when parsing type"); - return false; - } - - // eat token - lex.Next(); - - *type = lex.val().ty; - - return true; -} - -bool IRReader::ParseOp(IRLexer &lex, IRBuilder &builder, Op *op) { - if (lex.tok() != TOK_IDENTIFIER) { - LOG_INFO("Unexpected token %d when parsing op"); - return false; - } - - const char *op_str = lex.val().s; - - // match token 
against opnames - int i; - for (i = 0; i < NUM_OPS; i++) { - if (!strcasecmp(op_str, Opnames[i])) { - break; - } - } - - // eat token - lex.Next(); - - if (i == NUM_OPS) { - LOG_INFO("Unexpected op '%s'", op_str); - return false; - } - - *op = static_cast(i); - - return true; -} - -bool IRReader::ParseValue(IRLexer &lex, IRBuilder &builder, Value **value) { - // parse value type - ValueType type; - if (!ParseType(lex, builder, &type)) { - return false; - } - - // parse value - if (lex.tok() == TOK_IDENTIFIER) { - const char *ident = lex.val().s; - - if (ident[0] != '%') { - return false; - } - - int slot = atoi(&ident[1]); - auto it = slots_.find(slot); - CHECK_NE(it, slots_.end()); - - *value = it->second; - } else if (lex.tok() == TOK_INTEGER) { - switch (type) { - case VALUE_I8: { - uint8_t v = static_cast(lex.val().i); - *value = builder.AllocConstant(v); - } break; - case VALUE_I16: { - uint16_t v = static_cast(lex.val().i); - *value = builder.AllocConstant(v); - } break; - case VALUE_I32: { - uint32_t v = static_cast(lex.val().i); - *value = builder.AllocConstant(v); - } break; - case VALUE_I64: { - uint64_t v = static_cast(lex.val().i); - *value = builder.AllocConstant(v); - } break; - case VALUE_F32: { - uint32_t v = static_cast(lex.val().i); - *value = builder.AllocConstant(*reinterpret_cast(&v)); - } break; - case VALUE_F64: { - uint64_t v = static_cast(lex.val().i); - *value = builder.AllocConstant(*reinterpret_cast(&v)); - } break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - } else { - return false; - } - - // eat token - lex.Next(); - - return true; -} - -bool IRReader::ParseOperator(IRLexer &lex, IRBuilder &builder) { - const char *op_str = lex.val().s; - - if (strcmp(op_str, "=")) { - LOG_INFO("Unexpected operator '%s'", op_str); - return false; - } - - // eat token - lex.Next(); - - // nothing to do, there's only one operator token - - return true; -} - -bool IRReader::ParseInstruction(IRLexer &lex, IRBuilder &builder) { - int 
slot = -1; - ValueType type = VALUE_V; - Value *arg[3] = {}; - - // parse result type and slot number - if (lex.tok() == TOK_TYPE) { - if (!ParseType(lex, builder, &type)) { - return false; - } - - const char *ident = lex.val().s; - if (ident[0] != '%') { - return false; - } - slot = atoi(&ident[1]); - lex.Next(); - - if (!ParseOperator(lex, builder)) { - return false; - } - } - - // parse op - Op op; - if (!ParseOp(lex, builder, &op)) { - return false; - } - - // parse arguments - for (int i = 0; i < 3; i++) { - ParseValue(lex, builder, &arg[i]); - - if (lex.tok() != TOK_COMMA) { - break; - } - - // eat comma and move onto the next argument - lex.Next(); - } - - // create instruction - Instr *instr = builder.AppendInstr(op, type); - - for (int i = 0; i < 3; i++) { - if (!arg[i]) { - continue; - } - - instr->set_arg(i, arg[i]); - } - - // insert instruction into slot if specified - if (slot != -1) { - slots_.insert(std::make_pair(slot, instr)); - } - - return true; -} diff --git a/src/jit/ir/ir_reader.h b/src/jit/ir/ir_reader.h deleted file mode 100644 index df40908d..00000000 --- a/src/jit/ir/ir_reader.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef IR_READER_H -#define IR_READER_H - -#include -#include -#include "jit/ir/ir_builder.h" - -namespace re { -namespace jit { -namespace ir { - -enum IRToken { - TOK_EOF, - TOK_EOL, - TOK_COMMA, - TOK_OPERATOR, - TOK_TYPE, - TOK_INTEGER, - TOK_IDENTIFIER, -}; - -struct IRLexeme { - char s[128]; - uint64_t i; - ValueType ty; -}; - -class IRLexer { - public: - IRLexer(std::istream &input); - - IRToken tok() const { - return tok_; - } - const IRLexeme &val() const { - return val_; - } - - IRToken Next(); - - private: - char Get(); - void Unget(); - - std::istream &input_; - IRToken tok_; - IRLexeme val_; -}; - -class IRReader { - public: - bool Parse(std::istream &input, IRBuilder &builder); - - private: - bool ParseType(IRLexer &lex, IRBuilder &builder, ValueType *type); - bool ParseOp(IRLexer &lex, IRBuilder &builder, Op *op); - 
bool ParseValue(IRLexer &lex, IRBuilder &builder, Value **value); - bool ParseOperator(IRLexer &lex, IRBuilder &builder); - bool ParseInstruction(IRLexer &lex, IRBuilder &builder); - - std::unordered_map slots_; -}; -} -} -} - -#endif diff --git a/src/jit/ir/ir_write.c b/src/jit/ir/ir_write.c new file mode 100644 index 00000000..0ba5548c --- /dev/null +++ b/src/jit/ir/ir_write.c @@ -0,0 +1,135 @@ +#include +#include "core/string.h" +#include "jit/ir/ir.h" + +static void ir_write_type(ir_type_t type, FILE *output) { + switch (type) { + case VALUE_I8: + fprintf(output, "i8"); + break; + case VALUE_I16: + fprintf(output, "i16"); + break; + case VALUE_I32: + fprintf(output, "i32"); + break; + case VALUE_I64: + fprintf(output, "i64"); + break; + case VALUE_F32: + fprintf(output, "f32"); + break; + case VALUE_F64: + fprintf(output, "f64"); + break; + case VALUE_V128: + fprintf(output, "v128"); + break; + default: + LOG_FATAL("Unexpected value type"); + break; + } +} + +static void ir_write_op(ir_op_t op, FILE *output) { + const char *name = ir_op_names[op]; + + while (*name) { + fprintf(output, "%c", tolower(*name)); + name++; + } +} + +static void ir_write_value(const ir_value_t *value, FILE *output) { + ir_write_type(value->type, output); + + fprintf(output, " "); + + if (ir_is_constant(value)) { + switch (value->type) { + case VALUE_I8: + // force to int to avoid printing out as a character + fprintf(output, "0x%x", value->i8); + break; + case VALUE_I16: + fprintf(output, "0x%x", value->i16); + break; + case VALUE_I32: + fprintf(output, "0x%x", value->i32); + break; + case VALUE_I64: + fprintf(output, "0x%" PRIx64, value->i64); + break; + case VALUE_F32: { + float v = value->f32; + fprintf(output, "0x%x", *(uint32_t *)&v); + } break; + case VALUE_F64: { + double v = value->f64; + fprintf(output, "0x%" PRIx64, *(uint64_t *)&v); + } break; + default: + LOG_FATAL("Unexpected value type"); + break; + } + } else { + fprintf(output, "%%%d", (int)value->def->tag); + } +} + 
+static void ir_write_instr(const ir_instr_t *instr, FILE *output) { + // print result value if we have one + if (instr->result) { + ir_write_value(instr->result, output); + fprintf(output, " = "); + } + + // print the actual op + ir_write_op(instr->op, output); + fprintf(output, " "); + + // print each argument + bool need_comma = false; + + for (int i = 0; i < 3; i++) { + const ir_value_t *arg = instr->arg[i]; + + if (!arg) { + continue; + } + + if (need_comma) { + fprintf(output, ", "); + need_comma = false; + } + + ir_write_value(arg, output); + + need_comma = true; + } + + // fprintf(output, "[tag %" PRId64 ", reg %d]", instr->tag, instr->reg); + + fprintf(output, "\n"); +} + +static void ir_assign_slots(ir_t *ir) { + int next_slot = 0; + + list_for_each_entry(instr, &ir->instrs, ir_instr_t, it) { + // don't assign a slot to instructions without a return value + if (!instr->result) { + continue; + } + + instr->tag = next_slot++; + } +} + +void ir_write(ir_t *ir, FILE *output) { + ir_assign_slots(ir); + + list_for_each_entry(instr, &ir->instrs, ir_instr_t, it) { + ir_write_instr(instr, output); + } +} diff --git a/src/jit/ir/ir_writer.cc b/src/jit/ir/ir_writer.cc deleted file mode 100644 index 8ffbf3ac..00000000 --- a/src/jit/ir/ir_writer.cc +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include "jit/ir/ir_writer.h" - -using namespace re::jit; -using namespace re::jit::ir; - -void IRWriter::Print(const IRBuilder &builder, std::ostream &output) { - slots_.clear(); - next_slot_ = 0; - - for (auto instr : builder.instrs()) { - PrintInstruction(instr, output); - } -} - -void IRWriter::PrintType(ValueType type, std::ostream &output) const { - switch (type) { - case VALUE_I8: - output << "i8"; - break; - case VALUE_I16: - output << "i16"; - break; - case VALUE_I32: - output << "i32"; - break; - case VALUE_I64: - output << "i64"; - break; - case VALUE_F32: - output << "f32"; - break; - case VALUE_F64: - output << "f64"; - break; - case VALUE_V128: - output << "v128"; - 
break; - default: - LOG_FATAL("Unexpected value type"); - break; - } -} - -void IRWriter::PrintOp(Op op, std::ostream &output) const { - const char *name = Opnames[op]; - - while (*name) { - output << static_cast(tolower(*name)); - name++; - } -} - -void IRWriter::PrintValue(const Value *value, std::ostream &output) { - PrintType(value->type(), output); - - output << " "; - - if (value->constant()) { - switch (value->type()) { - case VALUE_I8: - // force to int to avoid printing out as a character - output << "0x" << std::hex << static_cast(value->i8()) << std::dec; - break; - case VALUE_I16: - output << "0x" << std::hex << value->i16() << std::dec; - break; - case VALUE_I32: - output << "0x" << std::hex << value->i32() << std::dec; - break; - case VALUE_I64: - output << "0x" << std::hex << value->i64() << std::dec; - break; - case VALUE_F32: - output << "0x" << std::hex << value->f32() << std::dec; - break; - case VALUE_F64: - output << "0x" << std::hex << value->f64() << std::dec; - break; - default: - LOG_FATAL("Unexpected value type"); - break; - } - } else { - uintptr_t key = reinterpret_cast(value); - auto it = slots_.find(key); - - if (it == slots_.end()) { - auto res = slots_.insert(std::make_pair(key, next_slot_++)); - it = res.first; - } - - output << "%" << it->second; - } -} - -void IRWriter::PrintInstruction(const Instr *instr, std::ostream &output) { - // print result value if we have one - if (instr->type() != VALUE_V) { - PrintValue(instr, output); - output << " = "; - } - - // print the actual op - PrintOp(instr->op(), output); - output << " "; - - // print each argument - bool need_comma = false; - - for (int i = 0; i < 3; i++) { - if (!instr->arg(i)) { - continue; - } - - if (need_comma) { - output << ", "; - need_comma = false; - } - - PrintValue(instr->arg(i), output); - - need_comma = true; - } - - // output << " [tag " << instr->tag() << ", reg " << instr->reg() << "]"; - - output << std::endl; -} diff --git a/src/jit/ir/ir_writer.h 
b/src/jit/ir/ir_writer.h deleted file mode 100644 index 5547d423..00000000 --- a/src/jit/ir/ir_writer.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef IR_WRITER_H -#define IR_WRITER_H - -#include -#include -#include "jit/ir/ir_builder.h" - -namespace re { -namespace jit { -namespace ir { - -class IRWriter { - public: - void Print(const IRBuilder &builder, std::ostream &output); - - private: - void PrintType(ValueType type, std::ostream &output) const; - void PrintOp(Op op, std::ostream &output) const; - void PrintValue(const Value *value, std::ostream &output); - void PrintInstruction(const Instr *instr, std::ostream &output); - - std::unordered_map slots_; - int next_slot_; -}; -} -} -} - -#endif diff --git a/src/jit/ir/passes/constant_propagation_pass.cc b/src/jit/ir/passes/constant_propagation_pass.cc index cba68aaf..481aab16 100644 --- a/src/jit/ir/passes/constant_propagation_pass.cc +++ b/src/jit/ir/passes/constant_propagation_pass.cc @@ -2,9 +2,6 @@ #include #include "jit/ir/passes/constant_propagation_pass.h" -using namespace re::jit::ir; -using namespace re::jit::ir::passes; - typedef void (*FoldFn)(IRBuilder &, Instr *i); // specify which arguments must be constant in order for fold operation to run @@ -28,43 +25,44 @@ int fold_masks[NUM_OPS]; // declare a templated callback for an IR operation. note, declaring a // callback does not actually register it. 
callbacks must be registered // for a particular signature with REGISTER_FOLD -#define FOLD(op, mask) \ - static struct _##op##_init { \ - _##op##_init() { \ - fold_masks[OP_##op] = mask; \ - } \ - } op##_init; \ - template , typename A0 = ValueInfo, \ - typename A1 = ValueInfo> \ - void Handle##op(IRBuilder &builder, Instr *instr) +#define FOLD(op, mask) \ + static struct _##op##_init { \ + _##op##_init() { \ + fold_masks[OP_##op] = mask; \ + } \ + } op##_init; \ + template , \ + typename A0 = ir_value_tInfo, \ + typename A1 = ir_value_tInfo> \ + void Handle##op(ir_t *ir, Instr *instr) // registers a fold callback for the specified signature -#define REGISTER_FOLD(op, r, a0, a1) \ - static struct _cpp_##op##_##r##_##a0##_##a1##_init { \ - _cpp_##op##_##r##_##a0##_##a1##_init() { \ - fold_cbs[CALLBACK_IDX(OP_##op, VALUE_##r, VALUE_##a0, VALUE_##a1)] = \ - &Handle##op, ValueInfo, \ - ValueInfo>; \ - } \ +#define REGISTER_FOLD(op, r, a0, a1) \ + static struct _cpp_##op##_##r##_##a0##_##a1##_init { \ + _cpp_##op##_##r##_##a0##_##a1##_init() { \ + fold_cbs[CALLBACK_IDX(OP_##op, VALUE_##r, VALUE_##a0, VALUE_##a1)] = \ + &Handle##op, ir_value_tInfo, \ + ir_value_tInfo>; \ + } \ } cpp_##op##_##r##_##a0##_##a1##_init // common helpers for fold functions -#define ARG0() (instr->arg0()->*A0::fn)() -#define ARG1() (instr->arg1()->*A1::fn)() -#define ARG2() (instr->arg2()->*A1::fn)() +#define ARG0() (instr->arg[0]->*A0::fn)() +#define ARG1() (instr->arg[1]->*A1::fn)() +#define ARG2() (instr->arg[2]->*A1::fn)() #define ARG0_UNSIGNED() static_cast(ARG0()) #define ARG1_UNSIGNED() static_cast(ARG1()) #define ARG2_UNSIGNED() static_cast(ARG2()) -#define RESULT(expr) \ - instr->ReplaceRefsWith( \ - builder.AllocConstant(static_cast(expr))); \ - builder.RemoveInstr(instr) +#define RESULT(expr) \ + ir_replace_uses(instr, ir_alloc_constant( \ + ir, static_cast(expr))); \ + ir_remove_instr(instr) static FoldFn GetFoldFn(Instr *instr) { auto it = fold_cbs.find( - 
CALLBACK_IDX(instr->op(), instr->type(), - instr->arg0() ? (int)instr->arg0()->type() : VALUE_V, - instr->arg1() ? (int)instr->arg1()->type() : VALUE_V)); + CALLBACK_IDX(instr->op, instr->type, + instr->arg[0] ? (int)instr->arg[0]->type : VALUE_V, + instr->arg[1] ? (int)instr->arg[1]->type : VALUE_V)); if (it == fold_cbs.end()) { return nullptr; } @@ -72,34 +70,29 @@ static FoldFn GetFoldFn(Instr *instr) { } static int GetFoldMask(Instr *instr) { - return fold_masks[instr->op()]; + return fold_masks[instr->op]; } static int GetConstantSig(Instr *instr) { int cnst_sig = 0; - if (instr->arg0() && instr->arg0()->constant()) { + if (instr->arg[0] && ir_is_constant(instr->arg[0])) { cnst_sig |= ARG0_CNST; } - if (instr->arg1() && instr->arg1()->constant()) { + if (instr->arg[1] && ir_is_constant(instr->arg[1])) { cnst_sig |= ARG1_CNST; } - if (instr->arg2() && instr->arg2()->constant()) { + if (instr->arg[2] && ir_is_constant(instr->arg[2])) { cnst_sig |= ARG2_CNST; } return cnst_sig; } -void ConstantPropagationPass::Run(IRBuilder &builder) { - auto it = builder.instrs().begin(); - auto end = builder.instrs().end(); - - while (it != end) { - Instr *instr = *(it++); - +void ConstantPropagationPass::Run(ir_t *ir) { + list_for_each_entry_safe(instr, &ir->instrs, ir_instr_t, it) { int fold_mask = GetFoldMask(instr); int cnst_sig = GetConstantSig(instr); if (!fold_mask || (cnst_sig & fold_mask) != fold_mask) { @@ -116,8 +109,8 @@ void ConstantPropagationPass::Run(IRBuilder &builder) { } FOLD(SELECT, ARG0_CNST) { - instr->ReplaceRefsWith(ARG0() ? instr->arg1() : instr->arg2()); - builder.RemoveInstr(instr); + ir_replace_uses(instr, ARG0() ? 
instr->arg[1] : instr->arg[2]); + ir_remove_instr(ir, instr); } REGISTER_FOLD(SELECT, I8, I8, I8); REGISTER_FOLD(SELECT, I16, I16, I16); diff --git a/src/jit/ir/passes/constant_propagation_pass.h b/src/jit/ir/passes/constant_propagation_pass.h index 5cfe39e7..c8736585 100644 --- a/src/jit/ir/passes/constant_propagation_pass.h +++ b/src/jit/ir/passes/constant_propagation_pass.h @@ -3,11 +3,6 @@ #include "jit/ir/passes/pass_runner.h" -namespace re { -namespace jit { -namespace ir { -namespace passes { - class ConstantPropagationPass : public Pass { public: static const char *NAME = "constprop"; @@ -16,11 +11,7 @@ class ConstantPropagationPass : public Pass { return NAME; } - void Run(IRBuilder &builder); + void Run(struct ir_s *ir); }; -} -} -} -} #endif diff --git a/src/jit/ir/passes/conversion_elimination_pass.c b/src/jit/ir/passes/conversion_elimination_pass.c new file mode 100644 index 00000000..a7789a81 --- /dev/null +++ b/src/jit/ir/passes/conversion_elimination_pass.c @@ -0,0 +1,66 @@ +#include "jit/ir/passes/conversion_elimination_pass.h" +#include "jit/ir/passes/pass_stat.h" +#include "jit/ir/ir.h" + +DEFINE_STAT(num_sext_removed, "Number of sext eliminated"); +DEFINE_STAT(num_zext_removed, "Number of zext eliminated"); +DEFINE_STAT(num_trunc_removed, "Number of trunc eliminated"); + +const char *cve_name = "cve"; + +void cve_run(ir_t *ir) { + list_for_each_entry_safe(instr, &ir->instrs, ir_instr_t, it) { + // eliminate unnecessary sext / zext operations + if (instr->op == OP_LOAD_HOST || instr->op == OP_LOAD_FAST || + instr->op == OP_LOAD_SLOW || instr->op == OP_LOAD_CONTEXT) { + ir_type_t memory_type = VALUE_V; + bool same_type = true; + bool all_sext = true; + bool all_zext = true; + + list_for_each_entry(use, &instr->result->uses, ir_use_t, it) { + ir_instr_t *use_instr = use->instr; + ir_value_t *use_result = use_instr->result; + + if (use_instr->op == OP_SEXT || use_instr->op == OP_ZEXT) { + if (memory_type == VALUE_V) { + memory_type = 
use_result->type; + } + + if (memory_type != use_result->type) { + same_type = false; + } + } + + if (use_instr->op != OP_SEXT) { + all_sext = false; + } + + if (use_instr->op != OP_ZEXT) { + all_zext = false; + } + } + + if (same_type && all_sext) { + // TODO implement + + STAT_num_sext_removed++; + } else if (same_type && all_zext) { + // TODO implement + + STAT_num_zext_removed++; + } + } else if (instr->op == OP_STORE_HOST || instr->op == OP_STORE_FAST || + instr->op == OP_STORE_SLOW || instr->op == OP_STORE_CONTEXT) { + ir_value_t *store_value = instr->arg[1]; + + if (store_value->def && store_value->def->op == OP_TRUNC) { + // TODO implement + + // note, don't actually remove the truncation as other values may + // reference it. let DCE clean it up + STAT_num_trunc_removed++; + } + } + } +} diff --git a/src/jit/ir/passes/conversion_elimination_pass.cc b/src/jit/ir/passes/conversion_elimination_pass.cc deleted file mode 100644 index 1d547a6e..00000000 --- a/src/jit/ir/passes/conversion_elimination_pass.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include "jit/ir/passes/conversion_elimination_pass.h" - -using namespace re::jit::backend; -using namespace re::jit::ir; -using namespace re::jit::ir::passes; - -DEFINE_STAT(num_sext_removed, "Number of sext eliminated"); -DEFINE_STAT(num_zext_removed, "Number of zext eliminated"); -DEFINE_STAT(num_trunc_removed, "Number of trunc eliminated"); - -void ConversionEliminationPass::Run(IRBuilder &builder) { - auto it = builder.instrs().begin(); - auto end = builder.instrs().end(); - - while (it != end) { - Instr *instr = *(it++); - - // eliminate unnecessary sext / zext operations - if (instr->op() == OP_LOAD_HOST || instr->op() == OP_LOAD_FAST || - instr->op() == OP_LOAD_SLOW || instr->op() == OP_LOAD_CONTEXT) { - ValueType memory_type = VALUE_V; - bool same_type = true; - bool all_sext = true; - bool all_zext = true; - - for (auto use : instr->uses()) { - Instr *use_instr = use->instr(); - - if (use_instr->op() == OP_SEXT || 
use_instr->op() == OP_ZEXT) { - if (memory_type == VALUE_V) { - memory_type = use_instr->type(); - } - - if (memory_type != use_instr->type()) { - same_type = false; - } - } - - if (use_instr->op() != OP_SEXT) { - all_sext = false; - } - - if (use_instr->op() != OP_ZEXT) { - all_zext = false; - } - } - - if (same_type && all_sext) { - // TODO implement - - num_sext_removed++; - } else if (same_type && all_zext) { - // TODO implement - - num_zext_removed++; - } - } else if (instr->op() == OP_STORE_HOST || instr->op() == OP_STORE_FAST || - instr->op() == OP_STORE_SLOW || - instr->op() == OP_STORE_CONTEXT) { - Value *store_value = instr->arg1(); - - if (!store_value->constant()) { - Instr *def = store_value->def(); - - if (def->op() == OP_TRUNC) { - // TODO implement - - // note, don't actually remove the truncation as other values may - // reference it. let DCE clean it up - num_trunc_removed++; - } - } - } - } -} diff --git a/src/jit/ir/passes/conversion_elimination_pass.h b/src/jit/ir/passes/conversion_elimination_pass.h index ad25a2e8..231bd9fe 100644 --- a/src/jit/ir/passes/conversion_elimination_pass.h +++ b/src/jit/ir/passes/conversion_elimination_pass.h @@ -1,27 +1,18 @@ #ifndef CONVERSION_ELIMINATION_PASS_H #define CONVERSION_ELIMINATION_PASS_H -#include "jit/backend/backend.h" -#include "jit/ir/passes/pass_runner.h" +#ifdef __cplusplus +extern "C" { +#endif -namespace re { -namespace jit { -namespace ir { -namespace passes { +struct ir_s; -class ConversionEliminationPass : public Pass { - public: - static constexpr const char *NAME = "cve"; +extern const char *cve_name; - const char *name() { - return NAME; - } +void cve_run(struct ir_s *ir); - void Run(IRBuilder &builder); -}; -} -} -} +#ifdef __cplusplus } +#endif #endif diff --git a/src/jit/ir/passes/dead_code_elimination_pass.c b/src/jit/ir/passes/dead_code_elimination_pass.c new file mode 100644 index 00000000..d2bfd3b1 --- /dev/null +++ b/src/jit/ir/passes/dead_code_elimination_pass.c @@ -0,0 +1,23 @@ 
+#include "jit/ir/passes/dead_code_elimination_pass.h" +#include "jit/ir/passes/pass_stat.h" +#include "jit/ir/ir.h" + +DEFINE_STAT(num_dead_removed, "Number of dead instructions eliminated"); +const char *dce_name = "dce"; +void dce_run(ir_t *ir) { + // iterate in reverse in order to remove groups of dead instructions that + // only use each other + list_for_each_entry_safe_reverse(instr, &ir->instrs, ir_instr_t, it) { + ir_value_t *result = instr->result; + + if (!result) { + continue; + } + + if (list_empty(&result->uses)) { + ir_remove_instr(ir, instr); + + STAT_num_dead_removed++; + } + } +} diff --git a/src/jit/ir/passes/dead_code_elimination_pass.cc b/src/jit/ir/passes/dead_code_elimination_pass.cc deleted file mode 100644 index 0c7ee694..00000000 --- a/src/jit/ir/passes/dead_code_elimination_pass.cc +++ /dev/null @@ -1,28 +0,0 @@ -#include "jit/ir/passes/dead_code_elimination_pass.h" - -using namespace re::jit::backend; -using namespace re::jit::ir; -using namespace re::jit::ir::passes; - -DEFINE_STAT(num_dead_removed, "Number of dead instructions eliminated"); - -void DeadCodeEliminationPass::Run(IRBuilder &builder) { - // iterate in reverse in order to remove groups of dead instructions that - // only use eachother - auto it = builder.instrs().rbegin(); - auto end = builder.instrs().rend(); - - while (it != end) { - Instr *instr = *(it++); - - if (instr->type() == VALUE_V) { - continue; - } - - if (!instr->uses().head()) { - builder.RemoveInstr(instr); - - num_dead_removed++; - } - } -} diff --git a/src/jit/ir/passes/dead_code_elimination_pass.h b/src/jit/ir/passes/dead_code_elimination_pass.h index 69265fbd..6fc0a467 100644 --- a/src/jit/ir/passes/dead_code_elimination_pass.h +++ b/src/jit/ir/passes/dead_code_elimination_pass.h @@ -1,27 +1,18 @@ #ifndef DEAD_CODE_ELIMINATION_PASS_H #define DEAD_CODE_ELIMINATION_PASS_H -#include "jit/backend/backend.h" -#include "jit/ir/passes/pass_runner.h" +#ifdef __cplusplus +extern "C" { +#endif -namespace re { -namespace jit { -namespace ir { 
-namespace passes { +struct ir_s; -class DeadCodeEliminationPass : public Pass { - public: - static constexpr const char *NAME = "dce"; +extern const char *dce_name; - const char *name() { - return NAME; - } +void dce_run(struct ir_s *ir); - void Run(IRBuilder &builder); -}; -} -} -} +#ifdef __cplusplus } +#endif #endif diff --git a/src/jit/ir/passes/load_store_elimination_pass.c b/src/jit/ir/passes/load_store_elimination_pass.c new file mode 100644 index 00000000..d794fad6 --- /dev/null +++ b/src/jit/ir/passes/load_store_elimination_pass.c @@ -0,0 +1,142 @@ +#include "jit/ir/passes/load_store_elimination_pass.h" +#include "jit/ir/passes/pass_stat.h" +#include "jit/ir/ir.h" + +DEFINE_STAT(num_loads_removed, "Number of loads eliminated"); +DEFINE_STAT(num_stores_removed, "Number of stores eliminated"); + +const char *lse_name = "lse"; + +static const int MAX_OFFSET = 512; + +typedef struct { + int offset; + ir_value_t *value; +} available_t; + +typedef struct { available_t available[MAX_OFFSET]; } lse_t; + +static void lse_clear_available(lse_t *lse) { + memset(lse->available, 0, sizeof(lse->available)); +} + +static ir_value_t *lse_get_available(lse_t *lse, int offset) { + CHECK_LT(offset, MAX_OFFSET); + + available_t *entry = &lse->available[offset]; + + // entries are added for the entire range of an available value to help with + // invalidation. 
if this entry doesn't start at the requested offset, it's + // not actually valid for reuse + if (entry->offset != offset) { + return NULL; + } + + return entry->value; +} + +static void lse_erase_available(lse_t *lse, int offset, int size) { + int begin = offset; + int end = offset + size - 1; + + // if the invalidation range intersects with an entry, merge that entry into + // the invalidation range + available_t *begin_entry = &lse->available[begin]; + available_t *end_entry = &lse->available[end]; + + if (begin_entry->value) { + begin = begin_entry->offset; + } + + if (end_entry->value) { + end = end_entry->offset + ir_type_size(end_entry->value->type) - 1; + } + + for (; begin <= end; begin++) { + available_t *entry = &lse->available[begin]; + entry->offset = 0; + entry->value = NULL; + } +} + +static void lse_set_available(lse_t *lse, int offset, ir_value_t *v) { + int size = ir_type_size(v->type); + int begin = offset; + int end = offset + size - 1; + + lse_erase_available(lse, offset, size); + + // add entries for the entire range to aid in invalidation. 
only the initial + // entry where offset == entry.offset is valid for reuse + for (; begin <= end; begin++) { + available_t *entry = &lse->available[begin]; + entry->offset = offset; + entry->value = v; + } +} + +void lse_run(ir_t *ir) { + lse_t lse; + + // eliminate redundant loads + { + lse_clear_available(&lse); + + list_for_each_entry_safe(instr, &ir->instrs, ir_instr_t, it) { + if (instr->op == OP_LOAD_CONTEXT) { + // if there is already a value available for this offset, reuse it and + // remove this redundant load + int offset = instr->arg[0]->i32; + ir_value_t *available = lse_get_available(&lse, offset); + + if (available && available->type == instr->result->type) { + ir_replace_uses(instr->result, available); + ir_remove_instr(ir, instr); + + STAT_num_loads_removed++; + + continue; + } + + lse_set_available(&lse, offset, instr->result); + } else if (instr->op == OP_STORE_CONTEXT) { + int offset = instr->arg[0]->i32; + + // mark the value being stored as available + lse_set_available(&lse, offset, instr->arg[1]); + } + } + } + + // eliminate dead stores + { + // iterate in reverse so the current instruction is the one being removed + lse_clear_available(&lse); + + list_for_each_entry_safe_reverse(instr, &ir->instrs, ir_instr_t, it) { + if (instr->op == OP_LOAD_CONTEXT) { + int offset = instr->arg[0]->i32; + int size = ir_type_size(instr->result->type); + + lse_erase_available(&lse, offset, size); + } else if (instr->op == OP_STORE_CONTEXT) { + // if subsequent stores have been made for this offset that would + // overwrite it completely, mark instruction as dead + int offset = instr->arg[0]->i32; + ir_value_t *available = lse_get_available(&lse, offset); + int available_size = available ? 
ir_type_size(available->type) : 0; + int store_size = ir_type_size(instr->arg[1]->type); + + if (available_size >= store_size) { + ir_remove_instr(ir, instr); + + STAT_num_stores_removed++; + + continue; + } + + lse_set_available(&lse, offset, instr->arg[1]); + } + } + } +} diff --git a/src/jit/ir/passes/load_store_elimination_pass.cc b/src/jit/ir/passes/load_store_elimination_pass.cc deleted file mode 100644 index e2c0699b..00000000 --- a/src/jit/ir/passes/load_store_elimination_pass.cc +++ /dev/null @@ -1,176 +0,0 @@ -#include "core/memory.h" -#include "jit/ir/passes/load_store_elimination_pass.h" - -using namespace re::jit::ir; -using namespace re::jit::ir::passes; - -DEFINE_STAT(num_loads_removed, "Number of loads eliminated"); -DEFINE_STAT(num_stores_removed, "Number of stores eliminated"); - -LoadStoreEliminationPass::LoadStoreEliminationPass() - : available_(nullptr), num_available_(0) {} - -void LoadStoreEliminationPass::Run(IRBuilder &builder) { - Reset(); - - // eliminate redundant loads - { - auto it = builder.instrs().begin(); - auto end = builder.instrs().end(); - - ClearAvailable(); - - while (it != end) { - Instr *instr = *(it++); - - if (instr->op() == OP_LOAD_CONTEXT) { - // if there is already a value available for this offset, reuse it and - // remove this redundant load - int offset = instr->arg0()->i32(); - Value *available = GetAvailable(offset); - - if (available && available->type() == instr->type()) { - instr->ReplaceRefsWith(available); - builder.RemoveInstr(instr); - - num_loads_removed++; - - continue; - } - - SetAvailable(offset, instr); - } else if (instr->op() == OP_STORE_CONTEXT) { - int offset = instr->arg0()->i32(); - - // mark the value being stored as available - SetAvailable(offset, instr->arg1()); - } - } - } - - // eliminate dead stores - { - // iterate in reverse so the current instruction is the one being removed - auto it = builder.instrs().rbegin(); - auto end = builder.instrs().rend(); - - ClearAvailable(); - - while (it 
!= end) { - Instr *instr = *(it++); - - if (instr->op() == OP_LOAD_CONTEXT) { - int offset = instr->arg0()->i32(); - int size = SizeForType(instr->type()); - - EraseAvailable(offset, size); - } else if (instr->op() == OP_STORE_CONTEXT) { - // if subsequent stores have been made for this offset that would - // overwrite it completely, mark instruction as dead - int offset = instr->arg0()->i32(); - Value *available = GetAvailable(offset); - int available_size = available ? SizeForType(available->type()) : 0; - int store_size = SizeForType(instr->arg1()->type()); - - if (available_size >= store_size) { - builder.RemoveInstr(instr); - - num_stores_removed++; - - continue; - } - - SetAvailable(offset, instr->arg1()); - } - } - } -} - -void LoadStoreEliminationPass::Reset() { - ClearAvailable(); -} - -void LoadStoreEliminationPass::Reserve(int offset) { - int reserve = offset + 1; - - if (reserve <= num_available_) { - return; - } - - // resize availability array to hold new entry - available_ = reinterpret_cast( - realloc(available_, reserve * sizeof(AvailableEntry))); - - // memset the newly allocated entries - memset(available_ + num_available_, 0, - (reserve - num_available_) * sizeof(AvailableEntry)); - - num_available_ = reserve; -} - -void LoadStoreEliminationPass::ClearAvailable() { - if (!available_) { - return; - } - - memset(available_, 0, num_available_ * sizeof(AvailableEntry)); -} - -Value *LoadStoreEliminationPass::GetAvailable(int offset) { - Reserve(offset); - - AvailableEntry &entry = available_[offset]; - - // entries are added for the entire range of an available value to help with - // invalidation. 
if this entry doesn't start at the requested offset, it's - // not actually valid for reuse - if (entry.offset != offset) { - return nullptr; - } - - return entry.value; -} - -void LoadStoreEliminationPass::EraseAvailable(int offset, int size) { - int begin = offset; - int end = offset + size - 1; - - Reserve(end); - - // if the invalidation range intersects with an entry, merge that entry into - // the invalidation range - AvailableEntry &begin_entry = available_[begin]; - AvailableEntry &end_entry = available_[end]; - - if (begin_entry.value) { - begin = begin_entry.offset; - } - - if (end_entry.value) { - end = end_entry.offset + SizeForType(end_entry.value->type()) - 1; - } - - for (; begin <= end; begin++) { - AvailableEntry &entry = available_[begin]; - entry.offset = 0; - entry.value = nullptr; - } -} - -void LoadStoreEliminationPass::SetAvailable(int offset, Value *v) { - int size = SizeForType(v->type()); - int begin = offset; - int end = offset + size - 1; - - Reserve(end); - - EraseAvailable(offset, size); - - // add entries for the entire range to aid in invalidation. 
only the initial - // entry where offset == entry.offset is valid for reuse - for (; begin <= end; begin++) { - AvailableEntry &entry = available_[begin]; - entry.offset = offset; - entry.value = v; - } -} diff --git a/src/jit/ir/passes/load_store_elimination_pass.h b/src/jit/ir/passes/load_store_elimination_pass.h index 695e3ac9..164e87da 100644 --- a/src/jit/ir/passes/load_store_elimination_pass.h +++ b/src/jit/ir/passes/load_store_elimination_pass.h @@ -1,45 +1,18 @@ #ifndef LOAD_STORE_ELIMINATION_PASS_H #define LOAD_STORE_ELIMINATION_PASS_H -#include "jit/ir/passes/pass_runner.h" +#ifdef __cplusplus +extern "C" { +#endif -namespace re { -namespace jit { -namespace ir { -namespace passes { +struct ir_s; -struct AvailableEntry { - int offset; - Value *value; -}; +extern const char *lse_name; -class LoadStoreEliminationPass : public Pass { - public: - static constexpr const char *NAME = "lse"; +void lse_run(struct ir_s *ir); - LoadStoreEliminationPass(); - - const char *name() { - return NAME; - } - - void Run(IRBuilder &builder); - - private: - void Reset(); - - void Reserve(int offset); - void ClearAvailable(); - void EraseAvailable(int offset, int size); - Value *GetAvailable(int offset); - void SetAvailable(int offset, Value *v); - - AvailableEntry *available_; - int num_available_; -}; -} -} -} +#ifdef __cplusplus } +#endif #endif diff --git a/src/jit/ir/passes/pass_runner.cc b/src/jit/ir/passes/pass_runner.cc deleted file mode 100644 index 448abcdf..00000000 --- a/src/jit/ir/passes/pass_runner.cc +++ /dev/null @@ -1,22 +0,0 @@ -#include "core/profiler.h" -#include "jit/ir/ir_builder.h" -#include "jit/ir/passes/pass_runner.h" - -using namespace re::jit::ir; -using namespace re::jit::ir::passes; - -PassRunner::PassRunner() {} - -void PassRunner::AddPass(std::unique_ptr pass) { - passes_.push_back(std::move(pass)); -} - -void PassRunner::Run(IRBuilder &builder) { - // PROFILER_RUNTIME("PassRunner::Run"); - - for (auto &pass : passes_) { - // 
PROFILER_RUNTIME(pass->name()); - - pass->Run(builder); - } -} diff --git a/src/jit/ir/passes/pass_runner.h b/src/jit/ir/passes/pass_runner.h deleted file mode 100644 index a08d50a4..00000000 --- a/src/jit/ir/passes/pass_runner.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef PASS_RUNNER_H -#define PASS_RUNNER_H - -#include -#include -#include "jit/ir/passes/pass_stats.h" -#include "jit/ir/ir_builder.h" - -namespace re { -namespace jit { -namespace ir { -namespace passes { - -class Pass { - public: - virtual ~Pass() {} - - virtual const char *name() = 0; - - virtual void Run(IRBuilder &builder) = 0; -}; - -class PassRunner { - public: - PassRunner(); - - void AddPass(std::unique_ptr pass); - void Run(IRBuilder &builder); - - private: - std::vector> passes_; -}; -} -} -} -} - -#endif diff --git a/src/jit/ir/passes/pass_stat.c b/src/jit/ir/passes/pass_stat.c new file mode 100644 index 00000000..52f44d42 --- /dev/null +++ b/src/jit/ir/passes/pass_stat.c @@ -0,0 +1,30 @@ +#include "core/assert.h" +#include "core/math.h" +#include "core/string.h" +#include "jit/ir/passes/pass_stat.h" + +static list_t s_stats; + +void pass_stat_register(pass_stat_t *stat) { + list_add(&s_stats, &stat->it); +} + +void pass_stat_unregister(pass_stat_t *stat) { + list_remove(&s_stats, &stat->it); +} + +void pass_stat_print_all() { + LOG_INFO("===-----------------------------------------------------==="); + LOG_INFO("Pass stats"); + LOG_INFO("===-----------------------------------------------------==="); + + int w = 0; + list_for_each_entry(stat, &s_stats, pass_stat_t, it) { + int l = (int)strlen(stat->desc); + w = MAX(l, w); + } + + list_for_each_entry(stat, &s_stats, pass_stat_t, it) { + LOG_INFO("%-*s %d", w, stat->desc, *stat->n); + } +} diff --git a/src/jit/ir/passes/pass_stat.h b/src/jit/ir/passes/pass_stat.h new file mode 100644 index 00000000..3f2b1bac --- /dev/null +++ b/src/jit/ir/passes/pass_stat.h @@ -0,0 +1,36 @@ +#ifndef PASS_STATS_H +#define PASS_STATS_H + +#include 
"core/constructor.h" +#include "core/list.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DEFINE_STAT(name, desc) \ + static int STAT_##name; \ + static pass_stat_t STAT_T_##name = {#name, desc, &STAT_##name, {}}; \ + CONSTRUCTOR(STAT_REGISTER_##name) { \ + pass_stat_register(&STAT_T_##name); \ + } \ + DESTRUCTOR(STAT_UNREGISTER_##name) { \ + pass_stat_unregister(&STAT_T_##name); \ + } + +typedef struct { + const char *name; + const char *desc; + int *n; + list_node_t it; +} pass_stat_t; + +void pass_stat_register(pass_stat_t *stat); +void pass_stat_unregister(pass_stat_t *stat); +void pass_stat_print_all(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/jit/ir/passes/pass_stats.cc b/src/jit/ir/passes/pass_stats.cc deleted file mode 100644 index 8b30b412..00000000 --- a/src/jit/ir/passes/pass_stats.cc +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include "core/assert.h" -#include "core/string.h" -#include "jit/ir/passes/pass_stats.h" - -using namespace re::jit::ir; -using namespace re::jit::ir::passes; - -namespace re { -namespace jit { -namespace ir { -namespace passes { - -static Stat *s_head_stat; - -static void RegisterStat(Stat *stat) { - stat->next = s_head_stat; - s_head_stat = stat; -} - -static void UnregisterStat(Stat *stat) { - Stat **tmp = &s_head_stat; - - while (*tmp) { - Stat **next = &(*tmp)->next; - - if (*tmp == stat) { - *tmp = *next; - break; - } - - tmp = next; - } -} - -Stat::Stat(const char *desc) : desc(desc), n(0), next(nullptr) { - RegisterStat(this); -} - -Stat::~Stat() { - UnregisterStat(this); -} - -void DumpStats() { - LOG_INFO("===-----------------------------------------------------==="); - LOG_INFO("Pass stats"); - LOG_INFO("===-----------------------------------------------------==="); - - int w = 0; - Stat *stat = s_head_stat; - while (stat) { - int l = static_cast(strlen(stat->desc)); - w = std::max(l, w); - stat = stat->next; - } - - stat = s_head_stat; - while (stat) { - LOG_INFO("%-*s %d", w, stat->desc, 
stat->n); - stat = stat->next; - } -} -} -} -} -} diff --git a/src/jit/ir/passes/pass_stats.h b/src/jit/ir/passes/pass_stats.h deleted file mode 100644 index c80aa1c7..00000000 --- a/src/jit/ir/passes/pass_stats.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef PASS_STATS_H -#define PASS_STATS_H - -namespace re { -namespace jit { -namespace ir { -namespace passes { - -#define DEFINE_STAT(name, desc) static Stat name(desc); - -struct Stat { - Stat(const char *desc); - ~Stat(); - - const char *desc; - int n; - Stat *next; - - operator int() const { - return n; - } - - const Stat &operator=(int v) { - n = v; - return *this; - } - - const Stat &operator++(int v) { - n++; - return *this; - } - - const Stat &operator+=(int v) { - n += v; - return *this; - } - - const Stat &operator--() { - n--; - return *this; - } - - const Stat &operator-=(int v) { - n -= v; - return *this; - } -}; - -void DumpStats(); -} -} -} -} - -#endif diff --git a/src/jit/ir/passes/register_allocation_pass.c b/src/jit/ir/passes/register_allocation_pass.c new file mode 100644 index 00000000..6174485d --- /dev/null +++ b/src/jit/ir/passes/register_allocation_pass.c @@ -0,0 +1,387 @@ +#include "core/mm_heap.h" +#include "jit/backend/backend.h" +#include "jit/ir/passes/pass_stat.h" +#include "jit/ir/passes/register_allocation_pass.h" +#include "jit/ir/ir.h" + +DEFINE_STAT(num_spills, "Number of registers spilled"); + +static const int MAX_REGISTERS = 32; + +typedef struct { + ir_instr_t *instr; + ir_instr_t *reused; + ir_use_t *start; + ir_use_t *end; + ir_use_t *next; + int reg; +} interval_t; + +typedef struct { + int free_regs[MAX_REGISTERS]; + int num_free_regs; + + interval_t *live_intervals[MAX_REGISTERS]; + int num_live_intervals; +} register_set_t; + +typedef struct { + // canonical backend register information + const register_def_t *registers; + int num_registers; + + // allocation state + register_set_t int_registers; + register_set_t float_registers; + register_set_t vector_registers; + + // 
intervals, keyed by register + interval_t intervals[MAX_REGISTERS]; +} ra_t; + +static int ra_get_ordinal(const ir_instr_t *i) { + return (int)i->tag; +} + +static void ra_set_ordinal(ir_instr_t *i, int ordinal) { + i->tag = (intptr_t)ordinal; +} + +static int ra_pop_register(register_set_t *set) { + if (!set->num_free_regs) { + return NO_REGISTER; + } + return set->free_regs[--set->num_free_regs]; +} + +static void ra_push_register(register_set_t *set, int reg) { + set->free_regs[set->num_free_regs++] = reg; +} + +static bool ra_interval_cmp(const interval_t *lhs, const interval_t *rhs) { + return !lhs->next || + ra_get_ordinal(lhs->next->instr) < ra_get_ordinal(rhs->next->instr); +} + +static interval_t *ra_head_interval(register_set_t *set) { + if (!set->num_live_intervals) { + return NULL; + } + + mm_type *it = mm_find_min((mm_type *)set->live_intervals, + set->num_live_intervals, (mm_cmp)&ra_interval_cmp); + return *it; +} + +static interval_t *ra_tail_interval(register_set_t *set) { + if (!set->num_live_intervals) { + return NULL; + } + + mm_type *it = mm_find_max((mm_type *)set->live_intervals, + set->num_live_intervals, (mm_cmp)&ra_interval_cmp); + return *it; +} + +static void ra_pop_head_interval(register_set_t *set) { + mm_pop_min((mm_type *)set->live_intervals, set->num_live_intervals, + (mm_cmp)&ra_interval_cmp); + set->num_live_intervals--; +} + +static void ra_pop_tail_interval(register_set_t *set) { + mm_pop_max((mm_type *)set->live_intervals, set->num_live_intervals, + (mm_cmp)&ra_interval_cmp); + set->num_live_intervals--; +} + +static void ra_insert_interval(register_set_t *set, interval_t *interval) { + set->live_intervals[set->num_live_intervals++] = interval; + mm_push((mm_type *)set->live_intervals, set->num_live_intervals, + (mm_cmp)&ra_interval_cmp); +} + +static register_set_t *ra_get_register_set(ra_t *ra, ir_type_t type) { + if (ir_is_int(type)) { + return &ra->int_registers; + } + + if (ir_is_float(type)) { + return
&ra->float_registers; + } + + if (ir_is_vector(type)) { + return &ra->vector_registers; + } + + LOG_FATAL("Unexpected value type"); +} + +static int ra_alloc_blocked_register(ra_t *ra, ir_t *ir, ir_instr_t *instr) { + ir_instr_t *insert_point = ir->current_instr; + register_set_t *set = ra_get_register_set(ra, instr->result->type); + + // spill the register whose next use is furthest away from start + interval_t *interval = ra_tail_interval(set); + ra_pop_tail_interval(set); + + // the interval's value needs to be filled back from the stack before + // its next use + ir_use_t *next_use = interval->next; + CHECK(next_use, + "Register being spilled has no next use, why wasn't it expired?"); + ir_use_t *prev_use = list_prev_entry(next_use, it); + + // allocate a place on the stack to spill the value + ir_local_t *local = ir_alloc_local(ir, interval->instr->result->type); + + // insert load before next use + ir->current_instr = list_prev_entry(next_use->instr, it); + ir_value_t *load_value = ir_load_local(ir, local); + ir_instr_t *load_instr = load_value->def; + + // assign the load a valid ordinal + int load_ordinal = ra_get_ordinal(list_prev_entry(load_instr, it)) + 1; + CHECK_LT(load_ordinal, ra_get_ordinal(list_next_entry(load_instr, it))); + ra_set_ordinal(load_instr, load_ordinal); + + // update uses of interval->instr after the next use to use the new value + // filled from the stack. this code assumes that the uses were previously + // sorted inside of ra_run() + while (next_use) { + // cache off next next since calling ir_replace_use will modify the linked + // list pointers + ir_use_t *next_next_use = list_next_entry(next_use, it); + ir_replace_use(next_use, load_instr->result); + next_use = next_next_use; + } + + // insert spill after prev use, note that order here is extremely important. + // interval->instr's use list has already been sorted, and when the save + // instruction is created and added as a use, the sorted order will be + // invalidated. 
because of this, the save instruction needs to be added after + // the load instruction has updated the sorted uses + ir_instr_t *after = NULL; + + if (prev_use) { + // there is a previous use, insert store after it + CHECK(list_next_entry(prev_use, it) == NULL, + "All future uses should have been replaced"); + after = prev_use->instr; + } else { + // there is no previous use, insert store immediately after definition + CHECK(list_empty(&interval->instr->result->uses), + "All future uses should have been replaced"); + after = interval->instr; + } + + ir->current_instr = after; + ir_store_local(ir, local, interval->instr->result); + + // since the interval that this store belongs to has now expired, there's no + // need to assign an ordinal to it + + // reuse the old interval + interval->instr = instr; + interval->reused = NULL; + interval->start = list_first_entry(&instr->result->uses, ir_use_t, it); + interval->end = list_last_entry(&instr->result->uses, ir_use_t, it); + interval->next = interval->start; + ra_insert_interval(set, interval); + + // reset insert point + ir->current_instr = insert_point; + + STAT_num_spills++; + + return interval->reg; +} + +static int ra_alloc_free_register(ra_t *ra, ir_instr_t *instr) { + register_set_t *set = ra_get_register_set(ra, instr->result->type); + + // get the first free register for this value type + int reg = ra_pop_register(set); + if (reg == NO_REGISTER) { + return NO_REGISTER; + } + + // add interval + interval_t *interval = &ra->intervals[reg]; + interval->instr = instr; + interval->reused = NULL; + interval->start = list_first_entry(&instr->result->uses, ir_use_t, it); + interval->end = list_last_entry(&instr->result->uses, ir_use_t, it); + interval->next = interval->start; + interval->reg = reg; + ra_insert_interval(set, interval); + + return reg; +} + +// If the first argument isn't used after this instruction, its register +// can be reused to take advantage of many architectures supporting +// operations 
where the destination is the first argument. +// TODO could reorder arguments for commutative binary ops and do this +// with the second argument as well +static int ra_reuse_arg_register(ra_t *ra, ir_t *ir, ir_instr_t *instr) { + if (!instr->arg[0]) { + return NO_REGISTER; + } + + int prefered = instr->arg[0]->reg; + if (prefered == NO_REGISTER) { + return NO_REGISTER; + } + + // make sure the register can hold the result type + const register_def_t *r = &ra->registers[prefered]; + if (!(r->value_types & (1 << instr->result->type))) { + return NO_REGISTER; + } + + // if the argument's register is used after this instruction, it's not + // trivial to reuse + interval_t *interval = &ra->intervals[prefered]; + if (list_next_entry(interval->next, it)) { + return NO_REGISTER; + } + + // the argument's register is not used after the current instruction, so the + // register can be reused for the result. note, since the interval min/max + // heap does not support removal of an arbitrary interval, the interval + // removal must be deferred. 
since there are no more uses, the interval will + // expire on the next call to ra_expire_intervals, and then immediately + // requeued by setting the reused property + interval->reused = instr; + + return prefered; +} + +static void ra_expire_set(ra_t *ra, register_set_t *set, ir_instr_t *instr) { + while (true) { + interval_t *interval = ra_head_interval(set); + if (!interval) { + break; + } + + // intervals are sorted by their next use, once one fails to expire or + // advance, they all will + if (interval->next && + ra_get_ordinal(interval->next->instr) >= ra_get_ordinal(instr)) { + break; + } + + // remove interval from the sorted set + ra_pop_head_interval(set); + + // if there are more uses, advance the next use and reinsert the interval + // into the correct position + if (interval->next && list_next_entry(interval->next, it)) { + interval->next = list_next_entry(interval->next, it); + ra_insert_interval(set, interval); + } + // if there are no more uses, but the register has been reused by + // ra_reuse_arg_register, requeue the interval at this time + else if (interval->reused) { + ir_instr_t *reused = interval->reused; + interval->instr = reused; + interval->reused = NULL; + interval->start = list_first_entry(&reused->result->uses, ir_use_t, it); + interval->end = list_last_entry(&reused->result->uses, ir_use_t, it); + interval->next = interval->start; + ra_insert_interval(set, interval); + } + // if there are no other uses, free the register assigned to this + // interval + else { + ra_push_register(set, interval->reg); + } + } +} + +static void ra_expire_intervals(ra_t *ra, ir_instr_t *instr) { + ra_expire_set(ra, &ra->int_registers, instr); + ra_expire_set(ra, &ra->float_registers, instr); + ra_expire_set(ra, &ra->vector_registers, instr); +} + +static int use_cmp(const list_node_t *a_it, const list_node_t *b_it) { + ir_use_t *a = list_entry(a_it, ir_use_t, it); + ir_use_t *b = list_entry(b_it, ir_use_t, it); + return ra_get_ordinal(a->instr) - 
ra_get_ordinal(b->instr); +} + +static void ra_assign_ordinals(ir_t *ir) { + // assign each instruction an ordinal. these ordinals are used to describe + // the live range of a particular value + int ordinal = 0; + + list_for_each_entry(instr, &ir->instrs, ir_instr_t, it) { + ra_set_ordinal(instr, ordinal); + + // space out ordinals to leave available values for instructions inserted + // by AllocBlockedRegister. there should never be an ir op with more than + // 10 arguments to spill registers for + ordinal += 10; + } +} + +static void ra_init_sets(ra_t *ra, const register_def_t *registers, + int num_registers) { + ra->registers = registers; + ra->num_registers = num_registers; + + for (int i = 0; i < ra->num_registers; i++) { + const register_def_t *r = &ra->registers[i]; + + if (r->value_types == VALUE_INT_MASK) { + ra_push_register(&ra->int_registers, i); + } else if (r->value_types == VALUE_FLOAT_MASK) { + ra_push_register(&ra->float_registers, i); + } else if (r->value_types == VALUE_VECTOR_MASK) { + ra_push_register(&ra->vector_registers, i); + } else { + LOG_FATAL("Unsupported register value mask"); + } + } +} + +void ra_run(ir_t *ir, const register_def_t *registers, int num_registers) { + ra_t ra = {}; + + ra_init_sets(&ra, registers, num_registers); + + ra_assign_ordinals(ir); + + list_for_each_entry(instr, &ir->instrs, ir_instr_t, it) { + ir_value_t *result = instr->result; + + // only allocate registers for results, assume constants can always be + // encoded as immediates or that the backend has registers reserved + // for storing the constants + if (!result) { + continue; + } + + // sort the instruction's use list + list_sort(&result->uses, &use_cmp); + + // expire any old intervals, freeing up the registers they claimed + ra_expire_intervals(&ra, instr); + + // first, try and reuse the register of one of the incoming arguments + int reg = ra_reuse_arg_register(&ra, ir, instr); + if (reg == NO_REGISTER) { + // else, allocate a new register for the 
result + reg = ra_alloc_free_register(&ra, instr); + if (reg == NO_REGISTER) { + // if a register couldn't be allocated, spill a register and try again + reg = ra_alloc_blocked_register(&ra, ir, instr); + } + } + + CHECK_NE(reg, NO_REGISTER, "Failed to allocate register"); + result->reg = reg; + } +} diff --git a/src/jit/ir/passes/register_allocation_pass.cc b/src/jit/ir/passes/register_allocation_pass.cc deleted file mode 100644 index 4b85119e..00000000 --- a/src/jit/ir/passes/register_allocation_pass.cc +++ /dev/null @@ -1,377 +0,0 @@ -#include "core/minmax_heap.h" -#include "jit/ir/passes/register_allocation_pass.h" - -using namespace re::jit::backend; -using namespace re::jit::ir; -using namespace re::jit::ir::passes; - -DEFINE_STAT(num_spills, "Number of registers spilled"); - -static inline int GetOrdinal(const Instr *i) { - return (int)i->tag(); -} - -static inline void SetOrdinal(Instr *i, int ordinal) { - i->set_tag((intptr_t)ordinal); -} - -static inline bool RegisterCanStore(const Register &r, ValueType type) { - return r.value_types & (1 << type); -} - -struct LiveIntervalSort { - bool operator()(const Interval *lhs, const Interval *rhs) const { - return !lhs->next || - GetOrdinal(lhs->next->instr()) < GetOrdinal(rhs->next->instr()); - } -}; - -RegisterSet::RegisterSet(int max_registers) { - free_ = new int[max_registers]; - live_ = new Interval *[max_registers]; -} - -RegisterSet::~RegisterSet() { - delete[] free_; - delete[] live_; -} - -void RegisterSet::Clear() { - num_free_ = 0; - num_live_ = 0; -} - -int RegisterSet::PopRegister() { - if (!num_free_) { - return NO_REGISTER; - } - return free_[--num_free_]; -} - -void RegisterSet::PushRegister(int reg) { - free_[num_free_++] = reg; -} - -Interval *RegisterSet::HeadInterval() { - if (!num_live_) { - return nullptr; - } - - auto it = re::mmheap_find_min(live_, live_ + num_live_, LiveIntervalSort()); - return *it; -} - -Interval *RegisterSet::TailInterval() { - if (!num_live_) { - return nullptr; - } 
- - auto it = re::mmheap_find_max(live_, live_ + num_live_, LiveIntervalSort()); - return *it; -} - -void RegisterSet::PopHeadInterval() { - re::mmheap_pop_min(live_, live_ + num_live_, LiveIntervalSort()); - num_live_--; -} - -void RegisterSet::PopTailInterval() { - re::mmheap_pop_max(live_, live_ + num_live_, LiveIntervalSort()); - num_live_--; -} - -void RegisterSet::InsertInterval(Interval *interval) { - live_[num_live_++] = interval; - re::mmheap_push(live_, live_ + num_live_, LiveIntervalSort()); -} - -RegisterAllocationPass::RegisterAllocationPass( - const backend::Register *registers, int num_registers) - : int_registers_(num_registers), - float_registers_(num_registers), - vector_registers_(num_registers) { - registers_ = registers; - num_registers_ = num_registers; - - intervals_ = new Interval[num_registers_]; -} - -RegisterAllocationPass::~RegisterAllocationPass() { - delete[] intervals_; -} - -void RegisterAllocationPass::Run(IRBuilder &builder) { - Reset(); - - AssignOrdinals(builder); - - for (auto instr : builder.instrs()) { - // only allocate registers for results, assume constants can always be - // encoded as immediates or that the backend has registers reserved - // for storing the constants - if (instr->type() == VALUE_V) { - continue; - } - - // sort the instruction's ref list - instr->uses().Sort([](const Use *a, const Use *b) { - return GetOrdinal(a->instr()) < GetOrdinal(b->instr()); - }); - - // expire any old intervals, freeing up the registers they claimed - ExpireOldIntervals(instr); - - // first, try and reuse the register of one of the incoming arguments - int reg = ReuseArgRegister(builder, instr); - if (reg == NO_REGISTER) { - // else, allocate a new register for the result - reg = AllocFreeRegister(instr); - if (reg == NO_REGISTER) { - // if a register couldn't be allocated, spill a register and try again - reg = AllocBlockedRegister(builder, instr); - CHECK_NE(reg, NO_REGISTER, "Failed to allocate register"); - } - } - - 
instr->set_reg(reg); - } -} - -RegisterSet &RegisterAllocationPass::GetRegisterSet(ValueType type) { - if (IsIntType(type)) { - return int_registers_; - } - - if (IsFloatType(type)) { - return float_registers_; - } - - if (IsVectorType(type)) { - return vector_registers_; - } - - LOG_FATAL("Unexpected value type"); -} - -void RegisterAllocationPass::Reset() { - int_registers_.Clear(); - float_registers_.Clear(); - vector_registers_.Clear(); - - for (int i = 0; i < num_registers_; i++) { - const Register &r = registers_[i]; - - if (r.value_types == VALUE_INT_MASK) { - int_registers_.PushRegister(i); - } else if (r.value_types == VALUE_FLOAT_MASK) { - float_registers_.PushRegister(i); - } else if (r.value_types == VALUE_VECTOR_MASK) { - vector_registers_.PushRegister(i); - } else { - LOG_FATAL("Unsupported register value mask"); - } - } -} - -void RegisterAllocationPass::AssignOrdinals(IRBuilder &builder) { - // assign each instruction an ordinal. these ordinals are used to describe - // the live range of a particular value - int ordinal = 0; - for (auto instr : builder.instrs()) { - SetOrdinal(instr, ordinal); - - // space out ordinals to leave available values for instructions inserted - // by AllocBlockedRegister. 
there should never be an ir op with more than - // 10 arguments to spill registers for - ordinal += 10; - } -} - -void RegisterAllocationPass::ExpireOldIntervals(Instr *instr) { - auto expire_set = [&](RegisterSet &set) { - while (true) { - Interval *interval = set.HeadInterval(); - if (!interval) { - break; - } - - // intervals are sorted by their next use, once one fails to expire or - // advance, they all will - if (interval->next && - GetOrdinal(interval->next->instr()) >= GetOrdinal(instr)) { - break; - } - - // remove interval from the sorted set - set.PopHeadInterval(); - - // if there are more uses, advance the next use and reinsert the interval - // into the correct position - if (interval->next && interval->next->next()) { - interval->next = interval->next->next(); - set.InsertInterval(interval); - } - // if there are no more uses, but the register has been reused by - // ReuseArgRegister, requeue the interval at this time - else if (interval->reused) { - Instr *reused = interval->reused; - interval->instr = reused; - interval->reused = nullptr; - interval->start = reused->uses().head(); - interval->end = reused->uses().tail(); - interval->next = interval->start; - set.InsertInterval(interval); - } - // if there are no other uses, free the register assigned to this - // interval - else { - set.PushRegister(interval->reg); - } - } - }; - - expire_set(int_registers_); - expire_set(float_registers_); - expire_set(vector_registers_); -} - -// If the first argument isn't used after this instruction, its register -// can be reused to take advantage of many architectures supporting -// operations where the destination is the first argument. 
-// TODO could reorder arguments for communicative binary ops and do this -// with the second argument as well -int RegisterAllocationPass::ReuseArgRegister(IRBuilder &builder, Instr *instr) { - if (!instr->arg0() || instr->arg0()->constant()) { - return NO_REGISTER; - } - - int prefered = instr->arg0()->reg(); - if (prefered == NO_REGISTER) { - return NO_REGISTER; - } - - // make sure the register can hold the result type - const Register &r = registers_[prefered]; - if (!RegisterCanStore(r, instr->type())) { - return NO_REGISTER; - } - - // if the argument's register is used after this instruction, it's not - // trivial to reuse - Interval *interval = &intervals_[prefered]; - if (interval->next->next()) { - return NO_REGISTER; - } - - // the argument's register is not used after the current instruction, so the - // register can be reused for the result. note, since the interval min/max - // heap does not support removal of an arbitrary interval, the interval - // removal must be deferred. 
since there are no more references, the interval - // will expire on the next call to ExpireOldIntervals, and then immediately - // requeued by setting the reused property - interval->reused = instr; - - return prefered; -} - -int RegisterAllocationPass::AllocFreeRegister(Instr *instr) { - RegisterSet &set = GetRegisterSet(instr->type()); - - // get the first free register for this value type - int reg = set.PopRegister(); - if (reg == NO_REGISTER) { - return NO_REGISTER; - } - - // add interval - Interval *interval = &intervals_[reg]; - interval->instr = instr; - interval->reused = nullptr; - interval->start = instr->uses().head(); - interval->end = instr->uses().tail(); - interval->next = interval->start; - interval->reg = reg; - set.InsertInterval(interval); - - return reg; -} - -int RegisterAllocationPass::AllocBlockedRegister(IRBuilder &builder, - Instr *instr) { - InsertPoint insert_point = builder.GetInsertPoint(); - RegisterSet &set = GetRegisterSet(instr->type()); - - // spill the register who's next use is furthest away from start - Interval *interval = set.TailInterval(); - set.PopTailInterval(); - - // the interval's value needs to be filled back from from the stack before - // its next use - Use *next_ref = interval->next; - Use *prev_ref = next_ref->prev(); - CHECK(next_ref, - "Register being spilled has no next use, why wasn't it expired?"); - - // allocate a place on the stack to spill the value - Local *local = builder.AllocLocal(interval->instr->type()); - - // insert load before next use - builder.SetInsertPoint({next_ref->instr()->prev()}); - Instr *load_instr = builder.LoadLocal(local); - - // assign the load a valid ordinal - int load_ordinal = GetOrdinal(load_instr->prev()) + 1; - CHECK_LT(load_ordinal, GetOrdinal(load_instr->next())); - SetOrdinal(load_instr, load_ordinal); - - // update references to interval->instr after the next use to use the new - // value filled from the stack. 
this code asssumes that the refs were - // previously sorted inside of Run(). - while (next_ref) { - // cache off next next since calling set_value will modify the linked list - // pointers - Use *next_next_ref = next_ref->next(); - next_ref->set_value(load_instr); - next_ref = next_next_ref; - } - - // insert spill after prev use, note that order here is extremely important. - // interval->instr's ref list has already been sorted, and when the save - // instruction is created and added as a reference, the sorted order will be - // invalidated. because of this, the save instruction needs to be added after - // the load instruction has updated the sorted references. - Instr *after = nullptr; - - if (prev_ref) { - // there is a previous reference, insert store after it - CHECK(prev_ref->next() == nullptr, - "All future references should have been replaced"); - after = prev_ref->instr(); - } else { - // there is no previous reference, insert store immediately after definition - CHECK(interval->instr->uses().head() == nullptr, - "All future references should have been replaced"); - after = interval->instr; - } - - builder.SetInsertPoint({after}); - builder.StoreLocal(local, interval->instr); - - // since the interval that this store belongs to has now expired, there's no - // need to assign an ordinal to it - - // reuse the old interval - interval->instr = instr; - interval->reused = nullptr; - interval->start = instr->uses().head(); - interval->end = instr->uses().tail(); - interval->next = interval->start; - set.InsertInterval(interval); - - // reset insert point - builder.SetInsertPoint(insert_point); - - num_spills++; - - return interval->reg; -} diff --git a/src/jit/ir/passes/register_allocation_pass.h b/src/jit/ir/passes/register_allocation_pass.h index 4e889662..cf992999 100644 --- a/src/jit/ir/passes/register_allocation_pass.h +++ b/src/jit/ir/passes/register_allocation_pass.h @@ -1,85 +1,20 @@ #ifndef REGISTER_ALLOCATION_PASS_H #define 
REGISTER_ALLOCATION_PASS_H -#include -#include "jit/backend/backend.h" -#include "jit/ir/passes/pass_runner.h" +#ifdef __cplusplus +extern "C" { +#endif -namespace re { -namespace jit { -namespace ir { -namespace passes { +struct ir_s; +struct register_def_s; -struct Interval { - Instr *instr; - Instr *reused; - Use *start; - Use *end; - Use *next; - int reg; -}; +extern const char *ra_name; -class RegisterSet { - public: - RegisterSet(int max_registers); - ~RegisterSet(); +void ra_run(struct ir_s *ir, const struct register_def_s *registers, + int num_registers); - void Clear(); - - int PopRegister(); - void PushRegister(int reg); - - Interval *HeadInterval(); - Interval *TailInterval(); - void PopHeadInterval(); - void PopTailInterval(); - void InsertInterval(Interval *interval); - - private: - // free register vector - int *free_, num_free_; - - // intervals used by this register set, sorted in order of next use - Interval **live_; - int num_live_; -}; - -class RegisterAllocationPass : public Pass { - public: - static constexpr const char *NAME = "ra"; - - RegisterAllocationPass(const backend::Register *registers, int num_registers); - ~RegisterAllocationPass(); - - const char *name() { - return "ra"; - } - - void Run(IRBuilder &builder); - - private: - const backend::Register *registers_; - int num_registers_; - - RegisterSet int_registers_; - RegisterSet float_registers_; - RegisterSet vector_registers_; - - // intervals, keyed by register - Interval *intervals_; - - RegisterSet &GetRegisterSet(ValueType type); - - void Reset(); - void AssignOrdinals(IRBuilder &builder); - void ExpireOldIntervals(Instr *instr); - int ReuseArgRegister(IRBuilder &builder, Instr *instr); - int AllocFreeRegister(Instr *instr); - int AllocBlockedRegister(IRBuilder &builder, Instr *instr); -}; -} -} -} +#ifdef __cplusplus } +#endif #endif diff --git a/src/sys/exception_handler.c b/src/sys/exception_handler.c index e725bc80..7fd2e93b 100644 --- a/src/sys/exception_handler.c +++ 
b/src/sys/exception_handler.c @@ -49,7 +49,7 @@ void exception_handler_remove(re_exception_handler_t *handler) { } bool exception_handler_handle(re_exception_t *ex) { - list_for_each_entry(&s_live_handlers, re_exception_handler_t, it, handler) { + list_for_each_entry(handler, &s_live_handlers, re_exception_handler_t, it) { if (handler->cb(handler->data, ex)) { return true; } diff --git a/src/sys/exception_handler_linux.cc b/src/sys/exception_handler_linux.cc index 3a7384f3..8d77c253 100644 --- a/src/sys/exception_handler_linux.cc +++ b/src/sys/exception_handler_linux.cc @@ -1,8 +1,6 @@ #include #include "sys/exception_handler_linux.h" -using namespace re::sys; - static struct sigaction old_sigsegv; static struct sigaction old_sigill; diff --git a/src/sys/exception_handler_win.cc b/src/sys/exception_handler_win.cc index f42f09af..5f1f1216 100644 --- a/src/sys/exception_handler_win.cc +++ b/src/sys/exception_handler_win.cc @@ -1,8 +1,6 @@ #include #include "sys/exception_handler_win.h" -using namespace re::sys; - static void CopyStateTo(PCONTEXT src, re_thread_state_t *dst) { dst->rax = src->Rax; dst->rcx = src->Rcx; diff --git a/src/sys/memory.h b/src/sys/memory.h index 4a46414e..83285065 100644 --- a/src/sys/memory.h +++ b/src/sys/memory.h @@ -17,6 +17,7 @@ typedef enum { ACC_NONE, ACC_READONLY, ACC_READWRITE, + ACC_READWRITEEXEC, } page_access_t; size_t get_page_size(); diff --git a/src/sys/memory_posix.c b/src/sys/memory_posix.c index c3739db8..415a04c8 100644 --- a/src/sys/memory_posix.c +++ b/src/sys/memory_posix.c @@ -48,6 +48,8 @@ static int access_to_protect_flags(page_access_t access) { return PROT_READ; case ACC_READWRITE: return PROT_READ | PROT_WRITE; + case ACC_READWRITEEXEC: + return PROT_READ | PROT_WRITE | PROT_EXEC; default: return PROT_NONE; } diff --git a/src/sys/memory_win.c b/src/sys/memory_win.c index a4753ac7..83a2ff00 100644 --- a/src/sys/memory_win.c +++ b/src/sys/memory_win.c @@ -18,6 +18,8 @@ static DWORD 
access_to_protection_flags(page_access_t access) { return PAGE_READONLY; case ACC_READWRITE: return PAGE_READWRITE; + case ACC_READWRITEEXEC: + return PAGE_EXECUTE_READWRITE; default: return PAGE_NOACCESS; } diff --git a/src/ui/window.c b/src/ui/window.c index a0f5b084..03cde4dd 100644 --- a/src/ui/window.c +++ b/src/ui/window.c @@ -61,19 +61,19 @@ static void win_init_joystick(window_t *win) { static void win_handle_paint(window_t *win) { rb_begin_frame(win->rb); - list_for_each_entry(&win->live_listeners, window_listener_t, it, listener) { + list_for_each_entry(listener, &win->live_listeners, window_listener_t, it) { if (listener->cb.prepaint) { listener->cb.prepaint(listener->data); } } - list_for_each_entry(&win->live_listeners, window_listener_t, it, listener) { + list_for_each_entry(listener, &win->live_listeners, window_listener_t, it) { if (listener->cb.paint) { listener->cb.paint(listener->data, win->show_main_menu); } } - list_for_each_entry(&win->live_listeners, window_listener_t, it, listener) { + list_for_each_entry(listener, &win->live_listeners, window_listener_t, it) { if (listener->cb.postpaint) { listener->cb.postpaint(listener->data); } @@ -83,7 +83,7 @@ static void win_handle_paint(window_t *win) { } static void win_handle_keydown(window_t *win, keycode_t code, int16_t value) { - list_for_each_entry(&win->live_listeners, window_listener_t, it, listener) { + list_for_each_entry(listener, &win->live_listeners, window_listener_t, it) { if (listener->cb.keydown) { listener->cb.keydown(listener->data, code, value); } @@ -132,7 +132,7 @@ static void win_handle_hatdown(window_t *win, int hat, uint8_t state, } static void win_handle_textinput(window_t *win, const char *text) { - list_for_each_entry(&win->live_listeners, window_listener_t, it, listener) { + list_for_each_entry(listener, &win->live_listeners, window_listener_t, it) { if (listener->cb.textinput) { listener->cb.textinput(listener->data, text); } @@ -140,7 +140,7 @@ static void 
win_handle_textinput(window_t *win, const char *text) { } static void win_handle_mousemove(window_t *win, int x, int y) { - list_for_each_entry(&win->live_listeners, window_listener_t, it, listener) { + list_for_each_entry(listener, &win->live_listeners, window_listener_t, it) { if (listener->cb.mousemove) { listener->cb.mousemove(listener->data, x, y); } @@ -148,7 +148,7 @@ static void win_handle_mousemove(window_t *win, int x, int y) { } static void win_handle_close(window_t *win) { - list_for_each_entry(&win->live_listeners, window_listener_t, it, listener) { + list_for_each_entry(listener, &win->live_listeners, window_listener_t, it) { if (listener->cb.close) { listener->cb.close(listener->data); } diff --git a/test/asm/ldcl.s b/test/asm/ldcl.s index 6b08bc82..feabfaa6 100644 --- a/test/asm/ldcl.s +++ b/test/asm/ldcl.s @@ -23,7 +23,6 @@ test_ldcl_stcl_sr: # r1 in alt bank should have been pre-decremented by 4 mov.l .DATA_ADDR, r1 stc r1_bank, r5 - sub r1, r5 rts nop # REGISTER_OUT r2 13 diff --git a/test/test_dead_code_elimination_pass.cc b/test/test_dead_code_elimination_pass.cc index 4140b710..1339eb67 100644 --- a/test/test_dead_code_elimination_pass.cc +++ b/test/test_dead_code_elimination_pass.cc @@ -1,26 +1,22 @@ -#include #include #include "jit/ir/passes/dead_code_elimination_pass.h" -#include "jit/ir/ir_builder.h" -#include "jit/ir/ir_reader.h" -#include "jit/ir/ir_writer.h" +#include "jit/ir/ir.h" -using namespace re; -using namespace re::jit::ir; -using namespace re::jit::ir::passes; +static uint8_t ir_buffer[1024 * 1024]; +static char scratch_buffer[1024 * 1024]; TEST(DeadCodeEliminationPassTest, Sanity) { - static const char *input = + static const char input_str[] = "i32 %0 = load_context i32 0xbc\n" - "i32 %1 = load_guest i32 %0\n" - "i32 %2 = load_guest i32 0x8c000a10\n" - "i32 %3 = load_guest i32 %2\n" + "i32 %1 = load_slow i32 %0\n" + "i32 %2 = load_slow i32 0x8c000a10\n" + "i32 %3 = load_slow i32 %2\n" "i32 %4 = load_context i32 0xc0\n" "i32 
%5 = and i32 %3, i32 %4\n" "store_context i32 0xb0, i32 %5\n" - "store_guest i32 %2, i32 %5\n" + "store_slow i32 %2, i32 %5\n" "i32 %6 = load_context i32 0xe4\n" - "i32 %7 = load_guest i32 %6\n" + "i32 %7 = load_slow i32 %6\n" "store_context i32 0xb4, i32 %7\n" "i64 %8 = load_context i32 0x18\n" "i32 %9 = load_context i32 0x38\n" @@ -35,15 +31,15 @@ TEST(DeadCodeEliminationPassTest, Sanity) { "call_external i64 %8, i64 %10\n" "store_context i32 0x30, i32 0x8c000940\n"; - static const char *output = - "i32 %0 = load_guest i32 0x8c000a10\n" - "i32 %1 = load_guest i32 %0\n" + static const char output_str[] = + "i32 %0 = load_slow i32 0x8c000a10\n" + "i32 %1 = load_slow i32 %0\n" "i32 %2 = load_context i32 0xc0\n" "i32 %3 = and i32 %1, i32 %2\n" "store_context i32 0xb0, i32 %3\n" - "store_guest i32 %0, i32 %3\n" + "store_slow i32 %0, i32 %3\n" "i32 %4 = load_context i32 0xe4\n" - "i32 %5 = load_guest i32 %4\n" + "i32 %5 = load_slow i32 %4\n" "store_context i32 0xb4, i32 %5\n" "i64 %6 = load_context i32 0x18\n" "i32 %7 = load_context i32 0x38\n" @@ -58,19 +54,25 @@ TEST(DeadCodeEliminationPassTest, Sanity) { "call_external i64 %6, i64 %8\n" "store_context i32 0x30, i32 0x8c000940\n"; - Arena arena(4096); - IRBuilder builder(arena); + ir_t ir = {}; + ir.buffer = ir_buffer; + ir.capacity = sizeof(ir_buffer); - IRReader reader; - std::stringstream input_stream(input); - reader.Parse(input_stream, builder); + FILE *input = tmpfile(); + fwrite(input_str, 1, sizeof(input_str) - 1, input); + rewind(input); + bool res = ir_read(input, &ir); + fclose(input); + ASSERT_TRUE(res); - DeadCodeEliminationPass pass; - pass.Run(builder); + dce_run(&ir); - IRWriter writer; - std::stringstream output_stream; - writer.Print(builder, output_stream); + FILE *output = tmpfile(); + ir_write(&ir, output); + rewind(output); + size_t n = fread(&scratch_buffer, 1, sizeof(scratch_buffer), output); + fclose(output); + ASSERT_NE(n, 0u); - ASSERT_STREQ(output_stream.str().c_str(), output); + 
ASSERT_STREQ(scratch_buffer, output_str); } diff --git a/test/test_list.cc b/test/test_list.cc index cb1204ab..ddccfa5d 100644 --- a/test/test_list.cc +++ b/test/test_list.cc @@ -38,7 +38,7 @@ static void validate_people(list_t *people_list, person_t **expected_people, int num_expected_people) { int n = 0; - list_for_each_entry(people_list, person_t, it, person) { + list_for_each_entry(person, people_list, person_t, it) { person_t *expected_person = expected_people[n]; ASSERT_STREQ(person->name, expected_person->name); n++; @@ -52,7 +52,7 @@ static void validate_people_reverse(list_t *people_list, int num_expected_people) { int n = 0; - list_for_each_entry_reverse(people_list, person_t, it, person) { + list_for_each_entry_reverse(person, people_list, person_t, it) { person_t *expected_person = expected_people[num_expected_people - n - 1]; ASSERT_STREQ(person->name, expected_person->name); n++; diff --git a/test/test_load_store_elimination_pass.cc b/test/test_load_store_elimination_pass.cc index db32eddd..ade5167c 100644 --- a/test/test_load_store_elimination_pass.cc +++ b/test/test_load_store_elimination_pass.cc @@ -1,16 +1,12 @@ -#include #include #include "jit/ir/passes/load_store_elimination_pass.h" -#include "jit/ir/ir_builder.h" -#include "jit/ir/ir_reader.h" -#include "jit/ir/ir_writer.h" +#include "jit/ir/ir.h" -using namespace re; -using namespace re::jit::ir; -using namespace re::jit::ir::passes; +static uint8_t ir_buffer[1024 * 1024]; +static char scratch_buffer[1024 * 1024]; TEST(LoadStoreEliminationPassTest, Aliasing) { - static const char *input = + static const char input_str[] = "store_context i32 0x104, i32 0x0\n" "store_context i32 0x100, i32 0x0\n" "store_context i32 0x10c, i32 0x0\n" @@ -37,7 +33,7 @@ TEST(LoadStoreEliminationPassTest, Aliasing) { "i32 %10 = sub i32 %9, i32 0x10\n" "store_context i32 0x20, i32 %10\n"; - static const char *output = + static const char output_str[] = "store_context i32 0x104, i32 0x0\n" "store_context i32 0x100, 
i32 0x0\n" "store_context i32 0x10c, i32 0x0\n" @@ -59,19 +55,25 @@ TEST(LoadStoreEliminationPassTest, Aliasing) { "i32 %5 = sub i32 %4, i32 0x10\n" "store_context i32 0x20, i32 %5\n"; - Arena arena(4096); - IRBuilder builder(arena); + ir_t ir = {}; + ir.buffer = ir_buffer; + ir.capacity = sizeof(ir_buffer); - IRReader reader; - std::stringstream input_stream(input); - reader.Parse(input_stream, builder); + FILE *input = tmpfile(); + fwrite(input_str, 1, sizeof(input_str) - 1, input); + rewind(input); + bool res = ir_read(input, &ir); + fclose(input); + ASSERT_TRUE(res); - LoadStoreEliminationPass pass; - pass.Run(builder); + lse_run(&ir); - IRWriter writer; - std::stringstream output_stream; - writer.Print(builder, output_stream); + FILE *output = tmpfile(); + ir_write(&ir, output); + rewind(output); + size_t n = fread(&scratch_buffer, 1, sizeof(scratch_buffer), output); + fclose(output); + ASSERT_NE(n, 0u); - ASSERT_STREQ(output_stream.str().c_str(), output); + ASSERT_STREQ(scratch_buffer, output_str); } diff --git a/test/test_sh4.cc b/test/test_sh4.cc index 93ff001d..d8ce533b 100644 --- a/test/test_sh4.cc +++ b/test/test_sh4.cc @@ -3,7 +3,6 @@ #include #include #include "core/math.h" -#include "core/memory.h" #include "hw/sh4/sh4.h" #include "hw/dreamcast.h" #include "hw/memory.h" @@ -146,14 +145,15 @@ void run_sh4_test(const SH4Test &test) { for (int i = 0; i < sh4_num_test_regs; i++) { SH4TestRegister ® = sh4_test_regs[i]; - uint32_t input = load( + uint32_t input = *reinterpret_cast( reinterpret_cast(&test.in) + reg.offset); if (input == UNINITIALIZED_REG) { continue; } - store(reinterpret_cast(&dc->sh4->ctx) + reg.offset, input); + *reinterpret_cast(reinterpret_cast(&dc->sh4->ctx) + + reg.offset) = input; } // setup initial stack pointer @@ -178,14 +178,14 @@ void run_sh4_test(const SH4Test &test) { for (int i = 0; i < sh4_num_test_regs; i++) { SH4TestRegister ® = sh4_test_regs[i]; - uint32_t expected = load( + uint32_t expected = *reinterpret_cast( 
reinterpret_cast(&test.out) + reg.offset); if (expected == UNINITIALIZED_REG) { continue; } - uint32_t actual = load( + uint32_t actual = *reinterpret_cast( reinterpret_cast(&dc->sh4->ctx) + reg.offset); ASSERT_EQ(expected, actual) << reg.name << " expected: 0x" << std::hex diff --git a/test/test_sh4.inc b/test/test_sh4.inc index 0be8b553..093f22fa 100644 --- a/test/test_sh4.inc +++ b/test/test_sh4.inc @@ -112,13 +112,13 @@ TEST_SH4(test_ldc_stc_sr,(uint8_t *)"\x0d\xe2\x1b\xd0\x0e\x40\x63\xe2\x02\x01\x1 TEST_SH4(test_ldc_stc_rbank,(uint8_t *)"\x0d\xe2\x1b\xd0\x0e\x40\x63\xe2\x02\x01\x15\xd0\x12\x20\x1c\xd0\x0e\x40\x13\xd0\x02\x63\x0b\x00\x09\x00\x9e\x40\x63\xe1\x92\x01\x0b\x00\x09\x00\x1e\x40\x12\x01\x0b\x00\x09\x00\x2e\x40\x22\x01\x0b\x00\x09\x00\x3e\x40\x32\x01\x0b\x00\x09\x00\x4e\x40\x42\x01\x0b\x00\x09\x00\xfa\x40\xfa\x01\x0b\x00\x09\x00\x09\x00\x09\x00\x00\x00\x00\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x50\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x70\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",144,0x1a,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00
d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) TEST_SH4(test_ldc_stc_ssr,(uint8_t *)"\x0d\xe2\x1b\xd0\x0e\x40\x63\xe2\x02\x01\x15\xd0\x12\x20\x1c\xd0\x0e\x40\x13\xd0\x02\x63\x0b\x00\x09\x00\x9e\x40\x63\xe1\x92\x01\x0b\x00\x09\x00\x1e\x40\x12\x01\x0b\x00\x09\x00\x2e\x40\x22\x01\x0b\x00\x09\x00\x3e\x40\x32\x01\x0b\x00\x09\x00\x4e\x40\x42\x01\x0b\x00\x09\x00\xfa\x40\xfa\x01\x0b\x00\x09\x00\x09\x00\x09\x00\x00\x00\x00\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x50\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x70\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",144,0x34,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) TEST_SH4(test_ldc_stc_dbr,(uint8_t 
*)"\x0d\xe2\x1b\xd0\x0e\x40\x63\xe2\x02\x01\x15\xd0\x12\x20\x1c\xd0\x0e\x40\x13\xd0\x02\x63\x0b\x00\x09\x00\x9e\x40\x63\xe1\x92\x01\x0b\x00\x09\x00\x1e\x40\x12\x01\x0b\x00\x09\x00\x2e\x40\x22\x01\x0b\x00\x09\x00\x3e\x40\x32\x01\x0b\x00\x09\x00\x4e\x40\x42\x01\x0b\x00\x09\x00\xfa\x40\xfa\x01\x0b\x00\x09\x00\x09\x00\x09\x00\x00\x00\x00\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x50\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x70\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",144,0x44,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) -TEST_SH4(test_ldcl_stcl_spc,(uint8_t 
*)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x18\x35\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1a\xd1\x02\x21\x19\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0f\xd1\x02\x21\x0e\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x80,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x
baadf00d,0xbaadf00d,0xbaadf00d) -TEST_SH4(test_ldcl_stcl_sr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x18\x35\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1a\xd1\x02\x21\x19\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0f\xd1\x02\x21\x0e\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x0,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xd,0x500000f0,0x4,0x0,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,
0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) -TEST_SH4(test_ldcl_stcl_rbank,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x18\x35\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1a\xd1\x02\x21\x19\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0f\xd1\x02\x21\x0e\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x26,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaa
df00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) -TEST_SH4(test_ldcl_stcl_vbr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x18\x35\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1a\xd1\x02\x21\x19\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0f\xd1\x02\x21\x0e\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x54,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0
xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) -TEST_SH4(test_ldcl_stcl_gbr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x18\x35\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1a\xd1\x02\x21\x19\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0f\xd1\x02\x21\x0e\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x3e,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf0
0d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) -TEST_SH4(test_ldcl_stcl_ssr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x18\x35\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1a\xd1\x02\x21\x19\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0f\xd1\x02\x21\x0e\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x6a,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xba
adf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) -TEST_SH4(test_ldcl_stcl_dbr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x18\x35\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1a\xd1\x02\x21\x19\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0f\xd1\x02\x21\x0e\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x96,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,
0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) +TEST_SH4(test_ldcl_stcl_spc,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1b\xd1\x02\x21\x1a\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x10\xd1\x02\x21\x0f\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x7e,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xba
adf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) +TEST_SH4(test_ldcl_stcl_sr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1b\xd1\x02\x21\x1a\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x10\xd1\x02\x21\x0f\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x0,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaad
f00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xd,0x500000f0,0x4,0x0,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) +TEST_SH4(test_ldcl_stcl_rbank,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1b\xd1\x02\x21\x1a\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x10\xd1\x02\x21\x0f\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x24,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf
00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) +TEST_SH4(test_ldcl_stcl_vbr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1b\xd1\x02\x21\x1a\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x10\xd1\x02\x21\x0f\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x52,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xb
aadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) +TEST_SH4(test_ldcl_stcl_gbr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1b\xd1\x02\x21\x1a\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x10\xd1\x02\x21\x0f\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x3c,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d
,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) +TEST_SH4(test_ldcl_stcl_ssr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1b\xd1\x02\x21\x1a\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x10\xd1\x02\x21\x0f\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x68,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaad
f00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) +TEST_SH4(test_ldcl_stcl_dbr,(uint8_t *)"\x0d\xe2\x33\xd0\x07\x40\x63\xe2\x2d\xd1\x04\x71\x03\x41\x34\xd3\x07\x43\x2b\xd0\x02\x63\x2e\xd1\x82\x04\x18\x34\x28\xd1\x92\x05\x0b\x00\x09\x00\x26\xd1\x02\x21\xb7\x41\x63\xe3\x24\xd2\x08\x72\xb3\x42\x10\x32\x29\x04\x22\x65\x0b\x00\x09\x00\x20\xd1\x02\x21\x1f\xd2\x08\x72\x17\x41\x13\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x1b\xd1\x02\x21\x1a\xd2\x08\x72\x27\x41\x23\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x15\xd1\x02\x21\x14\xd2\x08\x72\x37\x41\x33\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x10\xd1\x02\x21\x0f\xd2\x08\x72\x47\x41\x43\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x0a\xd1\x02\x21\x09\xd2\x08\x72\xf6\x41\xf2\x42\x20\x31\x29\x03\x22\x64\x0b\x00\x09\x00\x09\x00\x09\x00\x09\x00\xf0\x00\x00\x50\xf0\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb0\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\xb4\x00\x01\x8c\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00\x09\x00",240,0x94,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x
baadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0x1,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) TEST_SH4(test_lds_sts_mach,(uint8_t *)"\x0a\x40\x0a\x01\x0b\x00\x09\x00\x1a\x40\x1a\x01\x0b\x00\x09\x00\x2a\x02\x2a\x40\x2a\x01\x2a\x42\x0b\x00\x09\x00\x6a\x40\x6a\x01\x0b\x00\x09\x00\x5a\x40\x5a\x01\x0b\x00\x09\x00",44,0x0,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0
xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) TEST_SH4(test_lds_sts_fpul,(uint8_t *)"\x0a\x40\x0a\x01\x0b\x00\x09\x00\x1a\x40\x1a\x01\x0b\x00\x09\x00\x2a\x02\x2a\x40\x2a\x01\x2a\x42\x0b\x00\x09\x00\x6a\x40\x6a\x01\x0b\x00\x09\x00\x5a\x40\x5a\x01\x0b\x00\x09\x00",44,0x24,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d) TEST_SH4(test_lds_sts_macl,(uint8_t 
*)"\x0a\x40\x0a\x01\x0b\x00\x09\x00\x1a\x40\x1a\x01\x0b\x00\x09\x00\x2a\x02\x2a\x40\x2a\x01\x2a\x42\x0b\x00\x09\x00\x6a\x40\x6a\x01\x0b\x00\x09\x00\x5a\x40\x5a\x01\x0b\x00\x09\x00",44,0x8,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xd,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d,0xbaadf00d)