More crusty progress

nattthebear 2020-06-23 21:25:04 -04:00
parent cacf04f8ca
commit c1caa3e346
16 changed files with 3341 additions and 476 deletions

View File

@@ -1,11 +1,26 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "anyhow"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "block-buffer"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
dependencies = [
"generic-array",
]
[[package]]
name = "cfg-if"
version = "0.1.10"
@@ -21,12 +36,37 @@ dependencies = [
"bitflags",
]
[[package]]
name = "cpuid-bool"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d375c433320f6c5057ae04a04376eef4d04ce2801448cf8863a78da99107be4"
[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
dependencies = [
"generic-array",
]
[[package]]
name = "either"
version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
[[package]]
name = "generic-array"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac746a5f3bbfdadd6106868134545e684693d54d9d44f6e9588a7d54af0bf980"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getset"
version = "0.1.1"
@@ -39,6 +79,17 @@ dependencies = [
"syn",
]
[[package]]
name = "goblin"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d20fd25aa456527ce4f544271ae4fea65d2eda4a6561ea56f39fb3ee4f7e3884"
dependencies = [
"log",
"plain",
"scroll",
]
[[package]]
name = "itertools"
version = "0.9.0"
@@ -69,6 +120,21 @@ dependencies = [
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
dependencies = [
"cfg-if",
]
[[package]]
name = "opaque-debug"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
name = "page_size"
version = "0.4.2"
@@ -103,6 +169,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "plain"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
[[package]]
name = "proc-macro-error"
version = "1.0.2"
@ -159,6 +231,39 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "scroll"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb2332cb595d33f7edd5700f4cbf94892e680c7f0ae56adab58a35190b66cb1"
dependencies = [
"scroll_derive",
]
[[package]]
name = "scroll_derive"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e367622f934864ffa1c704ba2b82280aab856e3d8213c84c5720257eb34b15b9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "sha2"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1"
dependencies = [
"block-buffer",
"cfg-if",
"cpuid-bool",
"digest",
"opaque-debug",
]
[[package]]
name = "smallvec"
version = "1.4.0"
@@ -187,6 +292,12 @@ dependencies = [
"syn",
]
[[package]]
name = "typenum"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33"
[[package]]
name = "unicode-xid"
version = "0.2.0"
@@ -203,13 +314,16 @@ checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
name = "waterboxhost"
version = "0.1.0"
dependencies = [
"anyhow",
"bitflags",
"getset",
"goblin",
"itertools",
"lazy_static",
"libc",
"page_size",
"parking_lot",
"sha2",
"winapi",
]

View File

@@ -4,6 +4,7 @@ version = "0.1.0"
authors = ["nattthebear <goyuken@gmail.com>"]
edition = "2018"
publish = false
rust = "nightly"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -14,6 +15,9 @@ lazy_static = "1.4.0"
getset = "0.1.1"
parking_lot = "0.10.2"
itertools = "0.9.0"
goblin = { version = "0.2.3", features = ["elf64", "std"] }
anyhow = "1.0"
sha2 = "0.9.1"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3.8", features = ["memoryapi", "handleapi", "errhandlingapi", "winnt"] }
@@ -23,3 +27,4 @@ libc = "0.2.71"
[lib]
doctest = false
crate-type=["cdylib"]

View File

@@ -0,0 +1,55 @@
use std::io::*;
use std::mem::{transmute, size_of, zeroed};
use anyhow::anyhow;
use sha2::{Sha256, Digest};
pub fn write<T>(stream: &mut dyn Write, val: &T) -> Result<()> {
let s = unsafe { std::slice::from_raw_parts(transmute::<&T, *const u8>(val), size_of::<T>()) };
stream.write_all(s)?;
Ok(())
}
pub fn read<T>(stream: &mut dyn Read, val: &mut T) -> Result<()> {
let s = unsafe { std::slice::from_raw_parts_mut(transmute::<&mut T, *mut u8>(val), size_of::<T>()) };
stream.read_exact(s)?;
Ok(())
}
pub fn writeval<T>(stream: &mut dyn Write, val: T) -> Result<()> {
let s = unsafe { std::slice::from_raw_parts(transmute::<&T, *const u8>(&val), size_of::<T>()) };
stream.write_all(s)?;
Ok(())
}
pub fn readval<T>(stream: &mut dyn Read) -> Result<T> {
let mut v = unsafe { zeroed::<T>() };
read(stream, &mut v)?;
Ok(v)
}
pub fn write_magic(stream: &mut dyn Write, magic: &str) -> anyhow::Result<()> {
stream.write_all(magic.as_bytes())?;
Ok(())
}
pub fn verify_magic(stream: &mut dyn Read, magic: &str) -> anyhow::Result<()> {
let mut read_tag = vec![0u8; magic.len()];
stream.read_exact(&mut read_tag[..])?;
match std::str::from_utf8(&read_tag[..]) {
Ok(s) if s == magic => Ok(()),
_ => Err(anyhow!("Bad magic for {} state", magic))
}
}
pub fn write_hash(stream: &mut dyn Write, hash: &[u8]) -> anyhow::Result<()> {
stream.write_all(hash)?;
Ok(())
}
pub fn verify_hash(stream: &mut dyn Read, hash: &[u8]) -> anyhow::Result<()> {
let mut read_hash = vec![0u8; hash.len()];
stream.read_exact(&mut read_hash[..])?;
if read_hash == hash {
Ok(())
} else {
Err(anyhow!("Bad hash for state"))
}
}
pub fn hash(data: &[u8]) -> Vec<u8> {
let mut hasher = Sha256::new();
hasher.update(data);
hasher.finalize()[..].to_owned()
}
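// A hypothetical sketch of how these helpers compose: any #[repr(C)] POD value
// can be framed with a magic string and round-tripped. `Header` and
// `roundtrip_demo` are illustrative names, not part of this crate.
#[repr(C)]
#[derive(Debug, PartialEq)]
struct Header { version: u32, size: u32 }
fn roundtrip_demo() -> anyhow::Result<()> {
let mut buf = Vec::new();
write_magic(&mut buf, "Header")?;
write(&mut buf, &Header { version: 1, size: 0x1000 })?;
let mut cursor = &buf[..];
verify_magic(&mut cursor, "Header")?;
let h: Header = readval(&mut cursor)?;
assert_eq!(h, Header { version: 1, size: 0x1000 });
Ok(())
}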

View File

@@ -0,0 +1,265 @@
use crate::*;
use host::{ActivatedWaterboxHost, WaterboxHost};
use std::{os::raw::c_char, ffi::CStr};
/// The memory template for a WaterboxHost. Don't worry about
/// making every size as small as possible, since the savestater handles sparse regions
/// well enough. All values should be PAGESIZE aligned.
#[repr(C)]
pub struct MemoryLayoutTemplate {
/// Absolute pointer to the start of the mapped space
pub start: usize,
/// Memory space for the elf executable. The elf must be non-relocatable and
/// all loaded segments must fit within [start..start + elf_size]
pub elf_size: usize,
/// Memory space to serve brk(2)
pub sbrk_size: usize,
/// Memory space to serve alloc_sealed(3)
pub sealed_size: usize,
/// Memory space to serve alloc_invisible(3)
pub invis_size: usize,
/// Memory space to serve alloc_plain(3)
pub plain_size: usize,
/// Memory space to serve mmap(2) and friends.
/// Calls without MAP_FIXED or MREMAP_FIXED will be placed in this area.
/// TODO: Are we allowing fixed calls to happen anywhere in the block?
pub mmap_size: usize,
}
impl MemoryLayoutTemplate {
/// Checks the template for validity and computes the concrete region layout
pub fn make_layout(&self) -> anyhow::Result<WbxSysLayout> {
let start = align_down(self.start);
let elf_size = align_up(self.elf_size);
let sbrk_size = align_up(self.sbrk_size);
let sealed_size = align_up(self.sealed_size);
let invis_size = align_up(self.invis_size);
let plain_size = align_up(self.plain_size);
let mmap_size = align_up(self.mmap_size);
let mut res = unsafe { std::mem::zeroed::<WbxSysLayout>() };
res.elf = AddressRange {
start,
size: elf_size
};
res.sbrk = AddressRange {
start: res.elf.end(),
size: sbrk_size
};
res.sealed = AddressRange {
start: res.sbrk.end(),
size: sealed_size
};
res.invis = AddressRange {
start: res.sealed.end(),
size: invis_size
};
res.plain = AddressRange {
start: res.invis.end(),
size: plain_size
};
res.mmap = AddressRange {
start: res.plain.end(),
size: mmap_size
};
if start >> 32 != (res.mmap.end() - 1) >> 32 {
Err(anyhow!("HostMemoryLayout must fit into a single 4GiB region!"))
} else {
Ok(res)
}
}
}
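// A hypothetical usage sketch; the start address and sizes below are invented,
// page-aligned values chosen to stay within one 4GiB region.
fn layout_demo() -> anyhow::Result<()> {
let template = MemoryLayoutTemplate {
start: 0x1000_0000,
elf_size: 0x10_0000,
sbrk_size: 0x10_0000,
sealed_size: 0x10_0000,
invis_size: 0x10_0000,
plain_size: 0x10_0000,
mmap_size: 0x10_0000,
};
let layout = template.make_layout()?; // Err if the span crosses a 4GiB boundary
assert_eq!(layout.sbrk.start, layout.elf.end()); // regions pack back to back
Ok(())
}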
/// "return" struct. On successful funtion call, error_message[0] will be 0 and data will be the return value.
/// On failed call, error_message will contain a string describing the error, and data will be unspecified.
/// Any function that takes this object as an argument can fail and should be checked for failure, even if
/// it does not return data.
#[repr(C)]
pub struct Return<T> {
pub error_message: [u8; 1024],
pub data: T,
}
impl<T> Return<T> {
pub fn put(&mut self, result: anyhow::Result<T>) {
match result {
Err(e) => {
let s = format!("Waterbox Error: {:?}", e);
let len = std::cmp::min(s.len(), 1023);
self.error_message[0..len].copy_from_slice(&s.as_bytes()[0..len]);
self.error_message[len] = 0;
},
Ok(t) => {
self.error_message[0] = 0;
self.data = t;
}
}
}
}
/// stream writer
#[repr(C)]
pub struct CWriter {
/// will be passed to callback
pub userdata: usize,
/// write bytes. Return number of bytes written on success, or < 0 on failure.
/// Permitted to write less than the provided number of bytes.
pub callback: extern fn(userdata: usize, data: *const u8, size: usize) -> isize,
}
impl Write for CWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
let res = (self.callback)(self.userdata, buf.as_ptr(), buf.len());
if res < 0 {
Err(std::io::Error::new(std::io::ErrorKind::Other, "Callback signaled abnormal failure"))
} else {
Ok(res as usize)
}
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
/// stream reader
#[repr(C)]
pub struct CReader {
/// will be passed to callback
pub userdata: usize,
/// Read bytes into the buffer. Return number of bytes read on success, or < 0 on failure.
/// permitted to read less than the provided buffer size, but must always read at least 1
/// byte if EOF is not reached. If EOF is reached, should return 0.
pub callback: extern fn(userdata: usize, data: *mut u8, size: usize) -> isize,
}
impl Read for CReader {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
let res = (self.callback)(self.userdata, buf.as_mut_ptr(), buf.len());
if res < 0 {
Err(std::io::Error::new(std::io::ErrorKind::Other, "Callback signaled abnormal failure"))
} else {
Ok(res as usize)
}
}
}
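// A hypothetical embedder-side sketch: a callback that serves a CReader from a
// std::io::Cursor passed through `userdata`. `cursor_read` and `make_reader`
// are illustrative names; a real embedder would route its own state this way.
extern fn cursor_read(userdata: usize, data: *mut u8, size: usize) -> isize {
use std::io::Read;
let cursor = unsafe { &mut *(userdata as *mut std::io::Cursor<Vec<u8>>) };
let buf = unsafe { std::slice::from_raw_parts_mut(data, size) };
match cursor.read(buf) {
Ok(n) => n as isize, // 0 signals EOF, matching the contract above
Err(_) => -1,
}
}
fn make_reader(cursor: &mut std::io::Cursor<Vec<u8>>) -> CReader {
// caller must keep `cursor` alive for as long as the CReader is in use
CReader {
userdata: cursor as *mut _ as usize,
callback: cursor_read,
}
}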
fn arg_to_str(arg: *const c_char) -> anyhow::Result<String> {
let cs = unsafe { CStr::from_ptr(arg as *const c_char) };
match cs.to_str() {
Ok(s) => Ok(s.to_string()),
Err(_) => Err(anyhow!("Bad UTF-8 string")),
}
}
fn read_whole_file(reader: &mut CReader) -> anyhow::Result<Vec<u8>> {
let mut res = Vec::<u8>::new();
std::io::copy(reader, &mut res)?;
Ok(res)
}
/// Given a guest executable and a memory layout, create a new host environment. All data will be immediately consumed from the reader,
/// which will not be used after this call.
#[no_mangle]
pub extern fn wbx_create_host(layout: &MemoryLayoutTemplate, module_name: *const c_char, wbx: &mut CReader, ret: &mut Return<*mut WaterboxHost>) {
let res = (|| {
let data = read_whole_file(wbx)?;
WaterboxHost::new(&data[..], &arg_to_str(module_name)?[..], layout)
})();
ret.put(res.map(|boxed| Box::into_raw(boxed)));
}
/// Tear down a host environment. May not be called while the environment is active.
#[no_mangle]
pub extern fn wbx_destroy_host(obj: *mut WaterboxHost, ret: &mut Return<()>) {
let res = (|| {
unsafe {
if (*obj).active() {
return Err(anyhow!("WaterboxHost is still active!"))
}
Box::from_raw(obj);
Ok(())
}
})();
ret.put(res);
}
/// Activate a host environment. This swaps it into memory and makes it available for use.
/// Pointers to inside the environment are only valid while active. Uses a mutex internally
/// so as to not stomp over other host environments in the same 4GiB slice.
/// Returns a pointer to the activated object, used to do most other functions.
#[no_mangle]
pub extern fn wbx_activate_host(obj: *mut WaterboxHost, ret: &mut Return<*mut ActivatedWaterboxHost>) {
let res = (|| {
unsafe {
if (*obj).active() {
return Err(anyhow!("WaterboxHost is already active!"))
}
Ok((&mut (*obj)).activate())
}
})();
ret.put(res.map(|boxed| Box::into_raw(boxed)));
}
/// Deactivates a host environment, and releases the mutex.
#[no_mangle]
pub extern fn wbx_deactivate_host(obj: *mut ActivatedWaterboxHost, ret: &mut Return<()>) {
unsafe { Box::from_raw(obj); }
ret.put(Ok(()));
}
/// Returns the address of an exported function from the guest executable. This pointer is only valid
/// while the host is active. A missing proc is not an error and simply returns 0.
#[no_mangle]
pub extern fn wbx_get_proc_addr(obj: &mut ActivatedWaterboxHost, name: *const c_char, ret: &mut Return<usize>) {
match arg_to_str(name) {
Ok(s) => {
ret.put(Ok(obj.get_proc_addr(&s)));
},
Err(e) => {
ret.put(Err(e))
}
}
}
/// Calls the seal operation, which is a one time action that prepares the host to save states.
#[no_mangle]
pub extern fn wbx_seal(obj: &mut ActivatedWaterboxHost, ret: &mut Return<()>) {
ret.put(obj.seal());
}
/// Mounts a file in the environment. All data will be immediately consumed from the reader, which will not be used after this call.
/// To prevent nondeterminism, adding and removing files is very limited WRT savestates. If a file is writable, it must never exist
/// when save_state is called, and can only be used for transient operations. If a file is readable, it can appear in savestates,
/// but it must exist in every savestate and the exact sequence of add_file calls must be consistent from savestate to savestate.
#[no_mangle]
pub extern fn wbx_mount_file(obj: &mut ActivatedWaterboxHost, name: *const c_char, reader: &mut CReader, writable: bool, ret: &mut Return<()>) {
let res: anyhow::Result<()> = (|| {
obj.mount_file(arg_to_str(name)?, read_whole_file(reader)?, writable)?;
Ok(())
})();
ret.put(res);
}
/// Remove a file previously added. Writer is optional; if provided, the contents of the file at time of removal will be dumped to it.
/// It is an error to remove a file which is currently open in the guest.
#[no_mangle]
pub extern fn wbx_unmount_file(obj: &mut ActivatedWaterboxHost, name: *const c_char, writer: Option<&mut CWriter>, ret: &mut Return<()>) {
let res: anyhow::Result<()> = (|| {
let data = obj.unmount_file(&arg_to_str(name)?)?;
if let Some(w) = writer {
std::io::copy(&mut &data[..], w)?;
}
Ok(())
})();
ret.put(res);
}
/// Save state. Must not be called before seal. Must not be called with any writable files mounted.
/// Must always be called with the same sequence and contents of readonly files.
#[no_mangle]
pub extern fn wbx_save_state(obj: &mut ActivatedWaterboxHost, writer: &mut CWriter, ret: &mut Return<()>) {
ret.put(obj.save_state(writer));
}
/// Load state. Must not be called before seal. Must not be called with any writable files mounted.
/// Must always be called with the same sequence and contents of readonly files that were in the save state.
/// Must be called with the same wbx executable and memory layout as in the savestate.
/// Errors generally poison the environment; sorry!
#[no_mangle]
pub extern fn wbx_load_state(obj: &mut ActivatedWaterboxHost, reader: &mut CReader, ret: &mut Return<()>) {
ret.put(obj.load_state(reader));
}

View File

@@ -0,0 +1,208 @@
use goblin;
use goblin::elf64::{sym::*, section_header::*};
use crate::*;
use crate::memory_block::ActivatedMemoryBlock;
use crate::memory_block::Protection;
use std::collections::HashMap;
/// Special system import area
const IMPORTS_OBJECT_NAME: &str = "__wbxsysarea";
/// Section names that are not marked as readonly, but we'll make them readonly anyway
fn section_name_is_readonly(name: &str) -> bool {
name.contains(".rel.ro")
|| name.starts_with(".got")
|| name == ".init_array"
|| name == ".fini_array"
|| name == ".tbss"
|| name == ".sealed"
}
pub struct SectionInfo {
name: String,
addr: AddressRange,
}
pub struct ElfLoader {
sections: Vec<SectionInfo>,
exports: HashMap<String, AddressRange>,
entry_point: usize,
hash: Vec<u8>,
import_area: AddressRange,
}
impl ElfLoader {
pub fn new(data: &[u8],
module_name: &str,
layout: &WbxSysLayout,
b: &mut ActivatedMemoryBlock
) -> anyhow::Result<ElfLoader> {
let wbx = goblin::elf::Elf::parse(data)?;
let start = wbx.program_headers.iter()
.map(|x| x.vm_range().start)
.min()
.unwrap();
let end = wbx.program_headers.iter()
.map(|x| x.vm_range().end)
.max()
.unwrap();
if start < layout.elf.start || end > layout.elf.end() {
return Err(anyhow!("{} from {}..{} did not fit in the provided region", module_name, start, end))
}
println!("Mouting `{}` @{:x}", module_name, start);
let mut sections = Vec::new();
for section in wbx.section_headers.iter() {
let name = match wbx.shdr_strtab.get(section.sh_name) {
Some(Ok(s)) => s,
_ => "<anon>"
};
println!(" @{:x} {}{}{} `{}` {} bytes",
section.sh_addr,
if section.sh_flags & (SHF_ALLOC as u64) != 0 { "R" } else { " " },
if section.sh_flags & (SHF_WRITE as u64) != 0 { "W" } else { " " },
if section.sh_flags & (SHF_EXECINSTR as u64) != 0 { "X" } else { " " },
name,
section.sh_size
);
if section.sh_type != SHT_NOBITS
&& name != "<anon>"
&& section.sh_addr != 0 {
let si = SectionInfo {
name: name.to_string(),
addr: AddressRange {
start: section.sh_addr as usize,
size: section.sh_size as usize
}
};
sections.push(si);
}
}
let mut exports = HashMap::new();
let mut import_area_opt = None;
for sym in wbx.syms.iter() {
let name = match wbx.strtab.get(sym.st_name) {
Some(Ok(s)) => s,
_ => continue
};
if sym.st_visibility() == STV_DEFAULT && sym.st_bind() == STB_GLOBAL {
exports.insert(
name.to_string(),
AddressRange { start: sym.st_value as usize, size: sym.st_size as usize }
);
}
if name == IMPORTS_OBJECT_NAME {
import_area_opt = Some(AddressRange { start: sym.st_value as usize, size: sym.st_size as usize });
}
}
let import_area = match import_area_opt {
Some(i) => {
if i.size != std::mem::size_of::<WbxSysArea>() {
return Err(anyhow!("Symbol {} is the wrong size", IMPORTS_OBJECT_NAME))
}
i
},
None => return Err(anyhow!("Symbol {} is missing", IMPORTS_OBJECT_NAME))
};
{
let invis_opt = sections.iter().find(|x| x.name == ".invis");
if let Some(invis) = invis_opt {
let overlaps = sections.iter().any(|x| x.name != ".invis"
&& x.addr.align_expand().start < invis.addr.align_expand().end()
&& x.addr.align_expand().end() > invis.addr.align_expand().start);
if overlaps {
return Err(anyhow!("Overlap between .invis and other sections -- check linkscript."));
}
b.mark_invisible(invis.addr.align_expand())?;
}
}
b.mark_invisible(layout.invis)?;
for segment in wbx.program_headers.iter() {
let addr = AddressRange {
start: segment.vm_range().start,
size: segment.vm_range().end - segment.vm_range().start
};
let prot_addr = addr.align_expand();
let prot = match (segment.is_read(), segment.is_write(), segment.is_executable()) {
(false, false, false) => Protection::None,
(true, false, false) => Protection::R,
(_, false, true) => Protection::RX,
(_, true, false) => Protection::RW,
(_, true, true) => Protection::RWX
};
b.mmap_fixed(prot_addr, prot)?;
unsafe {
let src = &data[segment.file_range()];
let dst = AddressRange { start: addr.start, size: segment.file_range().end - segment.file_range().start }.slice_mut();
dst.copy_from_slice(src);
}
}
Ok(ElfLoader {
sections,
exports,
entry_point: wbx.entry as usize,
hash: bin::hash(data),
import_area
})
}
pub fn seal(&self, b: &mut ActivatedMemoryBlock) {
for section in self.sections.iter() {
if section_name_is_readonly(section.name.as_str()) {
b.mprotect(section.addr, Protection::R).unwrap();
}
}
}
pub fn connect_syscalls(&mut self, _b: &mut ActivatedMemoryBlock, sys: &WbxSysArea) {
let addr = self.import_area;
unsafe { *(addr.start as *mut WbxSysArea) = *sys; }
}
pub fn clear_syscalls(&mut self, _b: &mut ActivatedMemoryBlock) {
let addr = self.import_area;
unsafe { addr.zero(); }
}
pub fn native_init(&mut self, _b: &mut ActivatedMemoryBlock) {
println!("Calling _start()");
unsafe {
std::mem::transmute::<usize, extern "win64" fn() -> ()>(self.entry_point)();
}
}
pub fn co_clean(&mut self, _b: &mut ActivatedMemoryBlock) {
match self.get_proc_addr("co_clean") {
0 => (),
ptr => {
println!("Calling co_clean()");
unsafe {
std::mem::transmute::<usize, extern "win64" fn() -> ()>(ptr)();
}
},
}
}
pub fn get_proc_addr(&self, proc: &str) -> usize {
match self.exports.get(proc) {
Some(addr) => addr.start,
None => 0,
}
}
}
const MAGIC: &str = "ElfLoader";
impl IStateable for ElfLoader {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
bin::write_magic(stream, MAGIC)?;
bin::write_hash(stream, &self.hash[..])?;
Ok(())
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
bin::verify_magic(stream, MAGIC)?;
bin::verify_hash(stream, &self.hash[..])?;
Ok(())
}
}

View File

@@ -0,0 +1,48 @@
use crate::syscall_defs::*;
use crate::*;
use std::io::{Write, Read};
use super::*;
/// stdin
pub struct EmptyRead {
}
impl IStateable for EmptyRead {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
bin::write_magic(stream, "EmptyRead")?;
Ok(())
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
bin::verify_magic(stream, "EmptyRead")?;
Ok(())
}
}
impl FileObject for EmptyRead {
fn can_read(&self) -> bool {
true
}
fn read(&mut self, _buf: &mut [u8]) -> Result<i64, SyscallError> {
Ok(0)
}
fn can_write(&self) -> bool {
false
}
fn write(&mut self, _buf: &[u8]) -> Result<i64, SyscallError> {
Err(EBADF)
}
fn seek(&mut self, _offset: i64, _whence: i32) -> Result<i64, SyscallError> {
Err(ESPIPE)
}
fn truncate(&mut self, _size: i64) -> SyscallResult {
Err(EINVAL)
}
fn stat(&self, statbuff: &mut KStat) -> SyscallResult {
fill_stat(statbuff, true, false, false, 0)
}
fn can_unmount(&self) -> bool {
false
}
fn unmount(self: Box<Self>) -> Vec<u8> {
panic!()
}
fn reset(&mut self) {}
}

View File

@@ -0,0 +1,345 @@
mod empty_read;
mod sys_out;
mod regular_file;
use crate::syscall_defs::*;
use crate::*;
use std::io::{Write, Read};
use empty_read::EmptyRead;
use sys_out::SysOutObj;
use regular_file::RegularFile;
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct FileDescriptor(pub i32);
const BAD_FD: FileDescriptor = FileDescriptor(-1);
pub trait FileObject: IStateable {
fn stat(&self, statbuff: &mut KStat) -> SyscallResult;
fn truncate(&mut self, size: i64) -> SyscallResult;
fn can_read(&self) -> bool;
fn read(&mut self, buf: &mut [u8]) -> Result<i64, SyscallError>;
fn can_write(&self) -> bool;
fn write(&mut self, buf: &[u8]) -> Result<i64, SyscallError>;
fn seek(&mut self, offset: i64, whence: i32) -> Result<i64, SyscallError>;
fn reset(&mut self);
fn can_unmount(&self) -> bool;
fn unmount(self: Box<Self>) -> Vec<u8>;
}
fn fill_stat(s: &mut KStat, can_read: bool, can_write: bool, can_seek: bool, length: i64) -> SyscallResult {
s.st_dev = 1;
s.st_ino = 1;
s.st_nlink = 0;
let mut flags = 0;
if can_read {
flags |= S_IRUSR | S_IRGRP | S_IROTH;
}
if can_write {
flags |= S_IWUSR | S_IWGRP | S_IWOTH;
}
if can_seek {
flags |= S_IFREG;
} else {
flags |= S_IFIFO;
}
s.st_mode = flags;
s.st_uid = 0;
s.st_gid = 0;
s.__pad0 = 0;
s.st_rdev = 0;
if can_seek {
s.st_size = length;
} else {
s.st_size = 0;
}
s.st_blksize = 4096;
s.st_blocks = (s.st_size + 511) / 512;
// deterministic fake timestamps: 2010-01-01 00:00:00.5 UTC
s.st_atime_sec = 1262304000;
s.st_atime_nsec = 1000000000 / 2;
s.st_mtime_sec = 1262304000;
s.st_mtime_nsec = 1000000000 / 2;
s.st_ctime_sec = 1262304000;
s.st_ctime_nsec = 1000000000 / 2;
Ok(())
}
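// For reference, the two shapes this produces:
//   fill_stat(s, false, true, false, 0)  -> S_IFIFO | write bits, st_size = 0 (pipe-like)
//   fill_stat(s, true,  false, true, 42) -> S_IFREG | read bits,  st_size = 42 (regular file)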
struct MountedFile {
name: String,
fd: FileDescriptor,
obj: Box<dyn FileObject>,
}
impl IStateable for MountedFile {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
bin::write_magic(stream, "MountedFile")?;
bin::write_magic(stream, &self.name)?;
bin::write(stream, &self.fd)?;
self.obj.save_state(stream)?;
Ok(())
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
bin::verify_magic(stream, "MountedFile")?;
bin::verify_magic(stream, &self.name)?;
bin::read(stream, &mut self.fd)?;
self.obj.load_state(stream)?;
Ok(())
}
}
pub struct FileSystem {
files: Vec<MountedFile>,
}
impl FileSystem {
pub fn new() -> FileSystem {
FileSystem {
files: vec![
MountedFile {
name: "/dev/stdin".to_string(),
fd: FileDescriptor(0),
obj: Box::new(EmptyRead {})
},
MountedFile {
name: "/dev/stdout".to_string(),
fd: FileDescriptor(1),
obj: Box::new(SysOutObj { host_handle: Box::new(std::io::stdout()) })
},
MountedFile {
name: "/dev/stderr".to_string(),
fd: FileDescriptor(2),
obj: Box::new(SysOutObj { host_handle: Box::new(std::io::stderr()) })
},
],
}
}
/// Accept a file from the outside world. Writable files may never appear in a savestate,
/// and readonly files must not be added or removed from savestate to savestate, so all uses
/// are either transient or read only resources that last for the life of emulation.
pub fn mount(&mut self, name: String, data: Vec<u8>, writable: bool) -> anyhow::Result<()> {
if self.files.iter().any(|f| f.name == name) {
return Err(anyhow!("File with name {} already mounted.", name))
}
self.files.push(MountedFile {
name: name.to_string(),
fd: BAD_FD,
obj: Box::new(RegularFile::new(data, writable))
});
Ok(())
}
/// Remove a file previously loaded with mount(). Returns the content of the file at this time.
/// Not possible while the guest still has the file open.
pub fn unmount(&mut self, name: &str) -> anyhow::Result<Vec<u8>> {
let idx = match self.files.iter().position(|f| f.name == name) {
Some(f) => f,
None => return Err(anyhow!("File with name {} not previously mounted.", name))
};
let file = &self.files[idx];
if file.fd != BAD_FD {
return Err(anyhow!("File {} is still open in the system", name))
}
if !file.obj.can_unmount() {
return Err(anyhow!("File {} cannot be unmounted as it is permanently attached", name))
}
Ok(self.files.remove(idx).obj.unmount())
}
/// Implements a subset of open(2)
pub fn open(&mut self, name: &str, flags: i32, _mode: i32) -> Result<FileDescriptor, SyscallError> {
// TODO: Missing file callback
let fd = {
let mut i = 0;
loop {
if !self.files.iter().any(|f| f.fd.0 == i) {
break FileDescriptor(i)
}
i += 1;
}
};
let file = match self.files.iter_mut().find(|f| f.name == name) {
Some(f) => f,
None => return Err(ENOENT)
};
if file.fd != BAD_FD {
return Err(EACCES)
}
// TODO: We should be doing more with flags and mode
match flags & O_ACCMODE {
O_RDONLY => {
if !file.obj.can_read() {
return Err(EACCES)
}
}
O_WRONLY => {
if !file.obj.can_write() {
return Err(EACCES)
}
},
O_RDWR => {
if !file.obj.can_read() || !file.obj.can_write() {
return Err(EACCES)
}
},
_ => return Err(EINVAL)
}
// TODO: If the requested access was R on an RW file (transient), we still allow writing once opened
file.fd = fd;
Ok(fd)
}
/// Implements a subset of close(2)
pub fn close(&mut self, fd: FileDescriptor) -> SyscallResult {
let file = match self.files.iter_mut().find(|f| f.fd == fd) {
Some(f) => f,
None => return Err(EBADF)
};
file.obj.reset();
file.fd = BAD_FD;
Ok(())
}
fn wrap_action<T, P: FnOnce(&mut dyn FileObject) -> Result<T, SyscallError>>(&mut self, name: &str, action: P) -> Result<T, SyscallError> {
match self.files.iter_mut().find(|f| f.name == name) {
Some(f) => action(f.obj.as_mut()),
None => Err(ENOENT)
}
}
fn wrap_faction<T, P: FnOnce(&mut dyn FileObject) -> Result<T, SyscallError>>(&mut self, fd: FileDescriptor, action: P) -> Result<T, SyscallError> {
match self.files.iter_mut().find(|f| f.fd == fd) {
Some(f) => action(f.obj.as_mut()),
None => Err(ENOENT)
}
}
/// Implements a subset of stat(2)
pub fn stat(&mut self, name: &str, statbuff: &mut KStat) -> SyscallResult {
self.wrap_action(name, |f| f.stat(statbuff))
}
/// Implements a subset of fstat(2)
pub fn fstat(&mut self, fd: FileDescriptor, statbuff: &mut KStat) -> SyscallResult {
self.wrap_faction(fd, |f| f.stat(statbuff))
}
/// Implements a subset of truncate(2)
pub fn truncate(&mut self, name: &str, size: i64) -> SyscallResult {
self.wrap_action(name, |f| f.truncate(size))
}
/// Implements a subset of ftruncate(2)
pub fn ftruncate(&mut self, fd: FileDescriptor, size: i64) -> SyscallResult {
self.wrap_faction(fd, |f| f.truncate(size))
}
/// Implements a subset of read(2)
pub fn read(&mut self, fd: FileDescriptor, buf: &mut [u8]) -> Result<i64, SyscallError> {
self.wrap_faction(fd, |f| f.read(buf))
}
/// Implements a subset of write(2)
pub fn write(&mut self, fd: FileDescriptor, buf: &[u8]) -> Result<i64, SyscallError> {
self.wrap_faction(fd, |f| f.write(buf))
}
/// Implements a subset of lseek(2)
pub fn seek(&mut self, fd: FileDescriptor, offset: i64, whence: i32) -> Result<i64, SyscallError> {
self.wrap_faction(fd, |f| f.seek(offset, whence))
}
}
impl IStateable for FileSystem {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
bin::write_magic(stream, "FileSystem")?;
for f in self.files.iter_mut() {
f.save_state(stream)?;
}
bin::write_magic(stream, "FileSystemEnd")?;
Ok(())
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
bin::verify_magic(stream, "FileSystem")?;
for f in self.files.iter_mut() {
f.load_state(stream)?;
}
bin::verify_magic(stream, "FileSystemEnd")?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
type TestResult = anyhow::Result<()>;
#[test]
fn test_create() -> TestResult {
let mut fs = FileSystem::new();
let mut state0 = Vec::new();
fs.save_state(&mut state0)?;
fs.load_state(&mut &state0[..])?;
Ok(())
}
#[test]
fn test_ro_state() -> TestResult {
let mut fs = FileSystem::new();
fs.mount("myfile".to_string(),
"The quick brown fox jumps over the lazy dog.".to_string().into_bytes(), false)?;
let fd = fs.open("myfile", O_RDONLY, 0)?;
assert_eq!(fd.0, 3);
let mut buff = vec![0u8; 8];
assert!(fs.write(fd, &buff[..]).is_err());
assert_eq!(fs.read(fd, &mut buff[..])?, 8);
assert_eq!(buff, "The quic".as_bytes());
let mut state0 = Vec::new();
fs.save_state(&mut state0)?;
assert_eq!(fs.read(fd, &mut buff[..])?, 8);
assert_eq!(buff, "k brown ".as_bytes());
fs.load_state(&mut &state0[..])?;
assert_eq!(fs.read(fd, &mut buff[..])?, 8);
assert_eq!(buff, "k brown ".as_bytes());
assert_eq!(fs.read(fd, &mut buff[..])?, 8);
assert_eq!(buff, "fox jump".as_bytes());
assert_eq!(fs.read(fd, &mut buff[..])?, 8);
assert_eq!(buff, "s over t".as_bytes());
assert_eq!(fs.read(fd, &mut buff[..])?, 8);
assert_eq!(buff, "he lazy ".as_bytes());
assert_eq!(fs.read(fd, &mut buff[..])?, 4);
assert_eq!(&buff[0..4], "dog.".as_bytes());
Ok(())
}
#[test]
fn test_negative() -> TestResult {
let mut fs = FileSystem::new();
assert!(fs.mount("/dev/stdin".to_string(), Vec::new(), false).is_err()); // overriding existing name
assert!(fs.unmount("oopopo").is_err()); // unmounting nonexistant file
assert!(fs.unmount("/dev/stdout").is_err()); // unmounting permanent file
fs.mount("oopopo".to_string(), Vec::new(), true)?;
let mut state0 = Vec::new();
assert!(fs.save_state(&mut state0).is_err()); // save state with transient file
state0.resize(0, 0);
fs.unmount("oopopo")?;
fs.mount("oopopo".to_string(), Vec::new(), false)?;
fs.save_state(&mut state0)?;
fs.unmount("oopopo")?;
assert!(fs.load_state(&mut &state0[..]).is_err()); // loading state with different list of files
// TODO: Our general contract is that after a failed loadstate, the entire core is poisoned.
// Can we do better? Should we do better?
Ok(())
}
#[test]
fn test_rw_unmount() -> TestResult {
let mut fs = FileSystem::new();
fs.mount("z".to_string(), Vec::new(), true)?;
let fd = fs.open("z", O_RDWR, 0)?;
fs.write(fd, "Big test".as_bytes())?;
fs.seek(fd, 0, SEEK_SET)?;
fs.write(fd, "Q".as_bytes())?;
fs.seek(fd, 2, SEEK_CUR)?;
fs.write(fd, ")".as_bytes())?;
fs.seek(fd, -1, SEEK_END)?;
fs.write(fd, "$$$$".as_bytes())?;
let mut statbuff = Box::new(KStat::default());
fs.fstat(fd, statbuff.as_mut())?;
assert_eq!(statbuff.st_size, 11);
fs.close(fd)?;
let vec = fs.unmount("z")?;
assert_eq!(vec, "Qig)tes$$$$".as_bytes());
Ok(())
}
}

View File

@@ -0,0 +1,126 @@
use crate::syscall_defs::*;
use crate::*;
use std::io::{Write, Read};
use super::*;
/// A file whose content is in memory and managed by the waterbox host
pub struct RegularFile {
data: Vec<u8>,
hash: Option<Vec<u8>>,
position: usize,
}
impl RegularFile {
pub fn new(data: Vec<u8>, writable: bool) -> RegularFile {
let hash = if writable {
None
} else {
Some(bin::hash(&data[..]))
};
RegularFile {
data,
hash,
position: 0,
}
}
}
impl IStateable for RegularFile {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
match &self.hash {
Some(hash) => {
bin::write_magic(stream, "RegularFile")?;
bin::write_hash(stream, &hash[..])?;
bin::write(stream, &self.position)?;
Ok(())
},
None => Err(anyhow!("Cannot save state while transient files are active"))
}
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
match &self.hash {
Some(hash) => {
bin::verify_magic(stream, "RegularFile")?;
bin::verify_hash(stream, &hash[..])?;
bin::read(stream, &mut self.position)?;
Ok(())
}
None => Err(anyhow!("Cannot load state while transient files are active"))
}
}
}
impl FileObject for RegularFile {
fn can_read(&self) -> bool {
true
}
fn read(&mut self, buf: &mut [u8]) -> Result<i64, SyscallError> {
let n = std::cmp::min(buf.len(), self.data.len() - self.position);
let dst = &mut buf[0..n];
let src = &self.data[self.position..self.position + n];
dst.copy_from_slice(src);
self.position += n;
Ok(n as i64)
}
fn can_write(&self) -> bool {
match self.hash {
None => true,
Some(_) => false
}
}
fn write(&mut self, buf: &[u8]) -> Result<i64, SyscallError> {
if !self.can_write() {
return Err(EBADF)
}
let n = buf.len();
let newpos = self.position + n;
if newpos > self.data.len() {
self.data.resize(newpos, 0);
}
let dst = &mut self.data[self.position..newpos];
dst.copy_from_slice(buf);
self.position = newpos;
Ok(n as i64)
}
fn seek(&mut self, offset: i64, whence: i32) -> Result<i64, SyscallError> {
let newpos = match whence {
SEEK_SET => {
offset
},
SEEK_CUR => {
self.position as i64 + offset
},
SEEK_END => {
self.data.len() as i64 + offset
}
_ => return Err(EINVAL)
};
if newpos < 0 || newpos > self.data.len() as i64 {
return Err(EINVAL)
}
self.position = newpos as usize;
Ok(newpos)
}
fn truncate(&mut self, size: i64) -> SyscallResult {
if !self.can_write() {
return Err(EBADF)
}
if size < 0 {
return Err(EINVAL)
}
self.data.resize(size as usize, 0);
self.position = std::cmp::min(self.position, size as usize);
Ok(())
}
fn reset(&mut self) {
self.position = 0;
}
fn stat(&self, statbuff: &mut KStat) -> SyscallResult {
fill_stat(statbuff, true, self.can_write(), true, self.data.len() as i64)
}
fn can_unmount(&self) -> bool {
true
}
fn unmount(self: Box<Self>) -> Vec<u8> {
self.data
}
}
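// A hypothetical sketch of the contract above (mirrors the tests in fs/mod.rs):
fn regular_file_demo() {
let mut f = RegularFile::new(b"hello".to_vec(), false); // read-only: hash is Some(..)
assert!(f.write(b"x").is_err()); // EBADF: not writable
assert_eq!(f.seek(0, SEEK_END).unwrap(), 5); // seeking to EOF is allowed
assert!(f.seek(6, SEEK_SET).is_err()); // EINVAL: past the end
assert!(f.truncate(3).is_err()); // EBADF: truncate requires write access
}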

View File

@@ -0,0 +1,51 @@
use crate::syscall_defs::*;
use crate::*;
use std::io::{Write, Read};
use super::*;
/// stdout, stderr
pub struct SysOutObj {
pub host_handle: Box<dyn Write>,
}
impl IStateable for SysOutObj {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
bin::write_magic(stream, "SysOutObj")?;
Ok(())
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
bin::verify_magic(stream, "SysOutObj")?;
Ok(())
}
}
impl FileObject for SysOutObj {
fn can_read(&self) -> bool {
false
}
fn read(&mut self, _buf: &mut [u8]) -> Result<i64, SyscallError> {
Err(EBADF)
}
fn can_write(&self) -> bool {
true
}
fn write(&mut self, buf: &[u8]) -> Result<i64, SyscallError> {
// do not propagate host errors up to the waterbox!
let _ = self.host_handle.write_all(buf);
Ok(buf.len() as i64)
}
fn seek(&mut self, _offset: i64, _whence: i32) -> Result<i64, SyscallError> {
Err(ESPIPE)
}
fn truncate(&mut self, _size: i64) -> SyscallResult {
Err(EINVAL)
}
fn stat(&self, statbuff: &mut KStat) -> SyscallResult {
fill_stat(statbuff, false, true, false, 0)
}
fn can_unmount(&self) -> bool {
false
}
fn unmount(self: Box<Self>) -> Vec<u8> {
panic!()
}
fn reset(&mut self) {}
}

View File

@@ -0,0 +1,313 @@
use crate::*;
use crate::{memory_block::ActivatedMemoryBlock, syscall_defs::*};
use memory_block::{MemoryBlock, Protection};
use std::{os::raw::c_char, ffi::CStr};
use fs::{FileDescriptor, FileSystem};
use elf::ElfLoader;
use cinterface::MemoryLayoutTemplate;
pub struct WaterboxHost {
fs: FileSystem,
program_break: usize,
elf: ElfLoader,
layout: WbxSysLayout,
memory_block: Box<MemoryBlock>,
active: bool,
sealed: bool,
}
impl WaterboxHost {
pub fn new(wbx: &[u8], module_name: &str, layout_template: &MemoryLayoutTemplate) -> anyhow::Result<Box<WaterboxHost>> {
let layout = layout_template.make_layout()?;
let mut memory_block = MemoryBlock::new(layout.all());
let mut b = memory_block.enter();
let elf = ElfLoader::new(wbx, module_name, &layout, &mut b)?;
let fs = FileSystem::new();
drop(b);
let mut res = Box::new(WaterboxHost {
fs,
program_break: layout.sbrk.start,
elf,
layout,
memory_block,
active: false,
sealed: false,
});
let mut active = res.activate();
active.h.elf.connect_syscalls(&mut active.b, &mut active.sys);
active.h.elf.native_init(&mut active.b);
drop(active);
Ok(res)
}
pub fn active(&self) -> bool {
self.active
}
pub fn activate(&mut self) -> Box<ActivatedWaterboxHost> {
let h = unsafe { &mut *(self as *mut WaterboxHost) };
let b = self.memory_block.enter();
let sys = WbxSysArea {
layout: self.layout,
syscall: WbxSysSyscall {
ud: 0,
syscall,
}
};
let mut res = Box::new(ActivatedWaterboxHost {
tag: TAG,
h,
b,
sys
});
res.sys.syscall.ud = res.as_mut() as *mut ActivatedWaterboxHost as usize;
res.h.active = true;
res
}
}
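// A hypothetical lifecycle sketch; `elf_bytes`, `example.wbx`, and the export
// name are invented for illustration.
fn lifecycle_demo(elf_bytes: &[u8], template: &MemoryLayoutTemplate) -> anyhow::Result<()> {
let mut host = WaterboxHost::new(elf_bytes, "example.wbx", template)?;
{
let mut active = host.activate(); // takes the per-4GiB lock, swaps memory in
active.seal()?; // one-time; required before save_state/load_state
let _entry = active.get_proc_addr("some_export"); // 0 if the export is missing
} // dropping the activation marks the host inactive and releases the lock
Ok(())
}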
const TAG: u64 = 0xd01487803948acff;
pub struct ActivatedWaterboxHost<'a> {
tag: u64,
h: &'a mut WaterboxHost,
b: ActivatedMemoryBlock<'a>,
sys: WbxSysArea,
}
impl<'a> Drop for ActivatedWaterboxHost<'a> {
fn drop(&mut self) {
self.h.active = false;
}
}
impl<'a> ActivatedWaterboxHost<'a> {
pub fn get_proc_addr(&self, name: &str) -> usize {
self.h.elf.get_proc_addr(name)
}
fn check_sealed(&self) -> anyhow::Result<()> {
if !self.h.sealed {
Err(anyhow!("Not sealed!"))
} else {
Ok(())
}
}
pub fn seal(&mut self) -> anyhow::Result<()> {
if self.h.sealed {
return Err(anyhow!("Already sealed!"))
}
self.h.elf.clear_syscalls(&mut self.b);
self.h.elf.seal(&mut self.b);
self.h.elf.connect_syscalls(&mut self.b, &self.sys);
self.h.elf.co_clean(&mut self.b);
self.b.seal();
self.h.sealed = true;
Ok(())
}
pub fn mount_file(&mut self, name: String, data: Vec<u8>, writable: bool) -> anyhow::Result<()> {
self.h.fs.mount(name, data, writable)
}
pub fn unmount_file(&mut self, name: &str) -> anyhow::Result<Vec<u8>> {
self.h.fs.unmount(name)
}
}
const SAVE_START_MAGIC: &str = "ActivatedWaterboxHost_v1";
const SAVE_END_MAGIC: &str = "ʇsoHxoqɹǝʇɐMpǝʇɐʌᴉʇɔ∀";
impl<'a> IStateable for ActivatedWaterboxHost<'a> {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
self.check_sealed()?;
bin::write_magic(stream, SAVE_START_MAGIC)?;
self.h.fs.save_state(stream)?;
bin::write(stream, &self.h.program_break)?;
self.h.elf.save_state(stream)?;
self.b.save_state(stream)?;
bin::write_magic(stream, SAVE_END_MAGIC)?;
Ok(())
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
self.check_sealed()?;
bin::verify_magic(stream, SAVE_START_MAGIC)?;
self.h.fs.load_state(stream)?;
bin::read(stream, &mut self.h.program_break)?;
self.h.elf.load_state(stream)?;
self.b.load_state(stream)?;
bin::verify_magic(stream, SAVE_END_MAGIC)?;
self.h.elf.connect_syscalls(&mut self.b, &self.sys);
Ok(())
}
}
fn unimp(nr: SyscallNumber) -> SyscallResult {
eprintln!("Stopped on unimplemented syscall {}", lookup_syscall(&nr));
unsafe { std::intrinsics::breakpoint() }
Err(ENOSYS)
}
fn gethost<'a>(ud: usize) -> &'a mut ActivatedWaterboxHost<'a> {
let res = unsafe { &mut *(ud as *mut ActivatedWaterboxHost) };
if res.tag != TAG {
unsafe { std::intrinsics::breakpoint() }
std::process::abort();
}
res
}
fn arg_to_prot(arg: usize) -> Result<Protection, SyscallError> {
use Protection::*;
if arg != arg & (PROT_READ | PROT_WRITE | PROT_EXEC) {
Err(EINVAL)
} else if arg & PROT_EXEC != 0 {
if arg & PROT_WRITE != 0 {
Ok(RWX)
} else {
Ok(RX)
}
} else if arg & PROT_WRITE != 0 {
Ok(RW)
} else if arg & PROT_READ != 0 {
Ok(R)
} else {
Ok(None)
}
}
fn arg_to_fd(arg: usize) -> Result<FileDescriptor, SyscallError> {
if arg < 0x80000000 {
Ok(FileDescriptor(arg as i32))
} else {
Err(EBADFD)
}
}
fn arg_to_str(arg: usize) -> Result<String, SyscallError> {
let cs = unsafe { CStr::from_ptr(arg as *const c_char) };
match cs.to_str() {
Ok(s) => Ok(s.to_string()),
Err(_) => Err(EINVAL),
}
}
fn arg_to_statbuff<'a>(arg: usize) -> &'a mut KStat {
unsafe { &mut *(arg as *mut KStat) }
}
pub extern "win64" fn syscall(nr: SyscallNumber, ud: usize, a1: usize, a2: usize, a3: usize, a4: usize, _a5: usize, _a6: usize) -> SyscallReturn {
let mut h = gethost(ud);
match nr {
NR_MMAP => {
let prot = arg_to_prot(a3)?;
let flags = a4;
if flags & MAP_ANONYMOUS == 0 {
// anonymous + private is easy
// anonymous by itself is hard
// nothing needs either right now
return syscall_err(EOPNOTSUPP)
}
if flags & 0xf00 != 0 {
// various unsupported flags
return syscall_err(EOPNOTSUPP)
}
let arena_addr = h.sys.layout.mmap;
let res = h.b.mmap(AddressRange { start: a1, size: a2 }, prot, arena_addr)?;
syscall_ok(res)
},
NR_MREMAP => {
let arena_addr = h.sys.layout.mmap;
let res = h.b.mremap(AddressRange { start: a1, size: a2 }, a3, arena_addr)?;
syscall_ok(res)
},
NR_MPROTECT => {
let prot = arg_to_prot(a3)?;
let res = h.b.mprotect(AddressRange { start: a1, size: a2 }, prot);
syscall_ret(res)
},
NR_MUNMAP => syscall_ret(h.b.munmap(AddressRange { start: a1, size: a2 })),
NR_MADVISE => {
match a3 {
MADV_DONTNEED => syscall_ret(h.b.madvise_dontneed(AddressRange { start: a1, size: a2 })),
_ => syscall_ok(0),
}
},
NR_STAT => {
let name = arg_to_str(a1)?;
syscall_ret(h.h.fs.stat(&name, arg_to_statbuff(a2)))
},
NR_FSTAT => {
syscall_ret(h.h.fs.fstat(arg_to_fd(a1)?, arg_to_statbuff(a2)))
},
NR_IOCTL => syscall_ok(0),
NR_READ => {
unsafe {
syscall_ret_i64(h.h.fs.read(arg_to_fd(a1)?, std::slice::from_raw_parts_mut(a2 as *mut u8, a3)))
}
},
NR_WRITE => {
unsafe {
syscall_ret_i64(h.h.fs.write(arg_to_fd(a1)?, std::slice::from_raw_parts(a2 as *const u8, a3)))
}
},
NR_READV => {
let fd = arg_to_fd(a1)?;
unsafe {
let mut ret = 0;
let iov = std::slice::from_raw_parts_mut(a2 as *mut Iovec, a3);
for io in iov {
if io.iov_base != 0 {
ret += h.h.fs.read(fd, io.slice_mut())?;
}
}
syscall_ok(ret as usize)
}
},
NR_WRITEV => {
let fd = arg_to_fd(a1)?;
unsafe {
let mut ret = 0;
let iov = std::slice::from_raw_parts(a2 as *const Iovec, a3);
for io in iov {
if io.iov_base != 0 {
ret += h.h.fs.write(fd, io.slice())?;
}
}
syscall_ok(ret as usize)
}
},
NR_OPEN => {
syscall_ret_val(h.h.fs.open(&arg_to_str(a1)?, a2 as i32, a3 as i32).map(|x| x.0 as usize))
},
NR_CLOSE => syscall_ret(h.h.fs.close(arg_to_fd(a1)?)),
NR_LSEEK => syscall_ret_i64(h.h.fs.seek(arg_to_fd(a1)?, a2 as i64, a3 as i32)),
NR_TRUNCATE => syscall_ret(h.h.fs.truncate(&arg_to_str(a1)?, a2 as i64)),
NR_FTRUNCATE => syscall_ret(h.h.fs.ftruncate(arg_to_fd(a1)?, a2 as i64)),
// TODO: 99% sure nothing calls this
NR_SET_THREAD_AREA => syscall_err(ENOSYS),
// TODO: What calls this?
NR_SET_TID_ADDRESS => syscall_ok(8675309),
NR_CLOCK_GETTIME => {
let ts = a2 as *mut TimeSpec;
unsafe {
(*ts).tv_sec = 1495889068;
(*ts).tv_nsec = 0;
}
syscall_ok(0)
},
NR_BRK => {
// TODO: This could be done on the C side
let addr = h.sys.layout.sbrk;
let old = h.h.program_break;
let res = if a1 != align_down(a1) {
old
} else if a1 < addr.start || a1 > addr.end() {
old
} else if a1 > old {
h.b.mmap_fixed(AddressRange { start: old, size: a1 - old }, Protection::RW).unwrap();
a1
} else {
old
};
h.h.program_break = res;
syscall_ok(res)
},
_ => syscall_ret(unimp(nr)),
}
}

View File

@@ -1,9 +1,13 @@
#![crate_type = "cdylib"]
// TODO: Turn this off once we've built the exported public API
#![feature(try_trait)]
#![feature(core_intrinsics)]
#![allow(dead_code)]
use std::io::{Read, Write, Error};
use std::io::{Read, Write};
use anyhow::anyhow;
use syscall_defs::{SyscallNumber, SyscallReturn};
const PAGESIZE: usize = 0x1000;
const PAGEMASK: usize = 0xfff;
@@ -11,13 +15,19 @@ const PAGESHIFT: i32 = 12;
mod memory_block;
mod syscall_defs;
mod bin;
mod elf;
mod fs;
mod host;
mod cinterface;
pub trait IStateable {
fn save_sate(&mut self, stream: Box<dyn Write>) -> Result<(), Error>;
fn load_state(&mut self, stream: Box<dyn Read>) -> Result<(), Error>;
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()>;
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()>;
}
#[derive(Debug, Clone, Copy)]
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AddressRange {
pub start: usize,
pub size: usize,
@@ -37,6 +47,72 @@ impl AddressRange {
pub unsafe fn slice_mut(&self) -> &'static mut [u8] {
std::slice::from_raw_parts_mut(self.start as *mut u8, self.size)
}
/// Unsafe: Pointers are unchecked and mut is not required (TODO: but why?)
pub unsafe fn zero(&self) {
std::ptr::write_bytes(self.start as *mut u8, 0, self.size);
}
/// Expands an address range to page alignment
pub fn align_expand(&self) -> AddressRange {
AddressRange {
start: align_down(self.start),
size: align_up(self.end()) - align_down(self.start),
}
}
}
impl IStateable for AddressRange {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
bin::write(stream, &self.start)?;
bin::write(stream, &self.size)?;
Ok(())
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
bin::read(stream, &mut self.start)?;
bin::read(stream, &mut self.size)?;
Ok(())
}
}
fn align_down(p: usize) -> usize {
p & !PAGEMASK
}
fn align_up(p: usize) -> usize {
((p - 1) | PAGEMASK) + 1
}
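// Worked examples (PAGEMASK == 0xfff):
//   align_down(0x1234) == 0x1000
//   align_up(0x1234)   == 0x2000
//   align_up(0x2000)   == 0x2000  (aligned addresses are fixed points)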
/// Information about memory layout injected into the guest application
#[repr(C)]
#[derive(Copy, Clone)]
pub struct WbxSysLayout {
pub elf: AddressRange,
pub sbrk: AddressRange,
pub sealed: AddressRange,
pub invis: AddressRange,
pub plain: AddressRange,
pub mmap: AddressRange,
}
impl WbxSysLayout {
pub fn all(&self) -> AddressRange {
AddressRange {
start: self.elf.start,
size: self.mmap.end() - self.elf.start
}
}
}
/// Information for making syscalls injected into the guest application
#[repr(C)]
#[derive(Copy, Clone)]
pub struct WbxSysSyscall {
pub ud: usize,
pub syscall: extern "win64" fn(nr: SyscallNumber, ud: usize, a1: usize, a2: usize, a3: usize, a4: usize, a5: usize, a6: usize) -> SyscallReturn,
}
/// Data that is injected into the guest application
#[repr(C)]
#[derive(Copy, Clone)]
pub struct WbxSysArea {
pub layout: WbxSysLayout,
pub syscall: WbxSysSyscall,
}
#[cfg(test)]

View File

@@ -1,16 +1,18 @@
mod pageblock;
mod pal;
mod tripguard;
mod tests;
use std::sync::MutexGuard;
use std::ops::{DerefMut, Deref};
use std::ops::DerefMut;
use pageblock::PageBlock;
use crate::*;
use getset::Getters;
use crate::syscall_defs::*;
use itertools::Itertools;
use std::io;
use std::sync::atomic::AtomicU32;
use crate::bin;
use sha2::{Sha256, Digest};
/// Tracks one lock for each 4GB memory area
mod lock_list {
@@ -26,10 +28,12 @@ mod lock_list {
unsafe fn extend<T>(o: &T) -> &'static T {
std::mem::transmute::<&T, &'static T>(o)
}
/// adds a lock if it does not exist; no effect if it already does.
pub fn maybe_add(lock_index: u32) {
let map = &mut LOCK_LIST.lock().unwrap();
map.entry(lock_index).or_insert_with(|| Box::new(Mutex::new(None)));
}
/// Gets the lock for a particular index.
pub fn get(lock_index: u32) -> &'static Mutex<Option<MemoryBlockRef>> {
let map = &mut LOCK_LIST.lock().unwrap();
unsafe {
@@ -38,13 +42,6 @@
}
}
fn align_down(p: usize) -> usize {
p & !PAGEMASK
}
fn align_up(p: usize) -> usize {
((p - 1) | PAGEMASK) + 1
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Protection {
None,
@@ -73,8 +70,17 @@ impl PageAllocation {
_ => false,
}
}
pub fn readable(&self) -> bool {
use PageAllocation::*;
match self {
Allocated(Protection::None) => false,
Free => false,
_ => true,
}
}
}
/// Stores information about the original data content of a memory area, before it got dirty
#[derive(Debug)]
enum Snapshot {
None,
@@ -89,6 +95,8 @@ struct Page {
/// if true, the page has changed from its original state
pub dirty: bool,
pub snapshot: Snapshot,
/// If true, the page content is not stored in states (but status still is).
pub invisible: bool,
}
impl Page {
pub fn new() -> Page {
@ -96,10 +104,12 @@ impl Page {
status: PageAllocation::Free,
dirty: false,
snapshot: Snapshot::ZeroFilled,
invisible: false,
}
}
/// Take a snapshot if one is not yet stored
/// unsafe: caller must ensure pages are mapped and addr is correct
/// Does not check dirty or invisible
pub unsafe fn maybe_snapshot(&mut self, addr: usize) {
if match self.snapshot { Snapshot::None => true, _ => false } {
let mut snapshot = PageBlock::new();
@ -124,6 +134,67 @@ impl Page {
}
}
/// Used internally to talk about regions of memory together with their allocation status
struct PageRange<'a> {
pub start: usize,
pub pages: &'a mut [Page]
}
impl<'a> PageRange<'a> {
pub fn addr(&self) -> AddressRange {
AddressRange {
start: self.start,
size: self.pages.len() << PAGESHIFT
}
}
pub fn split_at_size(&mut self, size: usize) -> (PageRange, PageRange) {
let (sl, sr) = self.pages.split_at_mut(size >> PAGESHIFT);
(
PageRange {
start: self.start,
pages: sl
},
PageRange {
start: self.start + size,
pages: sr
}
)
}
pub fn iter(&self) -> std::slice::Iter<Page> {
self.pages.iter()
}
pub fn iter_mut(&mut self) -> std::slice::IterMut<Page> {
self.pages.iter_mut()
}
pub fn iter_with_addr(&self) -> impl Iterator<Item = (AddressRange, &Page)> {
let mut start = self.start;
self.pages.iter().map(move |p| {
let page_start = start;
start += PAGESIZE;
(AddressRange { start: page_start, size: PAGESIZE}, p)
})
}
pub fn iter_mut_with_addr(&mut self) -> impl Iterator<Item = (AddressRange, &mut Page)> {
let mut start = self.start;
self.pages.iter_mut().map(move |p| {
let page_start = start;
start += PAGESIZE;
(AddressRange { start: page_start, size: PAGESIZE}, p)
})
}
/// fuse two adjacent ranges. panics if they do not exactly touch
pub fn fuse(left: Self, right: Self) -> PageRange<'a> {
unsafe {
let lp = left.pages.as_mut_ptr();
let rp = right.pages.as_mut_ptr();
assert_eq!(lp.add(left.pages.len()), rp);
PageRange {
start: left.start,
pages: std::slice::from_raw_parts_mut(lp, left.pages.len() + right.pages.len())
}
}
}
}
static NEXT_DEBUG_ID: AtomicU32 = AtomicU32::new(0);
#[derive(Getters)]
@@ -135,32 +206,28 @@ pub struct MemoryBlock {
addr: AddressRange,
#[get]
sealed: bool,
#[get]
hash: Vec<u8>,
lock_index: u32,
handle: pal::Handle,
lock_count: u32,
mutex_guard: Option<MutexGuard<'static, Option<MemoryBlockRef>>>,
debug_id: u32,
active: bool,
}
pub struct MemoryBlockGuard<'a> {
block: &'a mut MemoryBlock,
type BlockGuard = MutexGuard<'static, Option<MemoryBlockRef>>;
pub struct ActivatedMemoryBlock<'block> {
b: &'block mut MemoryBlock,
mutex_guard: Option<BlockGuard>,
}
impl<'a> Drop for MemoryBlockGuard<'a> {
impl<'block> Drop for ActivatedMemoryBlock<'block> {
fn drop(&mut self) {
self.block.deactivate();
}
}
impl<'a> Deref for MemoryBlockGuard<'a> {
type Target = MemoryBlock;
fn deref(&self) -> &MemoryBlock {
self.block
}
}
impl<'a> DerefMut for MemoryBlockGuard<'a> {
fn deref_mut(&mut self) -> &mut MemoryBlock {
self.block
unsafe {
let guard = std::mem::replace(&mut self.mutex_guard, None);
self.b.deactivate(guard.unwrap());
}
}
}
@@ -188,13 +255,13 @@ impl MemoryBlock {
pages,
addr,
sealed: false,
hash: Vec::new(),
lock_index,
handle,
lock_count: 0,
mutex_guard: None,
debug_id,
active: false,
});
// res.trace("new");
res
@@ -203,80 +270,77 @@
pub fn trace(&self, name: &str) {
let ptr = unsafe { std::mem::transmute::<&Self, usize>(self) };
let tid = unsafe { std::mem::transmute::<std::thread::ThreadId, u64>(std::thread::current().id()) };
eprintln!("{}#{} {} [{}]@[{}] thr{}",
name, self.debug_id, ptr, self.lock_count, self.lock_index, tid)
eprintln!("{}#{} {} [{}] thr{}",
name, self.debug_id, ptr, self.lock_index, tid)
}
pub fn enter(&mut self) -> MemoryBlockGuard {
self.activate();
MemoryBlockGuard {
block: self,
pub fn enter(&mut self) -> ActivatedMemoryBlock {
unsafe {
let mutex_guard = self.activate();
ActivatedMemoryBlock {
b: self,
mutex_guard: Some(mutex_guard),
}
}
}
/// lock self, and potentially swap this block into memory
pub fn activate(&mut self) {
/// lock memory region and potentially swap this block into memory
unsafe fn activate(&mut self) -> BlockGuard {
// self.trace("activate");
unsafe {
if !self.active() {
let area = lock_list::get(self.lock_index);
let mut guard = area.lock().unwrap();
assert!(!self.active);
let area = lock_list::get(self.lock_index);
let mut guard = area.lock().unwrap();
let other_opt = guard.deref_mut();
match *other_opt {
Some(MemoryBlockRef(other)) => {
if other != self {
assert!(!(*other).active());
(*other).swapout();
self.swapin();
*other_opt = Some(MemoryBlockRef(self));
}
},
None => {
self.swapin();
*other_opt = Some(MemoryBlockRef(self));
}
let other_opt = guard.deref_mut();
match *other_opt {
Some(MemoryBlockRef(other)) => {
if other != self {
assert!(!(*other).active);
(*other).swapout();
self.swapin();
*other_opt = Some(MemoryBlockRef(self));
}
self.mutex_guard = Some(guard);
},
None => {
self.swapin();
*other_opt = Some(MemoryBlockRef(self));
}
self.lock_count += 1;
}
self.active = true;
guard
}
/// unlock self, and potentially swap this block out of memory
pub fn deactivate(&mut self) {
/// unlock memory region, and potentially swap this block out of memory
#[allow(unused_variables)] // unused stuff in release mode only
#[allow(unused_mut)]
unsafe fn deactivate(&mut self, mut guard: BlockGuard) {
// self.trace("deactivate");
unsafe {
assert!(self.active());
self.lock_count -= 1;
if !self.active() {
let mut guard = std::mem::replace(&mut self.mutex_guard, None).unwrap();
#[cfg(debug_assertions)]
{
// in debug mode, forcibly evict to catch dangling pointers
let other_opt = guard.deref_mut();
match *other_opt {
Some(MemoryBlockRef(other)) => {
if other != self {
panic!();
}
self.swapout();
*other_opt = None;
},
None => {
panic!()
}
assert!(self.active);
#[cfg(debug_assertions)]
{
// in debug mode, forcibly evict to catch dangling pointers
let other_opt = guard.deref_mut();
match *other_opt {
Some(MemoryBlockRef(other)) => {
if other != self {
panic!();
}
self.swapout();
*other_opt = None;
},
None => {
panic!()
}
}
}
self.active = false;
}
unsafe fn swapin(&mut self) {
// self.trace("swapin");
assert!(pal::map(&self.handle, self.addr));
tripguard::register(self);
MemoryBlock::refresh_protections(self.addr.start, self.pages.as_slice());
self.refresh_all_protections();
}
unsafe fn swapout(&mut self) {
// self.trace("swapout");
@@ -285,34 +349,14 @@
tripguard::unregister(self);
}
pub fn active(&self) -> bool {
self.lock_count > 0
}
}
impl Drop for MemoryBlock {
fn drop(&mut self) {
// self.trace("drop");
assert!(!self.active());
let area = lock_list::get(self.lock_index);
let mut guard = area.lock().unwrap();
let other_opt = guard.deref_mut();
match *other_opt {
Some(MemoryBlockRef(other)) => {
if other == self {
unsafe { self.swapout(); }
*other_opt = None;
}
},
None => ()
fn page_range(&mut self) -> PageRange {
PageRange {
start: self.addr.start,
pages: &mut self.pages[..]
}
let h = std::mem::replace(&mut self.handle, pal::bad());
unsafe { pal::close(h); }
}
}
impl MemoryBlock {
fn validate_range(&mut self, addr: AddressRange) -> Result<&mut [Page], i32> {
fn validate_range(&mut self, addr: AddressRange) -> Result<PageRange, SyscallError> {
if addr.start < self.addr.start
|| addr.end() > self.addr.end()
|| addr.size == 0
@ -322,16 +366,22 @@ impl MemoryBlock {
} else {
let pstart = (addr.start - self.addr.start) >> PAGESHIFT;
let psize = (addr.size) >> PAGESHIFT;
Ok(&mut self.pages[pstart..pstart + psize])
Ok(PageRange {
start: addr.start,
pages: &mut self.pages[pstart..pstart + psize]
})
}
}
fn refresh_protections(mut start: usize, pages: &[Page]) {
/// Refresh the correct protections in underlying host RAM on a page range. Use after
/// temporary pal::protect(...) modifications, or to apply the effect of a dirty/prot change on the page
fn refresh_protections(range: &PageRange) {
struct Chunk {
addr: AddressRange,
prot: Protection,
};
let mut start = range.start;
let chunks = range.iter()
.map(|p| {
let cstart = start;
start += PAGESIZE;
@ -356,16 +406,21 @@ impl MemoryBlock {
}
}
fn refresh_all_protections(&mut self) {
MemoryBlock::refresh_protections(&self.page_range())
}
/// Applies new protections to a pagerange, including special RWStack handling on Windows
fn set_protections(range: &mut PageRange, status: PageAllocation) {
for p in range.iter_mut() {
p.status = status;
}
MemoryBlock::refresh_protections(&range);
#[cfg(windows)]
if status == PageAllocation::Allocated(Protection::RWStack) {
// have to precapture snapshots here
let mut addr = range.start;
for p in range.iter_mut() {
unsafe {
p.maybe_snapshot(addr);
}
@ -399,75 +454,377 @@ impl MemoryBlock {
}
}
}
}
impl Drop for MemoryBlock {
fn drop(&mut self) {
// self.trace("drop");
let area = lock_list::get(self.lock_index);
let mut guard = area.lock().unwrap();
let other_opt = guard.deref_mut();
match *other_opt {
Some(MemoryBlockRef(other)) => {
if other == self {
unsafe { self.swapout(); }
*other_opt = None;
}
},
None => ()
}
let h = std::mem::replace(&mut self.handle, pal::bad());
unsafe { pal::close(h); }
}
}
impl<'block> ActivatedMemoryBlock<'block> {
/// Looks for some free pages inside an arena
fn find_free_pages<'a>(arena: &'a mut PageRange<'a>, npages: usize) -> Result<PageRange<'a>, SyscallError> {
struct Chunk<'a> {
range: PageRange<'a>,
free: bool,
}
let range = arena.iter_mut_with_addr()
.map(|(a, p)| Chunk {
free: p.status == PageAllocation::Free,
range: PageRange { start: a.start, pages: std::slice::from_mut(p) },
})
.coalesce(|x, y| {
if x.free == y.free {
Ok(Chunk {
free: x.free,
range: PageRange::fuse(x.range, y.range)
})
} else {
Err((x, y))
}
})
.filter(|c| c.free && c.range.pages.len() >= npages)
.map(|c| c.range)
.sorted_by(|x, y| x.pages.len().cmp(&y.pages.len()))
.next();
match range {
Some(r) => {
if r.pages.len() == npages {
Ok(r)
} else {
Ok(PageRange {
start: r.start,
pages: &mut r.pages[0..npages]
})
}
},
None => Err(ENOMEM)
}
}
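// Illustrative trace (not from the original source): with page statuses
// [used, free, free, free, used, free, free] and npages = 2, the coalesce pass
// produces free runs of lengths 3 and 2; sorting by length makes the 2-page run
// win (best fit), and an oversized winner is truncated to exactly npages.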
/// implements a subset of mmap(2) for anonymous, movable address mappings
fn mmap_movable(&mut self, size: usize, prot: Protection, arena_addr: AddressRange) -> Result<usize, SyscallError> {
if size != align_down(size) {
return Err(EINVAL)
}
let mut arena = self.b.validate_range(arena_addr).unwrap();
match ActivatedMemoryBlock::find_free_pages(&mut arena, size >> PAGESHIFT) {
Ok(mut range) => {
MemoryBlock::set_protections(&mut range, PageAllocation::Allocated(prot));
Ok(range.start)
},
Err(e) => Err(e),
}
}
/// implements a subset of mmap(2) for anonymous, fixed address mappings
pub fn mmap_fixed(&mut self, addr: AddressRange, prot: Protection) -> SyscallResult {
self.b.get_stack_dirty(); // not needed here technically?
let mut range = self.b.validate_range(addr)?;
if range.iter().any(|p| p.status != PageAllocation::Free) {
// assume MAP_FIXED_NOREPLACE at all times
return Err(EEXIST)
}
MemoryBlock::set_protections(&mut range, PageAllocation::Allocated(prot));
Ok(())
}
/// implements a subset of mremap(2) when MREMAP_MAYMOVE is not set, and MREMAP_FIXED is not
fn mremap_nomove(&mut self, addr: AddressRange, new_size: usize) -> SyscallResult {
self.b.get_stack_dirty();
if new_size > addr.size {
let full_addr = AddressRange { start: addr.start, size: new_size };
let mut range = self.b.validate_range(full_addr)?;
let (old_range, mut new_range) = range.split_at_size(addr.size);
if old_range.iter().any(|p| p.status == PageAllocation::Free) {
return Err(EINVAL)
}
if new_range.iter().any(|p| p.status != PageAllocation::Free) {
return Err(EEXIST)
}
MemoryBlock::set_protections(&mut new_range, old_range.pages[0].status);
Ok(())
} else {
let range = self.b.validate_range(addr)?;
if range.iter().any(|p| p.status == PageAllocation::Free) {
return Err(EINVAL)
}
self.munmap_impl(AddressRange { start: addr.start + new_size, size: addr.size - new_size }, false)
}
}
/// implements a subset of mremap(2) when MREMAP_MAYMOVE is set, and MREMAP_FIXED is not
fn mremap_maymove(&mut self, addr: AddressRange, new_size: usize, arena_addr: AddressRange) -> Result<usize, SyscallError> {
// This could be a lot more clever, but it's a difficult problem and doesn't come up often.
// So I use a "simple" solution here.
self.b.get_stack_dirty();
if new_size != align_down(new_size) {
return Err(EINVAL)
}
// save a copy of src, and unmap
let mut src = self.b.validate_range(addr)?;
if src.iter().any(|p| p.status == PageAllocation::Free) {
return Err(EINVAL)
}
let src_addr = src.addr();
let mut old_status = Vec::new();
old_status.reserve_exact(src.pages.len());
let mut old_data = vec![0u8; src_addr.size];
for p in src.iter() {
old_status.push(p.status);
}
unsafe {
pal::protect(src_addr, Protection::R);
old_data.copy_from_slice(src_addr.slice());
}
ActivatedMemoryBlock::free_pages_impl(&mut src, false);
// find new location to map to, and copy into there
let mut arena = self.b.validate_range(arena_addr).unwrap();
let mut dest = match ActivatedMemoryBlock::find_free_pages(&mut arena, new_size >> PAGESHIFT) {
Ok(r) => r,
Err(_) => {
// woops! reallocate at the old address.
// Or just panic because that probably won't happen
panic!("Failure in realloc")
},
};
let nbcopy = std::cmp::min(addr.size, new_size);
let npcopy = nbcopy >> PAGESHIFT;
unsafe {
pal::protect(dest.addr(), Protection::RW);
dest.addr().slice_mut()[0..nbcopy].copy_from_slice(&old_data[0..nbcopy]);
}
for (status, pdst) in old_status.iter().zip(dest.iter_mut()) {
pdst.status = *status;
// this is conservative; there are situations where dirty might be false,
// but we're unlikely to see them with real world realloc usage
pdst.dirty = true;
}
for pdst in dest.pages[npcopy..].iter_mut() {
pdst.status = old_status[0];
}
MemoryBlock::refresh_protections(&dest);
Ok(dest.start)
}
/// implements a subset of mprotect(2)
pub fn mprotect(&mut self, addr: AddressRange, prot: Protection) -> SyscallResult {
self.b.get_stack_dirty();
let mut range = self.b.validate_range(addr)?;
if range.iter().any(|p| p.status == PageAllocation::Free) {
return Err(ENOMEM)
}
MemoryBlock::set_protections(&mut range, PageAllocation::Allocated(prot));
Ok(())
}
/// implements a subset of munmap(2)
pub fn munmap(&mut self, addr: AddressRange) -> SyscallResult {
self.munmap_impl(addr, false)
}
pub fn mmap(&mut self, addr: AddressRange, prot: Protection, arena_addr: AddressRange) -> Result<usize, SyscallError> {
if addr.size == 0 {
return Err(EINVAL)
}
if addr.start == 0 {
self.mmap_movable(addr.size, prot, arena_addr)
} else {
self.mmap_fixed(addr, prot)?;
Ok(addr.start)
}
}
pub fn mremap(&mut self, addr: AddressRange, new_size: usize, arena_addr: AddressRange) -> Result<usize, SyscallError> {
if addr.size == 0 || new_size == 0 {
return Err(EINVAL)
}
if addr.start == 0 {
self.mremap_maymove(addr, new_size, arena_addr)
} else {
self.mremap_nomove(addr, new_size)?;
Ok(addr.start)
}
}
/// release pages, assuming the range has been fully validated already
fn free_pages_impl(range: &mut PageRange, advise_only: bool) {
let addr = range.addr();
// we do not save the current state of unmapped pages, and if they are later remapped,
// the expectation is that they will start out as zero filled. accordingly, the most
// sensible way to do this is to zero them now
unsafe {
pal::protect(addr, Protection::RW);
addr.zero();
// simple state size optimization: we can undirty pages in this case depending on the initial state
for p in range.iter_mut() {
p.dirty = !p.invisible && match p.snapshot {
Snapshot::ZeroFilled => false,
_ => true
};
}
}
if advise_only {
MemoryBlock::refresh_protections(range);
} else {
MemoryBlock::set_protections(range, PageAllocation::Free);
}
}
/// munmap or MADV_DONTNEED
fn munmap_impl(&mut self, addr: AddressRange, advise_only: bool) -> SyscallResult {
self.b.get_stack_dirty();
let mut range = self.b.validate_range(addr)?;
if range.iter().any(|p| p.status == PageAllocation::Free) {
return Err(EINVAL)
}
ActivatedMemoryBlock::free_pages_impl(&mut range, advise_only);
Ok(())
}
/// Marks an address range as invisible. Its page content will not be saved in states (but
/// its allocation status still will be.) Cannot be revoked. Must be done before sealing.
/// The pages need not be currently mapped; they will always be invisible regardless of that.
/// !!The invisibility flag itself is not actually saved in states, as it is assumed to be unchanging for a particular layout.!!
pub fn mark_invisible(&mut self, addr: AddressRange) -> SyscallResult {
// The limitations on this method are mostly because we want to not need a snapshot or dirty
// tracking for invisible pages. But if we didn't have one and later the pages became visible,
// we'd need one and wouldn't be able to reconstruct one.
assert!(!self.b.sealed);
let mut range = self.b.validate_range(addr)?;
for p in range.iter_mut() {
p.dirty = true;
p.invisible = true;
}
MemoryBlock::refresh_protections(&range);
Ok(())
}
/// implements a subset of madvise(2)
pub fn madvise_dontneed(&mut self, addr: AddressRange) -> SyscallResult {
self.munmap_impl(addr, true)
}
pub fn seal(&mut self) {
assert!(!self.b.sealed);
for p in self.b.pages.iter_mut() {
if p.dirty && !p.invisible {
p.dirty = false;
} else {
p.snapshot = Snapshot::None;
}
}
self.b.refresh_all_protections();
self.b.sealed = true;
self.b.hash = {
let mut hasher = Sha256::new();
bin::write(&mut hasher, &self.b.addr).unwrap();
for p in self.b.pages.iter() {
match &p.snapshot {
Snapshot::None => bin::writeval(&mut hasher, 1).unwrap(),
Snapshot::ZeroFilled => bin::writeval(&mut hasher, 2).unwrap(),
Snapshot::Data(d) => { hasher.write(d.slice()).unwrap(); },
}
}
hasher.finalize()[..].to_owned()
};
}
}
const MAGIC: &str = "ActivatedMemoryBlock";
impl<'block> IStateable for ActivatedMemoryBlock<'block> {
fn save_state(&mut self, stream: &mut dyn Write) -> anyhow::Result<()> {
if !self.b.sealed {
return Err(anyhow!("Must seal first"))
}
bin::write_magic(stream, MAGIC)?;
bin::write_hash(stream, &self.b.hash[..])?;
self.b.get_stack_dirty();
self.b.addr.save_state(stream)?;
for (paddr, p) in self.b.page_range().iter_with_addr() {
bin::write(stream, &p.status)?;
if !p.invisible {
bin::write(stream, &p.dirty)?;
if p.dirty {
unsafe {
if !p.status.readable() {
assert!(pal::protect(paddr, Protection::R));
}
stream.write_all(paddr.slice())?;
if !p.status.readable() {
assert!(pal::protect(paddr, Protection::None));
}
}
}
}
}
Ok(())
}
fn load_state(&mut self, stream: &mut dyn Read) -> anyhow::Result<()> {
assert!(self.b.sealed);
bin::verify_magic(stream, MAGIC)?;
match bin::verify_hash(stream, &self.b.hash[..]) {
Ok(_) => (),
Err(_) => eprintln!("Unexpected MemoryBlock hash mismatch."),
}
self.b.get_stack_dirty();
{
let mut addr = AddressRange { start:0, size: 0 };
addr.load_state(stream)?;
if addr != self.b.addr {
return Err(anyhow!("Bad state data (addr) for ActivatedMemoryBlock"))
}
}
unsafe {
pal::protect(self.b.addr, Protection::RW);
for (paddr, p) in self.b.page_range().iter_mut_with_addr() {
let status = bin::readval::<PageAllocation>(stream)?;
if !p.invisible {
let dirty = bin::readval::<bool>(stream)?;
match (p.dirty, dirty) {
(false, false) => (),
(false, true) => {
p.maybe_snapshot(paddr.start);
stream.read_exact(paddr.slice_mut())?;
},
(true, false) => {
match &p.snapshot {
Snapshot::ZeroFilled => paddr.zero(),
Snapshot::Data(b) => {
std::ptr::copy_nonoverlapping(b.as_ptr(), paddr.start as *mut u8, PAGESIZE)
},
Snapshot::None => panic!("Missing snapshot for dirty region"),
}
}
(true, true) => {
stream.read_exact(paddr.slice_mut())?;
}
}
p.dirty = dirty;
}
p.status = status;
}
self.b.refresh_all_protections();
}
Ok(())
}
}
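// Stream layout produced by save_state above, for reference: magic string,
// block hash, addr, then per page: status; visible pages add a dirty flag,
// and dirty pages are followed by their raw PAGESIZE bytes.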
@ -482,127 +839,3 @@ impl Eq for MemoryBlock {}
#[derive(Debug)]
pub struct MemoryBlockRef(*mut MemoryBlock);
unsafe impl Send for MemoryBlockRef {}

View File

@ -32,6 +32,12 @@ impl PageBlock {
std::slice::from_raw_parts_mut(self.ptr.as_ptr(), PAGESIZE)
}
}
pub fn as_ptr(&self) -> *const u8 {
self.ptr.as_ptr()
}
pub fn as_mut_ptr(&mut self) -> *mut u8 {
self.ptr.as_ptr()
}
}
impl Drop for PageBlock {

View File

@ -0,0 +1,411 @@
#![cfg(test)]
use std::mem::transmute;
use super::*;
type TestResult = anyhow::Result<()>;
/// new / drop, activate / deactivate
#[test]
fn test_create() {
// these tests don't test much anymore...
drop(MemoryBlock::new(AddressRange { start: 0x36300000000, size: 0x50000 }));
drop(MemoryBlock::new(AddressRange { start: 0x36b00000000, size: 0x2000 }));
{
let mut b = MemoryBlock::new(AddressRange { start: 0x36100000000, size: 0x65000 });
b.enter();
b.enter();
}
{
let mut b = MemoryBlock::new(AddressRange { start: 0x36e00000000, size: 0x5000 });
let guard = b.enter();
drop(guard);
b.enter();
}
}
/// simple test of dirt detection
#[test]
fn test_dirty() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36f00000000, size: 0x10000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
g.mmap_fixed(addr, Protection::RW)?;
let ptr = g.b.addr.slice_mut();
ptr[0x2003] = 5;
assert!(g.b.pages[2].dirty);
Ok(())
}
}
/// dirt detection away from the start of a block
#[test]
fn test_offset() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36f00000000, size: 0x20000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
g.mmap_fixed(AddressRange { start: 0x36f00003000, size: 0x1000 }, Protection::RW)?;
let ptr = g.b.addr.slice_mut();
ptr[0x3663] = 12;
assert!(g.b.pages[3].dirty);
Ok(())
}
}
/// dirt detection in RWStack area when $rsp does not point there, and it was just a conventional write
#[test]
fn test_stk_norm() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36200000000, size: 0x10000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
g.mmap_fixed(addr, Protection::RWStack)?;
let ptr = g.b.addr.slice_mut();
ptr[0xeeee] = 0xee;
ptr[0x44] = 0x44;
assert!(g.b.pages[0].dirty);
assert!(g.b.pages[14].dirty);
assert_eq!(ptr[0x8000], 0);
// This is an unfair test, but it's just documenting the current limitations of the system.
// Ideally, page 8 would be clean because we read from it but did not write to it.
// Due to limitations of RWStack tracking on windows, it is dirty.
#[cfg(windows)]
assert!(g.b.pages[8].dirty);
#[cfg(unix)]
assert!(!g.b.pages[8].dirty);
Ok(())
}
}
/// dirt detection in RWStack area when $rsp points there
#[test]
fn test_stack() -> TestResult {
use std::convert::TryInto;
unsafe {
let addr = AddressRange { start: 0x36f00000000, size: 0x10000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
g.mmap_fixed(addr, Protection::RW)?;
let ptr = g.b.addr.slice_mut();
let mut i = 0;
ptr[i] = 0x48 ; i += 1; ptr[i] = 0x89 ; i += 1; ptr[i] = 0xe0 ; i += 1; // mov rax,rsp
ptr[i] = 0x48 ; i += 1; ptr[i] = 0x89 ; i += 1; ptr[i] = 0xfc ; i += 1; // mov rsp,rdi
ptr[i] = 0x50 ; i += 1; // push rax
ptr[i] = 0x48 ; i += 1; ptr[i] = 0x89 ; i += 1; ptr[i] = 0xc4 ; i += 1; // mov rsp,rax
ptr[i] = 0xb0 ; i += 1; ptr[i] = 0x2a ; i += 1; // mov al,0x2a
ptr[i] = 0xc3 ; // ret
g.mprotect(AddressRange { start: 0x36f00000000, size: 0x1000 }, Protection::RX)?;
g.mprotect(AddressRange { start: 0x36f00008000, size: 0x8000 }, Protection::RWStack)?;
let tmp_rsp = addr.end();
let res = transmute::<usize, extern "sysv64" fn(rsp: usize) -> u8>(addr.start)(tmp_rsp);
assert_eq!(res, 42);
assert!(g.b.pages[0].dirty);
assert!(!g.b.pages[1].dirty);
assert!(!g.b.pages[14].dirty);
assert!(g.b.pages[15].dirty);
let real_rsp = isize::from_le_bytes(ptr[addr.size - 8..].try_into().unwrap());
let current_rsp = &real_rsp as *const isize as isize;
assert!((real_rsp - current_rsp).abs() < 0x10000);
Ok(())
}
}
#[test]
fn test_state_basic() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36c00000000, size: 0x4000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
let ptr = g.b.addr.slice_mut();
g.mmap_fixed(addr, Protection::RW)?;
ptr[0x0000] = 20;
ptr[0x1000] = 40;
ptr[0x2000] = 60;
ptr[0x3000] = 80;
g.seal();
let mut state0 = Vec::new();
g.save_state(&mut state0)?;
// no pages should be in the state
assert!(state0.len() < 0x1000);
ptr[0x1000] = 100;
ptr[0x3000] = 44;
let mut state1 = Vec::new();
g.save_state(&mut state1)?;
// two pages should be in the state
assert!(state1.len() > 0x2000);
assert!(state1.len() < 0x3000);
g.load_state(&mut state0.as_slice())?;
assert_eq!(ptr[0x0000], 20);
assert_eq!(ptr[0x1000], 40);
assert_eq!(ptr[0x2000], 60);
assert_eq!(ptr[0x3000], 80);
g.load_state(&mut state1.as_slice())?;
assert_eq!(ptr[0x0000], 20);
assert_eq!(ptr[0x1000], 100);
assert_eq!(ptr[0x2000], 60);
assert_eq!(ptr[0x3000], 44);
Ok(())
}
}
#[test]
fn test_state_unreadable() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36c00000000, size: 0x1000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
let ptr = g.b.addr.slice_mut();
g.mmap_fixed(addr, Protection::RW)?;
g.seal();
ptr[200] = 200;
ptr[500] = 100;
g.mprotect(addr, Protection::None)?;
let mut state0 = Vec::new();
g.save_state(&mut state0)?;
g.mprotect(addr, Protection::RW)?;
ptr[300] = 50;
ptr[600] = 11;
g.mprotect(addr, Protection::None)?;
let mut state1 = Vec::new();
g.save_state(&mut state1)?;
g.load_state(&mut state0.as_slice())?;
g.mprotect(addr, Protection::R)?;
assert_eq!(ptr[200], 200);
assert_eq!(ptr[500], 100);
assert_eq!(ptr[300], 0);
assert_eq!(ptr[600], 0);
g.load_state(&mut state1.as_slice())?;
g.mprotect(addr, Protection::R)?;
assert_eq!(ptr[200], 200);
assert_eq!(ptr[500], 100);
assert_eq!(ptr[300], 50);
assert_eq!(ptr[600], 11);
Ok(())
}
}
#[test]
fn test_thready_stack() -> TestResult {
use std::sync::{Arc, Barrier};
use std::thread;
let barrier = Arc::new(Barrier::new(16));
let mut ress = Vec::<thread::JoinHandle<TestResult>>::new();
for i in 0..16 {
let blocker = barrier.clone();
ress.push(thread::spawn(move|| {
unsafe {
let addr = AddressRange { start: 0x36000000000 + i * 0x100000000, size: PAGESIZE * 2 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
blocker.wait();
g.mmap_fixed(addr, Protection::RWX)?;
g.mprotect(AddressRange { start: addr.start + PAGESIZE, size: PAGESIZE }, Protection::RWStack)?;
let ptr = g.b.addr.slice_mut();
let mut i = 0;
ptr[i] = 0x48 ; i += 1; ptr[i] = 0x89 ; i += 1; ptr[i] = 0xe0 ; i += 1; // mov rax,rsp
ptr[i] = 0x48 ; i += 1; ptr[i] = 0x89 ; i += 1; ptr[i] = 0xfc ; i += 1; // mov rsp,rdi
ptr[i] = 0x50 ; i += 1; // push rax
ptr[i] = 0x48 ; i += 1; ptr[i] = 0x89 ; i += 1; ptr[i] = 0xc4 ; i += 1; // mov rsp,rax
ptr[i] = 0xb0 ; i += 1; ptr[i] = 0x2a ; i += 1; // mov al,0x2a
ptr[i] = 0xc3 ; // ret
g.seal();
assert!(!g.b.pages[0].dirty);
assert!(!g.b.pages[1].dirty);
let tmp_rsp = addr.end();
let res = transmute::<usize, extern "sysv64" fn(rsp: usize) -> u8>(addr.start)(tmp_rsp);
assert_eq!(res, 42);
assert!(!g.b.pages[0].dirty);
assert!(g.b.pages[1].dirty);
Ok(())
}
}));
}
for h in ress {
match h.join() {
Ok(v) => v,
Err(_) => return Err(anyhow!("Thread error")),
}?
}
Ok(())
}
#[test]
fn test_state_invisible() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36400000000, size: 0x4000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
let ptr = g.b.addr.slice_mut();
g.mmap_fixed(addr, Protection::RW)?;
ptr[0x0055] = 11;
ptr[0x1055] = 22;
g.mark_invisible(AddressRange { start: 0x36400001000, size: 0x2000 })?;
ptr[0x2055] = 33;
ptr[0x3055] = 44;
g.seal();
ptr[0x0055] = 0x11;
ptr[0x1055] = 0x22;
ptr[0x2055] = 0x33;
ptr[0x3055] = 0x44;
let mut state0 = Vec::new();
g.save_state(&mut state0)?;
// two pages should be in the state
assert!(state0.len() > 0x2000);
assert!(state0.len() < 0x3000);
ptr[0x0055] = 0x55;
ptr[0x1055] = 0x66;
ptr[0x2055] = 0x77;
ptr[0x3055] = 0x88;
g.load_state(&mut state0.as_slice())?;
assert_eq!(ptr[0x0055], 0x11);
// Some current cores require this behavior, where the invisible values are actually left untouched.
// (VB for config settings?)
// In the long term, it might be nice to redefine things so that invisible means invisible and ephemeral,
// and forcibly zero any active invisible page on loadstate.
assert_eq!(ptr[0x1055], 0x66);
assert_eq!(ptr[0x2055], 0x77);
assert_eq!(ptr[0x3055], 0x44);
Ok(())
}
}
#[test]
fn test_dontneed() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36500000000, size: 0x10000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
g.seal();
let ptr = g.b.addr.slice_mut();
g.mmap_fixed(addr, Protection::RW)?;
for i in 0..addr.size {
ptr[i] = i as u8;
}
let addr2 = AddressRange { start: addr.start + 0x3000, size: 0x5000 };
g.madvise_dontneed(addr2)?;
let ptr2 = addr2.slice_mut();
for i in 0..addr2.size {
assert_eq!(ptr2[i], 0);
}
let mut state0 = Vec::new();
g.save_state(&mut state0)?;
assert!(state0.len() < 0xc000);
Ok(())
}
}
#[test]
fn test_remap_nomove() -> TestResult {
let addr = AddressRange { start: 0x36600000000, size: 0x10000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
g.mmap_fixed(AddressRange { start: addr.start, size: 0x4000 }, Protection::RWX)?;
g.mremap_nomove(AddressRange { start: addr.start, size: 0x4000 }, 0x6000)?;
assert_eq!(g.b.pages[3].status, PageAllocation::Allocated(Protection::RWX));
assert_eq!(g.b.pages[5].status, PageAllocation::Allocated(Protection::RWX));
g.mremap_nomove(AddressRange { start: addr.start, size: 0x6000 }, 0x3000)?;
assert_eq!(g.b.pages[2].status, PageAllocation::Allocated(Protection::RWX));
assert_eq!(g.b.pages[3].status, PageAllocation::Free);
assert_eq!(g.b.pages[5].status, PageAllocation::Free);
Ok(())
}
#[test]
fn test_mmap_move() -> TestResult {
let addr = AddressRange { start: 0x36700000000, size: 0x10000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
let p0 = g.mmap_movable(0x10000, Protection::RW, addr)?;
assert_eq!(p0, 0x36700000000);
g.munmap(AddressRange { start: 0x36700002000, size: 0x2000 })?;
g.munmap(AddressRange { start: 0x3670000a000, size: 0x1000 })?;
let p1: usize = g.mmap_movable(0x1000, Protection::RW, addr)?;
assert_eq!(p1, 0x3670000a000); // fit in smallest hole
Ok(())
}
#[test]
fn test_mremap_move_expand() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36800000000, size: 0x4000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
let ptr = g.b.addr.slice_mut();
let initial_addr = AddressRange { start: 0x36800002000, size: 0x1000 };
g.mmap_fixed(initial_addr, Protection::RW)?;
ptr[0x2004] = 11;
let p1 = g.mremap_maymove(initial_addr, 0x2000, addr)?;
assert_eq!(p1, addr.start);
assert_eq!(ptr[4], 11);
g.mmap_fixed(initial_addr, Protection::RW)?;
assert_eq!(ptr[0x2004], 0);
}
Ok(())
}
#[test]
fn test_mremap_move_shrink() -> TestResult {
unsafe {
let addr = AddressRange { start: 0x36900000000, size: 0x4000 };
let mut b = MemoryBlock::new(addr);
let mut g = b.enter();
let ptr = g.b.addr.slice_mut();
let initial_addr = AddressRange { start: 0x36900001000, size: 0x3000 };
g.mmap_fixed(initial_addr, Protection::RW)?;
ptr[0x1004] = 11;
let p1 = g.mremap_maymove(initial_addr, 0x1000, addr)?;
assert_eq!(p1, addr.start);
assert_eq!(ptr[4], 11);
g.mmap_fixed(initial_addr, Protection::RW)?;
assert_eq!(ptr[0x1004], 0);
}
Ok(())
}

View File

@ -1,5 +1,4 @@
use super::MemoryBlock;
use std::sync::Mutex;
use crate::*;
@ -21,7 +20,7 @@ struct GlobalData {
pub unsafe fn register(block: *mut MemoryBlock) {
let mut data = GLOBAL_DATA.lock().unwrap();
if !data.initialized {
trip_pal::initialize();
data.initialized = true;
}
data.active_blocks.push(MemoryBlockRef(block));
@ -60,81 +59,87 @@ unsafe fn trip(addr: usize) -> TripResult {
}
page.maybe_snapshot(page_start_addr);
page.dirty = true;
if pal::protect(AddressRange { start: page_start_addr, size: PAGESIZE }, page.native_prot()) {
TripResult::Handled
} else {
std::process::abort();
}
}
#[cfg(windows)]
mod trip_pal {
use super::*;
use winapi::um::errhandlingapi::*;
use winapi::um::winnt::*;
use winapi::vc::excpt::*;
unsafe extern "system" fn handler(p_info: *mut EXCEPTION_POINTERS) -> i32 {
let p_record = &mut *(*p_info).ExceptionRecord;
let flags = p_record.ExceptionInformation[0];
match p_record.ExceptionCode {
STATUS_ACCESS_VIOLATION if (flags & 1) != 0 => (), // write exception
STATUS_GUARD_PAGE_VIOLATION => (), // guard exception
_ => return EXCEPTION_CONTINUE_SEARCH
}
let fault_address = p_record.ExceptionInformation[1] as usize;
match trip(fault_address) {
TripResult::Handled => EXCEPTION_CONTINUE_EXECUTION,
TripResult::NotHandled => EXCEPTION_CONTINUE_SEARCH,
}
}
unsafe {
let res = AddVectoredExceptionHandler(1 /* CALL_FIRST */, Some(handler));
assert!(res != null_mut(), "AddVectoredExceptionHandler failed");
}
}
#[cfg(unix)]
type SaHandler = unsafe extern fn(i32) -> ();
#[cfg(unix)]
type SaSigaction = unsafe extern fn(i32, *const siginfo_t, *const ucontext_t) -> ();
#[cfg(unix)]
use libc::*;
#[cfg(unix)]
static mut ALTSTACK: [u8; SIGSTKSZ] = [0; SIGSTKSZ];
#[cfg(unix)]
static mut SA_OLD: Option<Box<sigaction>> = None;
#[cfg(unix)]
fn initialize() {
use std::mem::{transmute, zeroed};
unsafe extern fn handler(sig: i32, info: *const siginfo_t, ucontext: *const ucontext_t) {
let fault_address = (*info).si_addr() as usize;
let write = (*ucontext).uc_mcontext.gregs[REG_ERR as usize] & 2 != 0;
let rethrow = !write || match trip(fault_address) {
TripResult::NotHandled => true,
_ => false
};
if rethrow {
if SA_OLD.as_ref().unwrap().sa_flags & SA_SIGINFO != 0 {
transmute::<usize, SaSigaction>(SA_OLD.as_ref().unwrap().sa_sigaction)(sig, info, ucontext);
} else {
transmute::<usize, SaHandler>(SA_OLD.as_ref().unwrap().sa_sigaction)(sig);
pub fn initialize() {
unsafe extern "system" fn handler(p_info: *mut EXCEPTION_POINTERS) -> i32 {
let p_record = &*(*p_info).ExceptionRecord;
let flags = p_record.ExceptionInformation[0];
match p_record.ExceptionCode {
STATUS_ACCESS_VIOLATION if (flags & 1) != 0 => (), // write exception
STATUS_GUARD_PAGE_VIOLATION => (), // guard exception
_ => return EXCEPTION_CONTINUE_SEARCH
}
let fault_address = p_record.ExceptionInformation[1] as usize;
match trip(fault_address) {
TripResult::Handled => EXCEPTION_CONTINUE_EXECUTION,
TripResult::NotHandled => EXCEPTION_CONTINUE_SEARCH,
}
}
unsafe {
let res = AddVectoredExceptionHandler(1 /* CALL_FIRST */, Some(handler));
assert!(!res.is_null(), "AddVectoredExceptionHandler failed");
}
}
}
#[cfg(unix)]
mod trip_pal {
use libc::*;
use super::*;
type SaHandler = unsafe extern fn(i32) -> ();
type SaSigaction = unsafe extern fn(i32, *const siginfo_t, *const ucontext_t) -> ();
static mut SA_OLD: Option<Box<sigaction>> = None;
pub fn initialize() {
use std::mem::{transmute, zeroed};
unsafe extern fn handler(sig: i32, info: *const siginfo_t, ucontext: *const ucontext_t) {
let fault_address = (*info).si_addr() as usize;
let write = (*ucontext).uc_mcontext.gregs[REG_ERR as usize] & 2 != 0;
let rethrow = !write || match trip(fault_address) {
TripResult::NotHandled => true,
_ => false
};
if rethrow {
let sa_old = SA_OLD.as_ref().unwrap();
if sa_old.sa_flags & SA_SIGINFO != 0 {
transmute::<usize, SaSigaction>(sa_old.sa_sigaction)(sig, info, ucontext);
} else {
transmute::<usize, SaHandler>(sa_old.sa_sigaction)(sig);
}
abort();
}
}
unsafe {
SA_OLD = Some(Box::new(zeroed()));
let ss = stack_t {
ss_flags: 0,
ss_sp: Box::into_raw(Box::new(zeroed::<[u8; SIGSTKSZ]>())) as *mut c_void,
ss_size: SIGSTKSZ
};
assert!(sigaltstack(&ss, 0 as *mut stack_t) == 0, "sigaltstack failed");
let mut sa = sigaction {
sa_mask: zeroed(),
sa_sigaction: transmute::<SaSigaction, usize>(handler),
sa_flags: SA_ONSTACK | SA_SIGINFO,
sa_restorer: None,
};
sigfillset(&mut sa.sa_mask);
assert!(sigaction(SIGSEGV, &sa, &mut **SA_OLD.as_mut().unwrap() as *mut sigaction) == 0, "sigaction failed");
}
}
}

View File

@ -2,147 +2,751 @@
// There are various crates that contain these, but they're #[cfg]'ed to the HOST system.
// We want exactly the ones that waterbox guest MUSL uses, exactly the way they're defined there
use std::{ops::Try, fmt};
/// the result of a syscall in Rust-friendly form; OK or errno
pub type SyscallResult = Result<(), SyscallError>;
/// map a syscall result as the kernel would return it
pub fn syscall_ret(result: SyscallResult) -> SyscallReturn {
match result {
Ok(()) => SyscallReturn::from_ok(0),
Err(e) => SyscallReturn::from_error(e)
}
}
/// map a syscall result as the kernel would return it
pub fn syscall_ret_val(result: Result<usize, SyscallError>) -> SyscallReturn {
match result {
Ok(v) => SyscallReturn::from_ok(v),
Err(e) => SyscallReturn::from_error(e)
}
}
pub fn syscall_ret_i64(result: Result<i64, SyscallError>) -> SyscallReturn {
match result {
Ok(v) => SyscallReturn::from_ok(v as usize),
Err(e) => SyscallReturn::from_error(e)
}
}
/// map a syscall error as the kernel would return it
pub fn syscall_err(result: SyscallError) -> SyscallReturn {
SyscallReturn::from_error(result)
}
/// map a successful syscall value as the kernel would return it
pub fn syscall_ok(result: usize) -> SyscallReturn {
SyscallReturn::from_ok(result)
}
#[repr(transparent)]
pub struct SyscallReturn(pub usize);
impl SyscallReturn {
pub const ERROR_THRESH: usize = -4096 as isize as usize;
}
impl Try for SyscallReturn {
type Ok = usize;
type Error = SyscallError;
fn into_result(self) -> Result<Self::Ok, Self::Error> {
if self.0 <= SyscallReturn::ERROR_THRESH {
Ok(self.0)
} else {
Err(SyscallError(-(self.0 as i32)))
}
}
fn from_error(v: Self::Error) -> Self {
SyscallReturn(-v.0 as isize as usize)
}
fn from_ok(v: Self::Ok) -> Self {
assert!(v <= SyscallReturn::ERROR_THRESH);
SyscallReturn(v)
}
}
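// A small sanity sketch (added illustration, not in the original file) of the
// kernel-style encoding: errors travel as small negative values cast to usize,
// successes as plain values below ERROR_THRESH.
#[cfg(test)]
mod syscall_return_sketch {
use super::*;
#[test]
fn round_trip() {
// EINVAL (22) encodes as -22, the way the kernel would return it
assert_eq!(syscall_err(EINVAL).0, -22isize as usize);
// a success value passes through unchanged
assert_eq!(syscall_ok(42).0, 42);
}
}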
macro_rules! lookup {
($P:ident: $T:ident { $($N:ident = $E:literal; )+ }) => (
$(pub const $N: $T = $T($E);)+
pub fn $P(val: &$T) -> &'static str {
match val {
$($T($E) => stringify!($N),)+
_ => "????"
}
}
);
}
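// Roughly what one invocation expands to (illustrative, with a made-up type):
// lookup! { lookup_demo: DemoError { EDEMO = 1; } }
// becomes:
// pub const EDEMO: DemoError = DemoError(1);
// pub fn lookup_demo(val: &DemoError) -> &'static str {
// match val { DemoError(1) => "EDEMO", _ => "????" }
// }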
#[derive(Debug, Eq, PartialEq)]
#[repr(transparent)]
pub struct SyscallError(pub i32);
impl From<i32> for SyscallError {
fn from(err: i32) -> SyscallError {
SyscallError(err)
}
}
impl fmt::Display for SyscallError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "errno {}", lookup_errno(self))
}
}
impl std::error::Error for SyscallError {}
lookup! { lookup_errno: SyscallError {
EPERM = 1;
ENOENT = 2;
ESRCH = 3;
EINTR = 4;
EIO = 5;
ENXIO = 6;
E2BIG = 7;
ENOEXEC = 8;
EBADF = 9;
ECHILD = 10;
EAGAIN = 11;
ENOMEM = 12;
EACCES = 13;
EFAULT = 14;
ENOTBLK = 15;
EBUSY = 16;
EEXIST = 17;
EXDEV = 18;
ENODEV = 19;
ENOTDIR = 20;
EISDIR = 21;
EINVAL = 22;
ENFILE = 23;
EMFILE = 24;
ENOTTY = 25;
ETXTBSY = 26;
EFBIG = 27;
ENOSPC = 28;
ESPIPE = 29;
EROFS = 30;
EMLINK = 31;
EPIPE = 32;
EDOM = 33;
ERANGE = 34;
EDEADLK = 35;
ENAMETOOLONG = 36;
ENOLCK = 37;
ENOSYS = 38;
ENOTEMPTY = 39;
ELOOP = 40;
// EWOULDBLOCK = EAGAIN;
ENOMSG = 42;
EIDRM = 43;
ECHRNG = 44;
EL2NSYNC = 45;
EL3HLT = 46;
EL3RST = 47;
ELNRNG = 48;
EUNATCH = 49;
ENOCSI = 50;
EL2HLT = 51;
EBADE = 52;
EBADR = 53;
EXFULL = 54;
ENOANO = 55;
EBADRQC = 56;
EBADSLT = 57;
// EDEADLOCK = EDEADLK;
EBFONT = 59;
ENOSTR = 60;
ENODATA = 61;
ETIME = 62;
ENOSR = 63;
ENONET = 64;
ENOPKG = 65;
EREMOTE = 66;
ENOLINK = 67;
EADV = 68;
ESRMNT = 69;
ECOMM = 70;
EPROTO = 71;
EMULTIHOP = 72;
EDOTDOT = 73;
EBADMSG = 74;
EOVERFLOW = 75;
ENOTUNIQ = 76;
EBADFD = 77;
EREMCHG = 78;
ELIBACC = 79;
ELIBBAD = 80;
ELIBSCN = 81;
ELIBMAX = 82;
ELIBEXEC = 83;
EILSEQ = 84;
ERESTART = 85;
ESTRPIPE = 86;
EUSERS = 87;
ENOTSOCK = 88;
EDESTADDRREQ = 89;
EMSGSIZE = 90;
EPROTOTYPE = 91;
ENOPROTOOPT = 92;
EPROTONOSUPPORT = 93;
ESOCKTNOSUPPORT = 94;
EOPNOTSUPP = 95;
// ENOTSUP = EOPNOTSUPP;
EPFNOSUPPORT = 96;
EAFNOSUPPORT = 97;
EADDRINUSE = 98;
EADDRNOTAVAIL = 99;
ENETDOWN = 100;
ENETUNREACH = 101;
ENETRESET = 102;
ECONNABORTED = 103;
ECONNRESET = 104;
ENOBUFS = 105;
EISCONN = 106;
ENOTCONN = 107;
ESHUTDOWN = 108;
ETOOMANYREFS = 109;
ETIMEDOUT = 110;
ECONNREFUSED = 111;
EHOSTDOWN = 112;
EHOSTUNREACH = 113;
EALREADY = 114;
EINPROGRESS = 115;
ESTALE = 116;
EUCLEAN = 117;
ENOTNAM = 118;
ENAVAIL = 119;
EISNAM = 120;
EREMOTEIO = 121;
EDQUOT = 122;
ENOMEDIUM = 123;
EMEDIUMTYPE = 124;
ECANCELED = 125;
ENOKEY = 126;
EKEYEXPIRED = 127;
EKEYREVOKED = 128;
EKEYREJECTED = 129;
EOWNERDEAD = 130;
ENOTRECOVERABLE = 131;
ERFKILL = 132;
EHWPOISON = 133;
}}
pub const S_IFMT: u32 = 0o0170000;
pub const S_IFDIR: u32 = 0o0040000;
pub const S_IFCHR: u32 = 0o0020000;
pub const S_IFBLK: u32 = 0o0060000;
pub const S_IFREG: u32 = 0o0100000;
pub const S_IFIFO: u32 = 0o0010000;
pub const S_IFLNK: u32 = 0o0120000;
pub const S_IFSOCK: u32 = 0o0140000;
pub const S_ISUID: u32 = 0o04000;
pub const S_ISGID: u32 = 0o02000;
pub const S_ISVTX: u32 = 0o01000;
pub const S_IRUSR: u32 = 0o0400;
pub const S_IWUSR: u32 = 0o0200;
pub const S_IXUSR: u32 = 0o0100;
pub const S_IRWXU: u32 = 0o0700;
pub const S_IRGRP: u32 = 0o0040;
pub const S_IWGRP: u32 = 0o0020;
pub const S_IXGRP: u32 = 0o0010;
pub const S_IRWXG: u32 = 0o0070;
pub const S_IROTH: u32 = 0o0004;
pub const S_IWOTH: u32 = 0o0002;
pub const S_IXOTH: u32 = 0o0001;
pub const S_IRWXO: u32 = 0o0007;
/// Kernel stat object
#[repr(C)]
#[derive(Default)]
pub struct KStat {
pub st_dev: u64,
pub st_ino: u64,
pub st_nlink: u64,
pub st_mode: u32,
pub st_uid: u32,
pub st_gid: u32,
pub __pad0: u32,
pub st_rdev: u64,
pub st_size: i64,
pub st_blksize: i64,
pub st_blocks: i64,
pub st_atime_sec: i64,
pub st_atime_nsec: i64,
pub st_mtime_sec: i64,
pub st_mtime_nsec: i64,
pub st_ctime_sec: i64,
pub st_ctime_nsec: i64,
pub __unused0: i64,
pub __unused1: i64,
pub __unused2: i64,
}
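// Hypothetical fill (illustration only) of a KStat for a 4 KiB regular file
// with mode 0644, using the S_* bits above:
// let mut st = KStat::default();
// st.st_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
// st.st_nlink = 1;
// st.st_size = 4096;
// st.st_blksize = 4096;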
pub const SEEK_SET: i32 = 0;
pub const SEEK_CUR: i32 = 1;
pub const SEEK_END: i32 = 2;
pub const O_ACCMODE: i32 = O_PATH | O_RDONLY | O_WRONLY | O_RDWR;
pub const O_PATH: i32 = 0o010000000;
pub const O_RDONLY: i32 = 0;
pub const O_WRONLY: i32 = 1;
pub const O_RDWR: i32 = 2;
#[repr(C)]
pub struct Iovec {
pub iov_base: usize,
pub iov_len: usize,
}
impl Iovec {
pub unsafe fn slice(&self) -> &[u8] {
std::slice::from_raw_parts(self.iov_base as *const u8, self.iov_len)
}
pub unsafe fn slice_mut(&self) -> &mut [u8] {
std::slice::from_raw_parts_mut(self.iov_base as *mut u8, self.iov_len)
}
}
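// Hedged sketch (not part of the original file) of a writev-style gather over
// guest iovecs using the accessor above:
// unsafe fn gather(iovs: &[Iovec], out: &mut Vec<u8>) {
// for iov in iovs.iter().filter(|v| v.iov_len > 0) {
// out.extend_from_slice(iov.slice());
// }
// }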
#[derive(Debug, Eq, PartialEq)]
#[repr(transparent)]
pub struct SyscallNumber(pub usize);
lookup! { lookup_syscall: SyscallNumber {
NR_READ = 0;
NR_WRITE = 1;
NR_OPEN = 2;
NR_CLOSE = 3;
NR_STAT = 4;
NR_FSTAT = 5;
NR_LSTAT = 6;
NR_POLL = 7;
NR_LSEEK = 8;
NR_MMAP = 9;
NR_MPROTECT = 10;
NR_MUNMAP = 11;
NR_BRK = 12;
NR_RT_SIGACTION = 13;
NR_RT_SIGPROCMASK = 14;
NR_RT_SIGRETURN = 15;
NR_IOCTL = 16;
NR_PREAD64 = 17;
NR_PWRITE64 = 18;
NR_READV = 19;
NR_WRITEV = 20;
NR_ACCESS = 21;
NR_PIPE = 22;
NR_SELECT = 23;
NR_SCHED_YIELD = 24;
NR_MREMAP = 25;
NR_MSYNC = 26;
NR_MINCORE = 27;
NR_MADVISE = 28;
NR_SHMGET = 29;
NR_SHMAT = 30;
NR_SHMCTL = 31;
NR_DUP = 32;
NR_DUP2 = 33;
NR_PAUSE = 34;
NR_NANOSLEEP = 35;
NR_GETITIMER = 36;
NR_ALARM = 37;
NR_SETITIMER = 38;
NR_GETPID = 39;
NR_SENDFILE = 40;
NR_SOCKET = 41;
NR_CONNECT = 42;
NR_ACCEPT = 43;
NR_SENDTO = 44;
NR_RECVFROM = 45;
NR_SENDMSG = 46;
NR_RECVMSG = 47;
NR_SHUTDOWN = 48;
NR_BIND = 49;
NR_LISTEN = 50;
NR_GETSOCKNAME = 51;
NR_GETPEERNAME = 52;
NR_SOCKETPAIR = 53;
NR_SETSOCKOPT = 54;
NR_GETSOCKOPT = 55;
NR_CLONE = 56;
NR_FORK = 57;
NR_VFORK = 58;
NR_EXECVE = 59;
NR_EXIT = 60;
NR_WAIT4 = 61;
NR_KILL = 62;
NR_UNAME = 63;
NR_SEMGET = 64;
NR_SEMOP = 65;
NR_SEMCTL = 66;
NR_SHMDT = 67;
NR_MSGGET = 68;
NR_MSGSND = 69;
NR_MSGRCV = 70;
NR_MSGCTL = 71;
NR_FCNTL = 72;
NR_FLOCK = 73;
NR_FSYNC = 74;
NR_FDATASYNC = 75;
NR_TRUNCATE = 76;
NR_FTRUNCATE = 77;
NR_GETDENTS = 78;
NR_GETCWD = 79;
NR_CHDIR = 80;
NR_FCHDIR = 81;
NR_RENAME = 82;
NR_MKDIR = 83;
NR_RMDIR = 84;
NR_CREAT = 85;
NR_LINK = 86;
NR_UNLINK = 87;
NR_SYMLINK = 88;
NR_READLINK = 89;
NR_CHMOD = 90;
NR_FCHMOD = 91;
NR_CHOWN = 92;
NR_FCHOWN = 93;
NR_LCHOWN = 94;
NR_UMASK = 95;
NR_GETTIMEOFDAY = 96;
NR_GETRLIMIT = 97;
NR_GETRUSAGE = 98;
NR_SYSINFO = 99;
NR_TIMES = 100;
NR_PTRACE = 101;
NR_GETUID = 102;
NR_SYSLOG = 103;
NR_GETGID = 104;
NR_SETUID = 105;
NR_SETGID = 106;
NR_GETEUID = 107;
NR_GETEGID = 108;
NR_SETPGID = 109;
NR_GETPPID = 110;
NR_GETPGRP = 111;
NR_SETSID = 112;
NR_SETREUID = 113;
NR_SETREGID = 114;
NR_GETGROUPS = 115;
NR_SETGROUPS = 116;
NR_SETRESUID = 117;
NR_GETRESUID = 118;
NR_SETRESGID = 119;
NR_GETRESGID = 120;
NR_GETPGID = 121;
NR_SETFSUID = 122;
NR_SETFSGID = 123;
NR_GETSID = 124;
NR_CAPGET = 125;
NR_CAPSET = 126;
NR_RT_SIGPENDING = 127;
NR_RT_SIGTIMEDWAIT = 128;
NR_RT_SIGQUEUEINFO = 129;
NR_RT_SIGSUSPEND = 130;
NR_SIGALTSTACK = 131;
NR_UTIME = 132;
NR_MKNOD = 133;
NR_USELIB = 134;
NR_PERSONALITY = 135;
NR_USTAT = 136;
NR_STATFS = 137;
NR_FSTATFS = 138;
NR_SYSFS = 139;
NR_GETPRIORITY = 140;
NR_SETPRIORITY = 141;
NR_SCHED_SETPARAM = 142;
NR_SCHED_GETPARAM = 143;
NR_SCHED_SETSCHEDULER = 144;
NR_SCHED_GETSCHEDULER = 145;
NR_SCHED_GET_PRIORITY_MAX = 146;
NR_SCHED_GET_PRIORITY_MIN = 147;
NR_SCHED_RR_GET_INTERVAL = 148;
NR_MLOCK = 149;
NR_MUNLOCK = 150;
NR_MLOCKALL = 151;
NR_MUNLOCKALL = 152;
NR_VHANGUP = 153;
NR_MODIFY_LDT = 154;
NR_PIVOT_ROOT = 155;
NR__SYSCTL = 156;
NR_PRCTL = 157;
NR_ARCH_PRCTL = 158;
NR_ADJTIMEX = 159;
NR_SETRLIMIT = 160;
NR_CHROOT = 161;
NR_SYNC = 162;
NR_ACCT = 163;
NR_SETTIMEOFDAY = 164;
NR_MOUNT = 165;
NR_UMOUNT2 = 166;
NR_SWAPON = 167;
NR_SWAPOFF = 168;
NR_REBOOT = 169;
NR_SETHOSTNAME = 170;
NR_SETDOMAINNAME = 171;
NR_IOPL = 172;
NR_IOPERM = 173;
NR_CREATE_MODULE = 174;
NR_INIT_MODULE = 175;
NR_DELETE_MODULE = 176;
NR_GET_KERNEL_SYMS = 177;
NR_QUERY_MODULE = 178;
NR_QUOTACTL = 179;
NR_NFSSERVCTL = 180;
NR_GETPMSG = 181;
NR_PUTPMSG = 182;
NR_AFS_SYSCALL = 183;
NR_TUXCALL = 184;
NR_SECURITY = 185;
NR_GETTID = 186;
NR_READAHEAD = 187;
NR_SETXATTR = 188;
NR_LSETXATTR = 189;
NR_FSETXATTR = 190;
NR_GETXATTR = 191;
NR_LGETXATTR = 192;
NR_FGETXATTR = 193;
NR_LISTXATTR = 194;
NR_LLISTXATTR = 195;
NR_FLISTXATTR = 196;
NR_REMOVEXATTR = 197;
NR_LREMOVEXATTR = 198;
NR_FREMOVEXATTR = 199;
NR_TKILL = 200;
NR_TIME = 201;
NR_FUTEX = 202;
NR_SCHED_SETAFFINITY = 203;
NR_SCHED_GETAFFINITY = 204;
NR_SET_THREAD_AREA = 205;
NR_IO_SETUP = 206;
NR_IO_DESTROY = 207;
NR_IO_GETEVENTS = 208;
NR_IO_SUBMIT = 209;
NR_IO_CANCEL = 210;
NR_GET_THREAD_AREA = 211;
NR_LOOKUP_DCOOKIE = 212;
NR_EPOLL_CREATE = 213;
NR_EPOLL_CTL_OLD = 214;
NR_EPOLL_WAIT_OLD = 215;
NR_REMAP_FILE_PAGES = 216;
NR_GETDENTS64 = 217;
NR_SET_TID_ADDRESS = 218;
NR_RESTART_SYSCALL = 219;
NR_SEMTIMEDOP = 220;
NR_FADVISE64 = 221;
NR_TIMER_CREATE = 222;
NR_TIMER_SETTIME = 223;
NR_TIMER_GETTIME = 224;
NR_TIMER_GETOVERRUN = 225;
NR_TIMER_DELETE = 226;
NR_CLOCK_SETTIME = 227;
NR_CLOCK_GETTIME = 228;
NR_CLOCK_GETRES = 229;
NR_CLOCK_NANOSLEEP = 230;
NR_EXIT_GROUP = 231;
NR_EPOLL_WAIT = 232;
NR_EPOLL_CTL = 233;
NR_TGKILL = 234;
NR_UTIMES = 235;
NR_VSERVER = 236;
NR_MBIND = 237;
NR_SET_MEMPOLICY = 238;
NR_GET_MEMPOLICY = 239;
NR_MQ_OPEN = 240;
NR_MQ_UNLINK = 241;
NR_MQ_TIMEDSEND = 242;
NR_MQ_TIMEDRECEIVE = 243;
NR_MQ_NOTIFY = 244;
NR_MQ_GETSETATTR = 245;
NR_KEXEC_LOAD = 246;
NR_WAITID = 247;
NR_ADD_KEY = 248;
NR_REQUEST_KEY = 249;
NR_KEYCTL = 250;
NR_IOPRIO_SET = 251;
NR_IOPRIO_GET = 252;
NR_INOTIFY_INIT = 253;
NR_INOTIFY_ADD_WATCH = 254;
NR_INOTIFY_RM_WATCH = 255;
NR_MIGRATE_PAGES = 256;
NR_OPENAT = 257;
NR_MKDIRAT = 258;
NR_MKNODAT = 259;
NR_FCHOWNAT = 260;
NR_FUTIMESAT = 261;
NR_NEWFSTATAT = 262;
NR_UNLINKAT = 263;
NR_RENAMEAT = 264;
NR_LINKAT = 265;
NR_SYMLINKAT = 266;
NR_READLINKAT = 267;
NR_FCHMODAT = 268;
NR_FACCESSAT = 269;
NR_PSELECT6 = 270;
NR_PPOLL = 271;
NR_UNSHARE = 272;
NR_SET_ROBUST_LIST = 273;
NR_GET_ROBUST_LIST = 274;
NR_SPLICE = 275;
NR_TEE = 276;
NR_SYNC_FILE_RANGE = 277;
NR_VMSPLICE = 278;
NR_MOVE_PAGES = 279;
NR_UTIMENSAT = 280;
NR_EPOLL_PWAIT = 281;
NR_SIGNALFD = 282;
NR_TIMERFD_CREATE = 283;
NR_EVENTFD = 284;
NR_FALLOCATE = 285;
NR_TIMERFD_SETTIME = 286;
NR_TIMERFD_GETTIME = 287;
NR_ACCEPT4 = 288;
NR_SIGNALFD4 = 289;
NR_EVENTFD2 = 290;
NR_EPOLL_CREATE1 = 291;
NR_DUP3 = 292;
NR_PIPE2 = 293;
NR_INOTIFY_INIT1 = 294;
NR_PREADV = 295;
NR_PWRITEV = 296;
NR_RT_TGSIGQUEUEINFO = 297;
NR_PERF_EVENT_OPEN = 298;
NR_RECVMMSG = 299;
NR_FANOTIFY_INIT = 300;
NR_FANOTIFY_MARK = 301;
NR_PRLIMIT64 = 302;
NR_NAME_TO_HANDLE_AT = 303;
NR_OPEN_BY_HANDLE_AT = 304;
NR_CLOCK_ADJTIME = 305;
NR_SYNCFS = 306;
NR_SENDMMSG = 307;
NR_SETNS = 308;
NR_GETCPU = 309;
NR_PROCESS_VM_READV = 310;
NR_PROCESS_VM_WRITEV = 311;
NR_KCMP = 312;
NR_FINIT_MODULE = 313;
NR_SCHED_SETATTR = 314;
NR_SCHED_GETATTR = 315;
NR_RENAMEAT2 = 316;
NR_SECCOMP = 317;
NR_GETRANDOM = 318;
NR_MEMFD_CREATE = 319;
NR_KEXEC_FILE_LOAD = 320;
NR_BPF = 321;
NR_EXECVEAT = 322;
NR_USERFAULTFD = 323;
NR_MEMBARRIER = 324;
NR_MLOCK2 = 325;
NR_COPY_FILE_RANGE = 326;
NR_PREADV2 = 327;
NR_PWRITEV2 = 328;
NR_PKEY_MPROTECT = 329;
NR_PKEY_ALLOC = 330;
NR_PKEY_FREE = 331;
NR_STATX = 332;
NR_IO_PGETEVENTS = 333;
NR_RSEQ = 334;
NR_PIDFD_SEND_SIGNAL = 424;
NR_IO_URING_SETUP = 425;
NR_IO_URING_ENTER = 426;
NR_IO_URING_REGISTER = 427;
NR_OPEN_TREE = 428;
NR_MOVE_MOUNT = 429;
NR_FSOPEN = 430;
NR_FSCONFIG = 431;
NR_FSMOUNT = 432;
NR_FSPICK = 433;
NR_PIDFD_OPEN = 434;
NR_CLONE3 = 435;
}}
pub const MAP_FAILED: usize = 0xffffffffffffffff;
pub const MAP_SHARED: usize = 0x01;
pub const MAP_PRIVATE: usize = 0x02;
pub const MAP_SHARED_VALIDATE: usize = 0x03;
pub const MAP_TYPE: usize = 0x0f;
pub const MAP_FIXED: usize = 0x10;
pub const MAP_ANON: usize = 0x20;
pub const MAP_32BIT: usize = 0x40;
pub const MAP_ANONYMOUS: usize = MAP_ANON;
pub const MAP_NORESERVE: usize = 0x4000;
pub const MAP_GROWSDOWN: usize = 0x0100;
pub const MAP_DENYWRITE: usize = 0x0800;
pub const MAP_EXECUTABLE: usize = 0x1000;
pub const MAP_LOCKED: usize = 0x2000;
pub const MAP_POPULATE: usize = 0x8000;
pub const MAP_NONBLOCK: usize = 0x10000;
pub const MAP_STACK: usize = 0x20000;
pub const MAP_HUGETLB: usize = 0x40000;
pub const MAP_SYNC: usize = 0x80000;
pub const MAP_FIXED_NOREPLACE: usize = 0x100000;
pub const MAP_FILE: usize = 0;
pub const MAP_HUGE_SHIFT: usize = 26;
pub const MAP_HUGE_MASK: usize = 0x3f;
pub const MAP_HUGE_64KB: usize = 16 << 26;
pub const MAP_HUGE_512KB: usize = 19 << 26;
pub const MAP_HUGE_1MB: usize = 20 << 26;
pub const MAP_HUGE_2MB: usize = 21 << 26;
pub const MAP_HUGE_8MB: usize = 23 << 26;
pub const MAP_HUGE_16MB: usize = 24 << 26;
pub const MAP_HUGE_32MB: usize = 25 << 26;
pub const MAP_HUGE_256MB: usize = 28 << 26;
pub const MAP_HUGE_512MB: usize = 29 << 26;
pub const MAP_HUGE_1GB: usize = 30 << 26;
pub const MAP_HUGE_2GB: usize = 31 << 26;
pub const MAP_HUGE_16GB: usize = 34 << 26;
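// The MAP_HUGE_* values encode log2 of the huge page size in the bits above
// MAP_HUGE_SHIFT: e.g. 2 MiB = 1 << 21, hence MAP_HUGE_2MB = 21 << 26.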
pub const PROT_NONE: usize = 0;
pub const PROT_READ: usize = 1;
pub const PROT_WRITE: usize = 2;
pub const PROT_EXEC: usize = 4;
pub const PROT_GROWSDOWN: usize = 0x01000000;
pub const PROT_GROWSUP: usize = 0x02000000;
pub const MS_ASYNC: usize = 1;
pub const MS_INVALIDATE: usize = 2;
pub const MS_SYNC: usize = 4;
pub const MCL_CURRENT: usize = 1;
pub const MCL_FUTURE: usize = 2;
pub const MCL_ONFAULT: usize = 4;
pub const POSIX_MADV_NORMAL: usize = 0;
pub const POSIX_MADV_RANDOM: usize = 1;
pub const POSIX_MADV_SEQUENTIAL: usize = 2;
pub const POSIX_MADV_WILLNEED: usize = 3;
pub const POSIX_MADV_DONTNEED: usize = 4;
pub const MADV_NORMAL: usize = 0;
pub const MADV_RANDOM: usize = 1;
pub const MADV_SEQUENTIAL: usize = 2;
pub const MADV_WILLNEED: usize = 3;
pub const MADV_DONTNEED: usize = 4;
pub const MADV_FREE: usize = 8;
pub const MADV_REMOVE: usize = 9;
pub const MADV_DONTFORK: usize = 10;
pub const MADV_DOFORK: usize = 11;
pub const MADV_MERGEABLE: usize = 12;
pub const MADV_UNMERGEABLE: usize = 13;
pub const MADV_HUGEPAGE: usize = 14;
pub const MADV_NOHUGEPAGE: usize = 15;
pub const MADV_DONTDUMP: usize = 16;
pub const MADV_DODUMP: usize = 17;
pub const MADV_WIPEONFORK: usize = 18;
pub const MADV_KEEPONFORK: usize = 19;
pub const MADV_COLD: usize = 20;
pub const MADV_PAGEOUT: usize = 21;
pub const MADV_HWPOISON: usize = 100;
pub const MADV_SOFT_OFFLINE: usize = 101;
pub const MREMAP_MAYMOVE: usize = 1;
pub const MREMAP_FIXED: usize = 2;
pub const MLOCK_ONFAULT: usize = 0x01;
pub const MFD_CLOEXEC: usize = 0x0001;
pub const MFD_ALLOW_SEALING: usize = 0x0002;
pub const MFD_HUGETLB: usize = 0x0004;
#[repr(C)]
pub struct TimeSpec {
pub tv_sec: i64,
pub tv_nsec: i64,
}