From 7f9fe3b5277f3ad40b99a1b31188e3bec0a4535d Mon Sep 17 00:00:00 2001 From: Joshua Drake Date: Fri, 8 Oct 2010 16:01:37 +0000 Subject: [PATCH] bring metasm to tip git-svn-id: file:///home/svn/framework3/trunk@10600 4d416f70-5f16-0410-b530-b9f4589650da --- lib/metasm/metasm.rb | 12 +- lib/metasm/metasm/decompile.rb | 5 +- lib/metasm/metasm/disassemble.rb | 9 +- lib/metasm/metasm/disassemble_api.rb | 15 +- lib/metasm/metasm/dynldr.rb | 97 +- lib/metasm/metasm/exe_format/coff.rb | 3 + lib/metasm/metasm/exe_format/coff_decode.rb | 2 +- lib/metasm/metasm/exe_format/coff_encode.rb | 6 +- lib/metasm/metasm/exe_format/elf.rb | 5 +- lib/metasm/metasm/exe_format/elf_decode.rb | 13 +- lib/metasm/metasm/exe_format/main.rb | 1 + lib/metasm/metasm/exe_format/pe.rb | 9 +- lib/metasm/metasm/gui/dasm_graph.rb | 141 +- lib/metasm/metasm/gui/dasm_main.rb | 3 + lib/metasm/metasm/gui/gtk.rb | 1 + lib/metasm/metasm/ia32/compile_c.rb | 2 +- lib/metasm/metasm/ia32/encode.rb | 1 + lib/metasm/metasm/ia32/opcodes.rb | 1 + lib/metasm/metasm/ia32/parse.rb | 4 +- lib/metasm/metasm/main.rb | 199 +- lib/metasm/metasm/os/main.rb | 16 +- lib/metasm/metasm/os/windows.rb | 69 +- lib/metasm/metasm/parse.rb | 28 +- lib/metasm/metasm/parse_c.rb | 44 +- lib/metasm/metasm/preprocessor.rb | 143 +- lib/metasm/metasm/render.rb | 40 +- lib/metasm/metasm/x86_64/opcodes.rb | 1 + lib/metasm/misc/bottleneck.rb | 50 +- lib/metasm/samples/dynamic_ruby.rb | 1972 +++++++++++++++---- lib/metasm/samples/lindebug.rb | 8 +- lib/metasm/tests/dynldr.rb | 11 + 31 files changed, 2254 insertions(+), 657 deletions(-) diff --git a/lib/metasm/metasm.rb b/lib/metasm/metasm.rb index 36e71f077d..0229d8171c 100644 --- a/lib/metasm/metasm.rb +++ b/lib/metasm/metasm.rb @@ -47,8 +47,7 @@ module Metasm 'X86_64' => 'x86_64', 'Sh4' => 'sh4', 'Dalvik' => 'dalvik', 'C' => ['parse_c', 'compile_c'], 'MZ' => 'exe_format/mz', 'PE' => 'exe_format/pe', - 'ELF' => ['exe_format/elf_encode', 'exe_format/elf_decode'], - 'COFF' => ['exe_format/coff_encode', 'exe_format/coff_decode'], + 'ELF' => 'exe_format/elf', 'COFF' => 'exe_format/coff', 'Shellcode' => 'exe_format/shellcode', 'AutoExe' => 'exe_format/autoexe', 'AOut' => 'exe_format/a_out', 'MachO' => 'exe_format/macho', 'DEX' => 'exe_format/dex', @@ -76,7 +75,6 @@ def self.autorequire_const_missing(c) const_get c end - def self.require(f) # temporarily put the current file directory in the ruby include path if not $:.include? Metasmdir @@ -121,6 +119,10 @@ require 'metasm/os/main' # remove an 1.9 warning, couldn't find a compatible way... if {}.respond_to? :key - puts "using ruby1.9 workaround for Hash.index" if $DEBUG - class Hash ; alias index key end + puts "using ruby1.9 workaround for Hash#index warning" if $DEBUG + class Hash + alias index_premetasm index rescue nil + undef index rescue nil + alias index key + end end diff --git a/lib/metasm/metasm/decompile.rb b/lib/metasm/metasm/decompile.rb index af6922add2..8d9c1f4660 100644 --- a/lib/metasm/metasm/decompile.rb +++ b/lib/metasm/metasm/decompile.rb @@ -204,6 +204,9 @@ class Decompiler def new_global_var(addr, type, scope=nil) addr = @dasm.normalize(addr) + # (almost) NULL ptr + return if addr.kind_of? Fixnum and addr >= 0 and addr < 32 + # check preceding structure we're hitting # TODO check what we step over when defining a new static struct 0x100.times { |i_| @@ -485,7 +488,7 @@ class Decompiler when C::Goto if jumpto[s.target] r = jumpto[s.target].dup - r.value = C::CExpression[r.value.reduce(@c_parser)] if r.kind_of? C::Return and r.value # deep_dup + r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression r end when C::Return diff --git a/lib/metasm/metasm/disassemble.rb b/lib/metasm/metasm/disassemble.rb index 6a6bf1e982..eb5aff1831 100644 --- a/lib/metasm/metasm/disassemble.rb +++ b/lib/metasm/metasm/disassemble.rb @@ -657,6 +657,8 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace split_block(di.block, di.address) if not di.block_head? # this updates di.block di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default bf = di.block + elsif di == true + bf = @function[addr] end elsif bf = @function[addr] detect_function_thunk_noreturn(from) if bf.noreturn @@ -1943,20 +1945,23 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra vals = [] edata.ptr = off dups = dumplen/elemlen + elemsym = "u#{elemlen*8}".to_sym while edata.ptr < edata.data.length - if vals.length > dups and vals.uniq.length > 1 + if vals.length > dups and vals.last != vals.first + # we have a dup(), unread the last element which is different vals.pop addr = Expression[addr, :-, elemlen].reduce edata.ptr -= elemlen break end break if vals.length == dups and vals.uniq.length > 1 - vals << edata.decode_imm("u#{elemlen*8}".to_sym, @cpu.endianness) + vals << edata.decode_imm(elemsym, @cpu.endianness) addr += elemlen if i = (1-elemlen..0).find { |i_| t = addr + i_ @xrefs[t] or @decoded[t] or edata.reloc[edata.ptr+i_] or edata.inv_export[edata.ptr+i_] } + # i < 0 edata.ptr += i addr += i break diff --git a/lib/metasm/metasm/disassemble_api.rb b/lib/metasm/metasm/disassemble_api.rb index 8425f7ec89..78c3b0556a 100644 --- a/lib/metasm/metasm/disassemble_api.rb +++ b/lib/metasm/metasm/disassemble_api.rb @@ -700,18 +700,11 @@ class Disassembler found = [] @sections.each { |sec_addr, e| - chunkoff = 0 - while chunkoff < e.data.length - chunk = e.data[chunkoff, chunksz+margin].to_str - off = 0 - while match_off = (chunk[off..-1] =~ pat) - break if off+match_off >= chunksz - match_addr = sec_addr + chunkoff + off + match_off + e.pattern_scan(pat, chunksz, margin) { |eo| + match_addr = sec_addr + eo found << match_addr if not block_given? or yield(match_addr) - off += match_off + 1 - end - chunkoff += chunksz - end + false + } } found end diff --git a/lib/metasm/metasm/dynldr.rb b/lib/metasm/metasm/dynldr.rb index 4e86e37fa5..15c3a03f32 100644 --- a/lib/metasm/metasm/dynldr.rb +++ b/lib/metasm/metasm/dynldr.rb @@ -52,7 +52,12 @@ extern VALUE *rb_eArgError __attribute__((import)); #define Qtrue ((VALUE)2) #define Qnil ((VALUE)4) -#if #{RUBY_VERSION >= '1.9' ? 1 : 0} +// allows generating a ruby1.9 dynldr.so from ruby1.8 +#ifndef DYNLDR_RUBY_19 +#define DYNLDR_RUBY_19 #{RUBY_VERSION >= '1.9' ? 1 : 0} +#endif + +#if DYNLDR_RUBY_19 #define T_STRING 0x05 #define T_ARRAY 0x07 #define T_FIXNUM 0x15 @@ -78,7 +83,7 @@ extern VALUE *rb_eArgError __attribute__((import)); VALUE rb_uint2inum(VALUE); VALUE rb_ull2inum(unsigned long long); VALUE rb_num2ulong(VALUE); -VALUE rb_str_new(const char* ptr, unsigned long len); // alloc + memcpy + 0term +VALUE rb_str_new(const char* ptr, long len); // alloc + memcpy + 0term VALUE rb_ary_new2(int len); VALUE rb_float_new(double); @@ -128,7 +133,7 @@ static VALUE dynldr; static VALUE memory_read(VALUE self, VALUE addr, VALUE len) { - return rb_str_new((char*)VAL2INT(addr), (unsigned long)VAL2INT(len)); + return rb_str_new((char*)VAL2INT(addr), (long)VAL2INT(len)); } static VALUE memory_read_int(VALUE self, VALUE addr) @@ -162,18 +167,34 @@ static VALUE str_ptr(VALUE self, VALUE str) return INT2VAL((uintptr_t)STR_PTR(str)); } +// return the VALUE of an object (different of .object_id for Symbols, maybe others) +static VALUE rb_obj_to_value(VALUE self, VALUE obj) +{ + return INT2VAL((uintptr_t)obj); +} + +// return the ruby object at VALUE +// USE WITH CAUTION, passing invalid values will segfault the interpreter/GC +static VALUE rb_value_to_obj(VALUE self, VALUE val) +{ + return VAL2INT(val); +} + // load a symbol from a lib byname, byordinal if integral static VALUE sym_addr(VALUE self, VALUE lib, VALUE func) { uintptr_t h, p; - if (TYPE(lib) != T_STRING) + if (TYPE(lib) == T_STRING) + h = os_load_lib(STR_PTR(lib)); + else if (TYPE(lib) == T_FIXNUM) + h = VAL2INT(lib); + else rb_raise(*rb_eArgError, "Invalid lib"); + if (TYPE(func) != T_STRING && TYPE(func) != T_FIXNUM) rb_raise(*rb_eArgError, "Invalid func"); - h = os_load_lib(STR_PTR(lib)); - if (TYPE(func) == T_FIXNUM) p = os_load_sym_ord(h, VAL2INT(func)); else @@ -322,6 +343,8 @@ int Init_dynldr(void) __attribute__((export_as(Init_))) // t rb_define_singleton_method(dynldr, "memory_write", memory_write, 2); rb_define_singleton_method(dynldr, "memory_write_int", memory_write_int, 2); rb_define_singleton_method(dynldr, "str_ptr", str_ptr, 1); + rb_define_singleton_method(dynldr, "rb_obj_to_value", rb_obj_to_value, 1); + rb_define_singleton_method(dynldr, "rb_value_to_obj", rb_value_to_obj, 1); rb_define_singleton_method(dynldr, "sym_addr", sym_addr, 2); rb_define_singleton_method(dynldr, "raw_invoke", invoke, 3); rb_define_const(dynldr, "CALLBACK_TARGET", INT2VAL((VALUE)&callback_handler)); @@ -462,13 +485,9 @@ do_invoke_fastcall: add eax, 8 mov [ebp+16], eax - mov eax,[ebp+12] - test eax, eax - jz _do_invoke_call - dec eax - test eax, eax - jz _do_invoke_call - dec eax + mov eax, [ebp+12] + sub eax, 2 + jb _do_invoke_call jmp _do_invoke_copy do_invoke: @@ -583,7 +602,7 @@ EOS def self.compile_binary_module_hack(bin) # this is a hack # we need the module to use ruby symbols - # but we don't know the actual lib filename (depends on ruby version, + # but we don't know the actual ruby lib filename (depends on ruby version, # platform, ...) case bin.class.name.gsub(/.*::/, '') when 'ELF' @@ -626,7 +645,7 @@ EOS bin.arch_encode_thunk(text, i) # encode a jmp [importtable] end - # update to the offset table + # update the offset table asm_table << "#{sym} #{dd} #{str_label} - ruby_import_table" } # dont forget the final 0 @@ -701,13 +720,12 @@ EOS # parse a C string into the @cp parser, create it if needed def self.parse_c(src) - @cp ||= C::Parser.new(host_exe.new(host_cpu)) - @cp.parse(src) + cp.parse(src) end # compile a C fragment into a Shellcode, honors the host ABI def self.compile_c(src) - # XXX could we reuse @cp ? (for its macros etc) + # XXX could we reuse self.cp ? (for its macros etc) cp = C::Parser.new(host_exe.new(host_cpu)) cp.parse(src) sc = Shellcode.new(host_cpu) @@ -733,9 +751,9 @@ EOS proto += "\n;" # allow 'int foo()' and '#include ' parse_c(proto) - @cp.toplevel.symbol.dup.each_value { |v| + cp.toplevel.symbol.dup.each_value { |v| next if not v.kind_of? C::Variable # enums - @cp.toplevel.symbol.delete v.name + cp.toplevel.symbol.delete v.name lib = fromlib || lib_from_sym(v.name) addr = sym_addr(lib, v.name) if addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff @@ -756,7 +774,7 @@ EOS } # constant definition from macro/enum - @cp.numeric_constants.each { |k, v| + cp.numeric_constants.each { |k, v| n = k.upcase n = "C#{n}" if n !~ /^[A-Z]/ const_set(n, v) if v.kind_of? Integer and not constants.map { |c| c.to_s }.include?(n) @@ -773,7 +791,7 @@ EOS flags = 0 flags |= 1 if proto.has_attribute('stdcall') flags |= 2 if proto.has_attribute('fastcall') - flags |= 4 if proto.type.type.integral? and @cp.sizeof(nil, proto.type.type) == 8 + flags |= 4 if proto.type.type.integral? and cp.sizeof(nil, proto.type.type) == 8 flags |= 8 if proto.type.type.float? class << self ; self ; end.send(:define_method, name) { |*a| raise ArgumentError, "bad arg count for #{name}: #{a.length} for #{proto.type.args.length}" if a.length != proto.type.args.length and not proto.type.varargs @@ -792,10 +810,10 @@ EOS when String; str_ptr(val) when Proc; cb = callback_alloc_cobj(formal, val) ; (opts[:cb_list] ||= []) << cb ; cb # TODO when Hash, Array; if formal.type.pointed.kind_of? C::Struct; yadda yadda ; end - else val.to_i + else val.to_i rescue 0 # NaN, Infinity, etc end - if opts[:expand_i64] and formal and formal.type.integral? and @cp.sizeof(formal) == 8 and host_cpu.size == 32 + if opts[:expand_i64] and formal and formal.type.integral? and cp.sizeof(formal) == 8 and host_cpu.size == 32 val = [val & 0xffff_ffff, (val >> 32) & 0xffff_ffff] val.reverse! if host_cpu.endianness != :little end @@ -822,7 +840,7 @@ EOS # C raw cb arg -> ruby object def self.convert_arg_c2rb(formal, rawargs) val = rawargs.shift - if formal.type.integral? and @cp.sizeof(formal) == 64 and host_cpu.size == 32 + if formal.type.integral? and cp.sizeof(formal) == 64 and host_cpu.size == 32 if host.cpu.endianness == :little val |= rawargs.shift << 32 else @@ -841,7 +859,7 @@ EOS ret end - def self.cp; @cp ||= nil ; end + def self.cp ; @cp ||= C::Parser.new(host_exe.new(host_cpu)) ; end def self.cp=(c); @cp = c ; end # allocate a callback for a given C prototype (string) @@ -849,10 +867,10 @@ EOS def self.callback_alloc_c(proto, &b) proto += ';' # allow 'int foo()' parse_c(proto) - v = @cp.toplevel.symbol.values.find_all { |v_| v_.kind_of? C::Variable and v_.type.kind_of? C::Function }.first - if (v and v.initializer) or @cp.toplevel.statements.find { |st| st.kind_of? C::Asm } - @cp.toplevel.statements.delete_if { |st| st.kind_of? C::Asm } - @cp.toplevel.symbol.delete v.name if v + v = cp.toplevel.symbol.values.find_all { |v_| v_.kind_of? C::Variable and v_.type.kind_of? C::Function }.first + if (v and v.initializer) or cp.toplevel.statements.find { |st| st.kind_of? C::Asm } + cp.toplevel.statements.delete_if { |st| st.kind_of? C::Asm } + cp.toplevel.symbol.delete v.name if v sc = compile_c(proto) ptr = memory_alloc(sc.encoded.length) sc.base_addr = ptr @@ -863,7 +881,7 @@ EOS elsif not v raise 'empty prototype' else - @cp.toplevel.symbol.delete v.name + cp.toplevel.symbol.delete v.name callback_alloc_cobj(v, b) end end @@ -878,8 +896,8 @@ EOS cb[:id] = id cb[:proc] = b cb[:proto] = proto - cb[:abi_stackfix] = proto.args.inject(0) { |s, a| s + [@cp.sizeof(a), @cp.typesize[:ptr]].max } if ori and ori.has_attribute('stdcall') - cb[:abi_stackfix] = proto.args[2..-1].to_a.inject(0) { |s, a| s + [@cp.sizeof(a), @cp.typesize[:ptr]].max } if ori and ori.has_attribute('fastcall') # supercedes stdcall + cb[:abi_stackfix] = proto.args.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('stdcall') + cb[:abi_stackfix] = proto.args[2..-1].to_a.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('fastcall') # supercedes stdcall @@callback_table[id] = cb id end @@ -914,19 +932,21 @@ EOS # compile a bunch of C functions, defines methods in this module to call them # returns the raw pointer to the code page - # if given a block, run the block and then undefine all the C functions + # if given a block, run the block and then undefine all the C functions & free memory def self.new_func_c(src) sc = compile_c(src) ptr = memory_alloc(sc.encoded.length) sc.base_addr = ptr - # TODO fixup external calls - this will need OS ABI compat (eg win64) + bd = sc.encoded.binding(ptr) + sc.encoded.reloc_externals.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise "unknown symbol #{ext}" } + sc.encoded.fixup(bd) memory_write ptr, sc.encode_string memory_perm ptr, sc.encoded.length, 'rwx' parse_c(src) # XXX the Shellcode parser may have defined stuff / interpreted C another way... defs = [] - @cp.toplevel.symbol.dup.each_value { |v| + cp.toplevel.symbol.dup.each_value { |v| next if not v.kind_of? C::Variable - @cp.toplevel.symbol.delete v.name + cp.toplevel.symbol.delete v.name next if not v.type.kind_of? C::Function or not v.initializer next if not off = sc.encoded.export[v.name] new_caller_for(v, v.name, ptr+off) @@ -1116,6 +1136,9 @@ EOS # on PaX-enabled systems, this may need a non-mprotect-restricted ruby interpreter def self.memory_perm(addr, len, perm) perm = perm.to_s.downcase + len += (addr & 0xfff) + 0xfff + len &= ~0xfff + addr &= ~0xfff p = 0 p |= PROT_READ if perm.include? 'r' p |= PROT_WRITE if perm.include? 'w' diff --git a/lib/metasm/metasm/exe_format/coff.rb b/lib/metasm/metasm/exe_format/coff.rb index e37f57a31d..57e4c184c6 100644 --- a/lib/metasm/metasm/exe_format/coff.rb +++ b/lib/metasm/metasm/exe_format/coff.rb @@ -409,3 +409,6 @@ class COFFArchive < ExeFormat end end end + +require 'metasm/exe_format/coff_encode' +require 'metasm/exe_format/coff_decode' diff --git a/lib/metasm/metasm/exe_format/coff_decode.rb b/lib/metasm/metasm/exe_format/coff_decode.rb index 764930e587..fc430ce991 100644 --- a/lib/metasm/metasm/exe_format/coff_decode.rb +++ b/lib/metasm/metasm/exe_format/coff_decode.rb @@ -76,7 +76,7 @@ class COFF if coff.sect_at_rva(@func_p) @exports = [] addrs = [] - @num_exports.times { |i| addrs << coff.decode_word } + @num_exports.times { addrs << coff.decode_word } @num_exports.times { |i| e = Export.new e.ordinal = i + @ordinal_base diff --git a/lib/metasm/metasm/exe_format/coff_encode.rb b/lib/metasm/metasm/exe_format/coff_encode.rb index f5e6eead61..42d232d71f 100644 --- a/lib/metasm/metasm/exe_format/coff_encode.rb +++ b/lib/metasm/metasm/exe_format/coff_encode.rb @@ -439,11 +439,11 @@ class COFF # encodes a thunk to imported function def arch_encode_thunk(edata, import) - case @cpu - when Ia32 + case @cpu.shortname + when 'ia32', 'x64' shellcode = lambda { |c| Shellcode.new(@cpu).share_namespace(self).assemble(c).encoded } if @cpu.generate_PIC - if @cpu.size == 64 + if @cpu.shortname == 'x64' edata << shellcode["#{import.thunk}: jmp [rip-$_+#{import.target}]"] return end diff --git a/lib/metasm/metasm/exe_format/elf.rb b/lib/metasm/metasm/exe_format/elf.rb index a5c29e06e6..0dc6d6934d 100644 --- a/lib/metasm/metasm/exe_format/elf.rb +++ b/lib/metasm/metasm/exe_format/elf.rb @@ -736,6 +736,9 @@ class FatELF < ExeFormat end end +require 'metasm/exe_format/elf_encode' +require 'metasm/exe_format/elf_decode' + # TODO symbol version info __END__ /* @@ -906,5 +909,3 @@ typedef struct { #define SYMINFO_CURRENT 1 #define SYMINFO_NUM 2 - P - diff --git a/lib/metasm/metasm/exe_format/elf_decode.rb b/lib/metasm/metasm/exe_format/elf_decode.rb index 9a400553cb..ff090cde16 100644 --- a/lib/metasm/metasm/exe_format/elf_decode.rb +++ b/lib/metasm/metasm/exe_format/elf_decode.rb @@ -860,8 +860,13 @@ class ELF def init_disassembler d = super() d.backtrace_maxblocks_data = 4 - case @cpu - when Ia32 + if d.get_section_at(0) + # fixes call [constructor] => 0 + d.decoded[0] = true + d.function[0] = @cpu.disassembler_default_func + end + case @cpu.shortname + when 'ia32', 'x64' old_cp = d.c_parser d.c_parser = nil d.parse_c < context) # TODO hook on (non)resolution of :w xref def get_xrefs_x(dasm, di) - if @cpu.kind_of? Ia32 and a = di.instruction.args.first and a.kind_of? Ia32::ModRM and a.seg and a.seg.val == 4 and + if @cpu.shortname =~ /ia32|x64/ and a = di.instruction.args.first and a.kind_of? Ia32::ModRM and a.seg and a.seg.val == 4 and w = get_xrefs_rw(dasm, di).find { |type, ptr, len| type == :w and ptr.externals.include? 'segment_base_fs' } and dasm.backtrace(Expression[w[1], :-, 'segment_base_fs'], di.address) == [Expression[0]] sehptr = w[1] @@ -239,8 +238,8 @@ EOS def init_disassembler d = super() d.backtrace_maxblocks_data = 4 - case @cpu - when Ia32 + case @cpu.shortname + when 'ia32', 'x64' old_cp = d.c_parser d.c_parser = nil d.parse_c '__stdcall void *GetProcAddress(int, char *);' diff --git a/lib/metasm/metasm/gui/dasm_graph.rb b/lib/metasm/metasm/gui/dasm_graph.rb index 332a9d12a0..281eaa0d1a 100644 --- a/lib/metasm/metasm/gui/dasm_graph.rb +++ b/lib/metasm/metasm/gui/dasm_graph.rb @@ -85,9 +85,20 @@ class Graph @madetree = false end + # gives a text representation of the current graph state + def dump_layout(groups=@groups) + groups.map { |g| "#{groups.index(g)} -> #{g.to.map { |t| groups.index(t) }.sort.inspect}" } + end + def auto_arrange_step + # TODO fix + # 0->[1, 2] 1->[3] 2->[3, 4] 3->[] 4->[1] + # push 0 jz l3 push 1 jz l4 push 2 l3: push 3 l4: hlt + # and more generally all non-looping graphs where this algo creates backward links + groups = @groups return if groups.length <= 1 + maketree = lambda { |roots| next if @madetree @madetree = true @@ -116,7 +127,7 @@ class Graph g.to.each { |gg| walk[gg] } } - roots.each { |g| trim[g, g.from] } + roots.each { |g| trim[g, g.from] unless g.from.empty? } roots.each { |g| walk[g] } # handle loops now (unmarked nodes) @@ -319,7 +330,7 @@ class Graph # unknown pattern, group as we can.. group_other = lambda { -puts 'graph arrange: unknown configuration', groups.map { |g| "#{groups.index(g)} -> #{g.to.map { |t| groups.index(t) }.inspect}" } +puts 'graph arrange: unknown configuration', dump_layout g1 = groups.find_all { |g| g.from.empty? } g1 << groups[rand(groups.length)] if g1.empty? g2 = g1.map { |g| g.to }.flatten.uniq - g1 @@ -408,8 +419,28 @@ puts 'graph arrange: unknown configuration', groups.map { |g| "#{groups.index(g) end } + boxxy = @box.sort_by { |bb| bb.y } + # fill gaps that we created + @box.each { |b| + bottom = b.y+b.h + next if not follower = boxxy.find { |bb| bb.y+bb.h > bottom } + + # preserve line[] constructs margins + gap = follower.y-16*follower.from.length - (bottom+16*b.to.length) + next if gap <= 0 + + @box.each { |bb| + if bb.y+bb.h <= bottom + bb.y += gap/2 + else + bb.y -= gap/2 + end + } + boxxy = @box.sort_by { |bb| bb.y } + } + @box[0,0].each { |b| - # TODO elastic positionning (ignore up arrows ?) & collision detection (box vs box and box vs arrow) + # TODO elastic positionning (ignore up arrows ?) & collision detection (box/box + box/arrow) f = b.from[0] t = b.to[0] if b.to.length == 1 and b.from.length == 1 and b.y+b.hf.y+f.h @@ -672,21 +703,32 @@ class GraphViewWidget < DrawableWidget def paint_arrow(b1, b2) x1, y1 = b1.x+b1.w/2-@curcontext.view_x, b1.y+b1.h-@curcontext.view_y x2, y2 = b2.x+b2.w/2-@curcontext.view_x, b2.y-1-@curcontext.view_y + x1o, x2o = x1, x2 margin = @margin x1 += (-(b1.to.length-1)/2 + b1.to.index(b2)) * margin/2 x2 += (-(b2.from.length-1)/2 + b2.from.index(b1)) * margin/2 return if (y1+margin < 0 and y2 < 0) or (y1 > height/@zoom and y2-margin > height/@zoom) # just clip on y - margin, x1, y1, x2, y2, b1w, b2w = [margin, x1, y1, x2, y2, b1.w, b2.w].map { |v| v*@zoom } + margin, x1, y1, x2, y2, b1w, b2w, x1o, x2o = [margin, x1, y1, x2, y2, b1.w, b2.w, x1o, x2o].map { |v| v*@zoom } - # gtk wraps coords around 0x8000 + # XXX gtk wraps coords around 0x8000 if x1.abs > 0x7000 ; y1 /= x1.abs/0x7000 ; x1 /= x1.abs/0x7000 ; end if y1.abs > 0x7000 ; x1 /= y1.abs/0x7000 ; y1 /= y1.abs/0x7000 ; end if x2.abs > 0x7000 ; y2 /= x2.abs/0x7000 ; x2 /= x2.abs/0x7000 ; end if y2.abs > 0x7000 ; x2 /= y2.abs/0x7000 ; y2 /= y2.abs/0x7000 ; end + # straighten vertical arrows if possible + if y2 > y1 and (x1-x2).abs <= margin + if b1.to.length == 1 + x1 = x2 + elsif b2.from.length == 1 + x2 = x1 + end + end + set_color_arrow(b1, b2) if margin > 1 + # draw arrow tip draw_line(x1, y1, x1, y1+margin) draw_line(x2, y2-margin+1, x2, y2) draw_line(x2-margin/2, y2-margin/2, x2, y2) @@ -695,23 +737,26 @@ class GraphViewWidget < DrawableWidget y2 -= margin-1 end if y2+margin >= y1-margin-1 + # straight vertical down arrow draw_line(x1, y1, x2, y2) if x1 != y1 or x2 != y2 - elsif x1-b1w/2-margin >= x2+b2w/2+margin # z - draw_line(x1, y1, x1-b1w/2-margin, y1) - draw_line(x1-b1w/2-margin, y1, x2+b2w/2+margin, y2) - draw_line(x2+b2w/2+margin, y2, x2, y2) - draw_line(x1, y1+1, x1-b1w/2-margin, y1+1) # double - draw_line(x1-b1w/2-margin+1, y1, x2+b2w/2+margin+1, y2) - draw_line(x2+b2w/2+margin, y2+1, x2, y2+1) + + # else arrow up, need to sneak around boxes + elsif x1o-b1w/2-margin >= x2o+b2w/2+margin # z + draw_line(x1, y1, x1o-b1w/2-margin, y1) + draw_line(x1o-b1w/2-margin, y1, x2o+b2w/2+margin, y2) + draw_line(x2o+b2w/2+margin, y2, x2, y2) + draw_line(x1, y1+1, x1o-b1w/2-margin, y1+1) # double + draw_line(x1o-b1w/2-margin+1, y1, x2o+b2w/2+margin+1, y2) + draw_line(x2o+b2w/2+margin, y2+1, x2, y2+1) elsif x1+b1w/2+margin <= x2-b2w/2-margin # invert z - draw_line(x1, y1, x1+b1w/2+margin, y1) - draw_line(x1+b1w/2+margin, y1, x2-b2w/2-margin, y2) - draw_line(x2-b2w/2-margin, y2, x2, y2) + draw_line(x1, y1, x1o+b1w/2+margin, y1) + draw_line(x1o+b1w/2+margin, y1, x2o-b2w/2-margin, y2) + draw_line(x2o-b2w/2-margin, y2, x2, y2) draw_line(x1, y1+1, x1+b1w/2+margin, y1+1) # double - draw_line(x1+b1w/2+margin+1, y1, x2-b2w/2-margin+1, y2) - draw_line(x2-b2w/2-margin, y2+1, x2, y2+1) + draw_line(x1o+b1w/2+margin+1, y1, x2o-b2w/2-margin+1, y2) + draw_line(x2o-b2w/2-margin, y2+1, x2, y2+1) else # turn around - x = (x1 <= x2 ? [x1-b1w/2-margin, x2-b2w/2-margin].min : [x1+b1w/2+margin, x2+b2w/2+margin].max) + x = (x1 <= x2 ? [x1o-b1w/2-margin, x2o-b2w/2-margin].min : [x1o+b1w/2+margin, x2o+b2w/2+margin].max) draw_line(x1, y1, x, y1) draw_line(x, y1, x, y2) draw_line(x, y2, x2, y2) @@ -936,8 +981,8 @@ class GraphViewWidget < DrawableWidget def keypress_ctrl(key) case key - when ?f - @parent_widget.inputbox('text to search (regex)') { |pat| + when ?F + @parent_widget.inputbox('text to search in curfunc (regex)') { |pat| re = /#{pat}/i list = [['addr', 'instr']] @curcontext.box.each { |b| @@ -1089,41 +1134,57 @@ class GraphViewWidget < DrawableWidget puts 'autoarrange done' when ?u gui_update + when ?R load __FILE__ - when ?S + when ?S # reset @curcontext.auto_arrange_init(@selected_boxes.empty? ? @curcontext.box : @selected_boxes) + puts 'reset', @curcontext.dump_layout, '' zoom_all redraw - when ?T + when ?T # step auto_arrange @curcontext.auto_arrange_step + puts @curcontext.dump_layout, '' zoom_all redraw - when ?L + when ?L # post auto_arrange @curcontext.auto_arrange_post zoom_all redraw - when ?V + when ?V # shrink @selected_boxes.each { |b_| - dx = (b_.from+b_.to).map { |bb| bb.x+bb.w/2 - b_.x-b_.w/2 } + dx = (b_.from + b_.to).map { |bb| bb.x+bb.w/2 - b_.x-b_.w/2 } dx = dx.inject(0) { |s, xx| s+xx }/dx.length - if dx > 0 - xmax = b_.from.map { |bb| bb.x if b_.from.find { |bbb| - bbb.x+bbb.w/2 < bb.x+bb.w/2 and bbb.y+bbb.h < bb.y - } }.compact.min - bx = b_.x+dx - bx = [bx, xmax-b_.w/2-@margin].min if xmax - b_.x = bx if bx > b_.x - else - xmin = b_.from.map { |bb| bb.x+bb.w if b_.from.find { |bbb| - bbb.x+bbb.w/2 < bb.x+bb.w/2 and bbb.y+bbb.h < bb.y - } }.compact.max - bx = b_.x+dx - bx = [bx, xmin+b_.w/2+@margin].max if xmin - b_.x = bx if bx < b_.x - end + b_.x += dx } redraw + when ?I # create arbitrary boxes/links + if @selected_boxes.empty? + @fakebox ||= 0 + b = @curcontext.new_box "id_#@fakebox", + :addresses => [], :line_address => [], + :line_text_col => [[[" blublu #@fakebox", :text]]] + b.w = @font_width * 15 + b.h = @font_height * 2 + b.x = rand(200) - 100 + b.y = rand(200) - 100 + + @fakebox += 1 + else + b1, *bl = @selected_boxes + bl = [b1] if bl.empty? # loop + bl.each { |b2| + if b1.to.include? b2 + b1.to.delete b2 + b2.from.delete b1 + else + b1.to << b2 + b2.from << b1 + end + } + end + redraw + when ?1 # (numeric) zoom to 1:1 if @zoom == 1.0 zoom_all diff --git a/lib/metasm/metasm/gui/dasm_main.rb b/lib/metasm/metasm/gui/dasm_main.rb index 85e6c9d16b..02e825cfc0 100644 --- a/lib/metasm/metasm/gui/dasm_main.rb +++ b/lib/metasm/metasm/gui/dasm_main.rb @@ -572,6 +572,9 @@ class DisasmWidget < ContainerChoiceWidget return if not popup = DasmWindow.new popup.display(@dasm, @entrypoints) w = popup.dasm_widget + w.bg_color_callback = @bg_color_callback if bg_color_callback + w.keyboard_callback = @keyboard_callback + w.keyboard_callback_ctrl = @keyboard_callback_ctrl w.clones = @clones.concat w.clones w.focus_addr(*focus) popup diff --git a/lib/metasm/metasm/gui/gtk.rb b/lib/metasm/metasm/gui/gtk.rb index c00fcfac69..e263c97cd3 100644 --- a/lib/metasm/metasm/gui/gtk.rb +++ b/lib/metasm/metasm/gui/gtk.rb @@ -161,6 +161,7 @@ class DrawableWidget < Gtk::DrawingArea key = { :page_up => :pgup, :page_down => :pgdown, :next => :pgdown, :escape => :esc, :return => :enter, :l1 => :f11, :l2 => :f12, + :prior => :pgup, :space => ?\ , :asciitilde => ?~, :quoteleft => ?`, diff --git a/lib/metasm/metasm/ia32/compile_c.rb b/lib/metasm/metasm/ia32/compile_c.rb index 4bfc30383c..f9a31abf55 100644 --- a/lib/metasm/metasm/ia32/compile_c.rb +++ b/lib/metasm/metasm/ia32/compile_c.rb @@ -907,7 +907,7 @@ class CCompiler < C::Compiler ptr = make_volatile(ptr, expr.lexpr.type) if ptr.kind_of? Address instr 'call', ptr f = expr.lexpr - f = f.rexpr while f.kind_of? C::CExpression and not f.op and f.type == f.rexpr.type + f = f.rexpr while f.kind_of? C::CExpression and not f.op and f.rexpr.kind_of? C::Typed and f.type == f.rexpr.type if not f.type.attributes.to_a.include? 'stdcall' and (not f.kind_of?(C::Variable) or not f.attributes.to_a.include? 'stdcall') al = typesize[:ptr] argsz = expr.rexpr.inject(0) { |sum, a| sum + (sizeof(a) + al - 1) / al * al } diff --git a/lib/metasm/metasm/ia32/encode.rb b/lib/metasm/metasm/ia32/encode.rb index 41c96c760e..f4545e7cdf 100644 --- a/lib/metasm/metasm/ia32/encode.rb +++ b/lib/metasm/metasm/ia32/encode.rb @@ -181,6 +181,7 @@ class Ia32 base = op.bin.dup oi = op.args.zip(i.args) set_field = lambda { |f, v| + v ||= 0 # ST => ST(0) fld = op.fields[f] base[fld[0]] |= v << fld[1] } diff --git a/lib/metasm/metasm/ia32/opcodes.rb b/lib/metasm/metasm/ia32/opcodes.rb index ded0fab6b6..07688d51b2 100644 --- a/lib/metasm/metasm/ia32/opcodes.rb +++ b/lib/metasm/metasm/ia32/opcodes.rb @@ -176,6 +176,7 @@ class Ia32 addop('mov', [0x8C], 0, {:d => [0, 1], :seg3 => [1, 3]}, :seg3) { |op| op.args.reverse! } addop 'out', [0xE6], nil, {:w => [0, 0]}, :reg_eax, :u8 addop 'out', [0xE6], nil, {:w => [0, 0]}, :u8 + addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_dx, :reg_eax addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_eax, :reg_dx addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_eax # implicit arguments addop 'out', [0xEE], nil, {:w => [0, 0]} diff --git a/lib/metasm/metasm/ia32/parse.rb b/lib/metasm/metasm/ia32/parse.rb index 26259007d2..a5b110b971 100644 --- a/lib/metasm/metasm/ia32/parse.rb +++ b/lib/metasm/metasm/ia32/parse.rb @@ -168,6 +168,8 @@ end # parses an arbitrary ia32 instruction argument def parse_argument(lexer) + lexer = AsmPreprocessor.new(lexer) if lexer.kind_of? String + # reserved names (registers/segments etc) @args_token ||= [Reg, SimdReg, SegReg, DbgReg, CtrlReg, FpReg].map { |a| a.s_to_i.keys }.flatten.inject({}) { |h, e| h.update e => true } @@ -238,7 +240,7 @@ end cond = true if s = o.props[:argsz] and (arg.kind_of? Reg or arg.kind_of? ModRM) - cond = (!arg.sz or arg.sz == s) + cond = (!arg.sz or arg.sz == s or spec == :reg_dx) end cond and diff --git a/lib/metasm/metasm/main.rb b/lib/metasm/metasm/main.rb index 9a6322b60a..e3bc165d79 100644 --- a/lib/metasm/metasm/main.rb +++ b/lib/metasm/metasm/main.rb @@ -341,18 +341,30 @@ class Expression < ExpressionType # in operands order, and allows nesting using sub-arrays # ex: Expression[[:-, 42], :*, [1, :+, [4, :*, 7]]] # with a single argument, return it if already an Expression, else construct a new one (using unary +/-) - def self.[](l, op = nil, r = nil) - raise ArgumentError, 'invalid Expression[nil]' if not l and not r and not op - return l if l.kind_of? Expression and not op - l, op, r = nil, :-, -l if not op and l.kind_of? ::Numeric and l < 0 - l, op, r = nil, :+, l if not op - l, op, r = nil, l, op if not r + def self.[](l, op=nil, r=nil) + if not r # need to shift args + if not op + raise ArgumentError, 'invalid Expression[nil]' if not l + return l if l.kind_of? Expression + if l.kind_of? ::Numeric and l < 0 + r = -l + op = :'-' + else + r = l + op = :'+' + end + else + r = op + op = l + end + l = nil + else l = self[*l] if l.kind_of? ::Array + end r = self[*r] if r.kind_of? ::Array new(op, r, l) end - # checks if a given Expression/Integer is in the type range # returns true if it is, false if it overflows, and nil if cannot be determined (eg unresolved variable) def self.in_range?(val, type) @@ -391,7 +403,7 @@ class Expression < ExpressionType # will not match 1+2 and 2+1 def ==(o) # shortcircuit recursion - o.object_id == object_id or (o.class == self.class and @op == o.op and @lexpr == o.lexpr and @rexpr == o.rexpr) + o.object_id == object_id or (o.kind_of?(Expression) and @op == o.op and @lexpr == o.lexpr and @rexpr == o.rexpr) end # make it useable as Hash key (see +==+) @@ -517,18 +529,8 @@ class Expression < ExpressionType 0 elsif l == 1 Expression[r, :'!=', 0].reduce_rec - elsif r == 0 # (no sideeffects) && 0 => 0 - sideeffect = lambda { |e| - if e.kind_of? Expression - not [:+, :-, :*, :/, :&, :|, :^, :>, :<, :>>, :<<, :'==', :'!=', :<=, :>=, :'&&', :'||'].include?(e.op) or - sideeffect[e.lexpr] or sideeffect[e.rexpr] - elsif e.kind_of? ExpressionType - true # fail safe - else - false - end - } - 0 if not sideeffect[l] + elsif r == 0 + 0 # XXX l could be a special ExprType with sideeffects ? end elsif @op == :'||' if l.kind_of? ::Numeric and l != 0 # shortcircuit eval @@ -599,26 +601,7 @@ class Expression < ExpressionType Expression[[l.lexpr, :&, r], l.op, [l.rexpr, :&, r]].reduce_rec # rol/ror composition elsif r.kind_of? ::Integer and l.kind_of? Expression and l.op == :| - m = Expression[[['var', :sh_op, 'amt'], :|, ['var', :inv_sh_op, 'inv_amt']], :&, 'mask'] - if vars = Expression[l, :&, r].match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt', 'mask') and vars[:sh_op] == {:>> => :<<, :<< => :>>}[ vars[:inv_sh_op]] and - ((vars['amt'].kind_of?(::Integer) and vars['inv_amt'].kind_of?(::Integer) and ampl = vars['amt'] + vars['inv_amt']) or - (vars['amt'].kind_of? Expression and vars['amt'].op == :% and vars['amt'].rexpr.kind_of? ::Integer and - vars['inv_amt'].kind_of? Expression and vars['inv_amt'].op == :% and vars['amt'].rexpr == vars['inv_amt'].rexpr and ampl = vars['amt'].rexpr)) and - vars['mask'].kind_of?(::Integer) and vars['mask'] == (1<> => :<<, :<< => :>>}[ivars[:inv_sh_op]] and - ((ivars['amt'].kind_of?(::Integer) and ivars['inv_amt'].kind_of?(::Integer) and ampl = ivars['amt'] + ivars['inv_amt']) or - (ivars['amt'].kind_of? Expression and ivars['amt'].op == :% and ivars['amt'].rexpr.kind_of? ::Integer and - ivars['inv_amt'].kind_of? Expression and ivars['inv_amt'].op == :% and ivars['amt'].rexpr == ivars['inv_amt'].rexpr and ampl = ivars['amt'].rexpr)) and - ivars['mask'].kind_of?(::Integer) and ivars['mask'] == (1< b+c+0 - # a+((-a)+(b+c)) => 0+b+c - neg_l = l.rexpr if l.kind_of? Expression and l.op == :- - - # recursive search & replace -lexpr by 0 - simplifier = lambda { |cur| - if (neg_l and neg_l == cur) or (cur.kind_of? Expression and cur.op == :- and not cur.lexpr and cur.rexpr == l) - # -l found - 0 - else - # recurse - if cur.kind_of? Expression and cur.op == :+ - if newl = simplifier[cur.lexpr] - Expression[newl, cur.op, cur.rexpr].reduce_rec - elsif newr = simplifier[cur.rexpr] - Expression[cur.lexpr, cur.op, newr].reduce_rec - end - end - end - } - - simplifier[r] + reduce_rec_add(l, r) end end @@ -719,6 +681,60 @@ class Expression < ExpressionType ret end + + # a+(b+(c+(-a))) => b+c+0 + # a+((-a)+(b+c)) => 0+b+c + def reduce_rec_add(l, r) + if l.kind_of? Expression and l.op == :- and not l.lexpr + neg_l = l.rexpr + else + neg_l = Expression[:-, l] + end + + # recursive search & replace -lexpr by 0 + simplifier = lambda { |cur| + if neg_l == cur + # -l found + 0 + elsif cur.kind_of? Expression and cur.op == :+ + # recurse + if newl = simplifier[cur.lexpr] + Expression[newl, cur.op, cur.rexpr].reduce_rec + elsif newr = simplifier[cur.rexpr] + Expression[cur.lexpr, cur.op, newr].reduce_rec + end + end + } + + simplifier[r] + end + + # a check to see if an Expr is the composition of two rotations (rol eax, 4 ; rol eax, 6 => rol eax, 10) + # this is a bit too ugly to stay in the main reduce_rec body. + def reduce_rec_composerol + m = Expression[[['var', :sh_op, 'amt'], :|, ['var', :inv_sh_op, 'inv_amt']], :&, 'mask'] + if vars = Expression[l, :&, r].match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt', 'mask') and vars[:sh_op] == {:>> => :<<, :<< => :>>}[ vars[:inv_sh_op]] and + ((vars['amt'].kind_of?(::Integer) and vars['inv_amt'].kind_of?(::Integer) and ampl = vars['amt'] + vars['inv_amt']) or + (vars['amt'].kind_of? Expression and vars['amt'].op == :% and vars['amt'].rexpr.kind_of? ::Integer and + vars['inv_amt'].kind_of? Expression and vars['inv_amt'].op == :% and vars['amt'].rexpr == vars['inv_amt'].rexpr and ampl = vars['amt'].rexpr)) and + vars['mask'].kind_of?(::Integer) and vars['mask'] == (1<> => :<<, :<< => :>>}[ivars[:inv_sh_op]] and + ((ivars['amt'].kind_of?(::Integer) and ivars['inv_amt'].kind_of?(::Integer) and ampl = ivars['amt'] + ivars['inv_amt']) or + (ivars['amt'].kind_of? Expression and ivars['amt'].op == :% and ivars['amt'].rexpr.kind_of? ::Integer and + ivars['inv_amt'].kind_of? Expression and ivars['inv_amt'].op == :% and ivars['amt'].rexpr == ivars['inv_amt'].rexpr and ampl = ivars['amt'].rexpr)) and + ivars['mask'].kind_of?(::Integer) and ivars['mask'] == (1< {'any' => 42} # Expression[42, :+, 28].match(Expression['any', :+, 'any'], 'any') => false @@ -900,11 +916,11 @@ class EncodedData # base defaults to the first export name + its offset def binding(base = nil) if not base - key = @export.keys.sort_by { |k| @export[k] }.first + key = @export.index(@export.values.min) return {} if not key base = (@export[key] == 0 ? key : Expression[key, :-, @export[key]]) end - @export.inject({}) { |binding, (n, o)| binding.update n => Expression[base, :+, o] } + @export.inject({}) { |binding, (n, o)| binding.update n => Expression.new(:+, o, base) } end # returns an array of variables that needs to be defined for a complete #fixup @@ -940,36 +956,35 @@ class EncodedData # concatenation of another +EncodedData+ (or nil/Fixnum/anything supporting String#<<) def << other - - case other when nil when ::Fixnum fill - @data = @data.realstring if defined? VirtualString and @data.kind_of? VirtualString + @data = @data.to_str if not @data.kind_of? String @data << other @virtsize += 1 when EncodedData fill if not other.data.empty? - other.reloc.each { |k, v| @reloc[k + @virtsize] = v } - cf = (other.export.keys & @export.keys).find_all { |k| other.export[k] != @export[k] - @virtsize } - raise "edata merge: label conflict #{cf.inspect}" if not cf.empty? - other.export.each { |k, v| @export[k] = v + @virtsize } + other.reloc.each { |k, v| @reloc[k + @virtsize] = v } if not other.reloc.empty? + if not other.export.empty? + other.export.each { |k, v| + if @export[k] and @export[k] != v + @virtsize + cf = (other.export.keys & @export.keys).find_all { |k_| other.export[k_] != @export[k_] - @virtsize } + raise "edata merge: label conflict #{cf.inspect}" + end + @export[k] = v + @virtsize + } other.inv_export.each { |k, v| @inv_export[@virtsize + k] = v } - if @data.empty?; @data = other.data.dup - elsif defined? VirtualString and @data.kind_of? VirtualString; @data = @data.realstring << other.data - else - if(other.data.respond_to?('force_encoding')) - other.data.force_encoding("binary") end - - @data << other.data + if @data.empty?; @data = other.data.dup + elsif not @data.kind_of?(String); @data = @data.to_str << other.data + else @data << other.data end @virtsize += other.virtsize else fill if @data.empty?; @data = other.dup - elsif defined? VirtualString and @data.kind_of? VirtualString; @data = @data.realstring << other + elsif not @data.kind_of?(String); @data = @data.to_str << other else @data << other end @virtsize += other.length @@ -1095,5 +1110,31 @@ class EncodedData raise EncodeError, 'cannot patch data: new content too long' if to - from < content.length self[from, content.length] = content end + + # returns a list of offsets where /pat/ can be found inside @data + # scan is done per chunk of chunksz bytes, with a margin for chunk-overlapping patterns + # yields each offset found, and only include it in the result if the block returns !false + def pattern_scan(pat, chunksz=nil, margin=nil) + chunksz ||= 4*1024*1024 # scan 4MB at a time + margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries + pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String + + found = [] + chunkoff = 0 + while chunkoff < @data.length + chunk = @data[chunkoff, chunksz+margin].to_str + off = 0 + while match_off = (chunk[off..-1] =~ pat) + break if off+match_off >= chunksz # match fully in margin + match_addr = chunkoff + off + match_off + found << match_addr if not block_given? or yield(match_addr) + off += match_off + 1 + # XXX +1 or +lastmatch.length ? + # 'aaaabc'.pattern_scan(/a*bc/) will match 5 times here + end + chunkoff += chunksz + end + found + end end end diff --git a/lib/metasm/metasm/os/main.rb b/lib/metasm/metasm/os/main.rb index 203293b36c..27bde77010 100644 --- a/lib/metasm/metasm/os/main.rb +++ b/lib/metasm/metasm/os/main.rb @@ -227,10 +227,11 @@ class VirtualString # returns a new VirtualString (using dup) if the request is bigger than @pagelength bytes def read_range(from, len) from += @addr_start - base, page = cache_get_page(from) if not len + base, page = cache_get_page(from) page[from - base] elsif len <= @pagelength + base, page = cache_get_page(from) s = page[from - base, len] if from+len-base > @pagelength # request crosses a page boundary base, page = cache_get_page(from+len) @@ -846,5 +847,18 @@ class Debugger instance_eval File.read(plugin_filename) end + + # see EData#pattern_scan + # scans only mapped areas of @memory, using os_process.mappings + def pattern_scan(pat) + ret = [] + os_process.mappings.each { |a, l, *o| + EncodedData.new(@memory[a, l]).pattern_scan(pat) { |o| + o += a + ret << o if not block_given? or yield(o) + } + } + ret + end end end diff --git a/lib/metasm/metasm/os/windows.rb b/lib/metasm/metasm/os/windows.rb index 81db40c90a..23fcd87c5b 100644 --- a/lib/metasm/metasm/os/windows.rb +++ b/lib/metasm/metasm/os/windows.rb @@ -22,7 +22,7 @@ typedef unsigned int UINT; typedef long LONG; typedef unsigned long ULONG, DWORD, *LPDWORD; typedef int BOOL; -typedef unsigned long long DWORD64; +typedef unsigned long long DWORD64, ULONGLONG; typedef intptr_t INT_PTR, LONG_PTR; typedef uintptr_t UINT_PTR, ULONG_PTR, DWORD_PTR, SIZE_T; @@ -665,6 +665,38 @@ Thread32Next( LPTHREADENTRY32 lpte ); +typedef struct _MEMORY_BASIC_INFORMATION32 { + DWORD BaseAddress; + DWORD AllocationBase; + DWORD AllocationProtect; // initial (alloc time) prot + DWORD RegionSize; + DWORD State; // MEM_FREE/COMMIT/RESERVE + DWORD Protect; // PAGE_EXECUTE_READWRITE etc + DWORD Type; // MEM_IMAGE/MAPPED/PRIVATE +} MEMORY_BASIC_INFORMATION32, *PMEMORY_BASIC_INFORMATION32; + +typedef struct _MEMORY_BASIC_INFORMATION64 { + ULONGLONG BaseAddress; + ULONGLONG AllocationBase; + DWORD AllocationProtect; + DWORD __alignment1; + ULONGLONG RegionSize; + DWORD State; + DWORD Protect; + DWORD Type; + DWORD __alignment2; +} MEMORY_BASIC_INFORMATION64, *PMEMORY_BASIC_INFORMATION64; + +SIZE_T +WINAPI +VirtualQueryEx( + HANDLE hProcess, + LPVOID lpAddress, + PMEMORY_BASIC_INFORMATION32 lpBuffer, + SIZE_T dwLength // sizeof lpBuffer +); + + EOS new_api_c < 0 + + prot = { + WinAPI::PAGE_NOACCESS => '---', + WinAPI::PAGE_READONLY => 'r--', + WinAPI::PAGE_READWRITE => 'rw-', + WinAPI::PAGE_WRITECOPY => 'rw-', + WinAPI::PAGE_EXECUTE => '--x', + WinAPI::PAGE_EXECUTE_READ => 'r-x', + WinAPI::PAGE_EXECUTE_READWRITE => 'rwx', + WinAPI::PAGE_EXECUTE_WRITECOPY => 'rwx' + }[info[:protect] & 0xff] + prot << 'g' if info[:protect] & WinAPI::PAGE_GUARD > 0 + prot << 'p' if info[:type] & WinAPI::MEM_PRIVATE > 0 + + list << [info[:baseaddress], info[:regionsize], prot] + end + + list + end end class << self @@ -1005,12 +1066,6 @@ class WindowsRemoteString < VirtualString return if WinAPI.readprocessmemory(@handle, addr, page, len, 0) == 0 page end - - def realstring - s = [0].pack('C') * @length - WinAPI.readprocessmemory(@handle, @addr_start, s, @length, 0) - s - end end class WinDbgAPI diff --git a/lib/metasm/metasm/parse.rb b/lib/metasm/metasm/parse.rb index 0d84a4a687..4617d0a015 100644 --- a/lib/metasm/metasm/parse.rb +++ b/lib/metasm/metasm/parse.rb @@ -40,7 +40,7 @@ class CPU raise tok, 'invalid opcode' if not opcode_list_byname[tok.raw] i.opname = tok.raw - i.backtrace = tok.backtrace.dup + i.backtrace = tok.backtrace lexer.skip_space # find arguments list @@ -317,7 +317,7 @@ class ExeFormat raise tok, "label redefinition" if new_label(lname) != lname end l = Label.new(lname) - l.backtrace = tok.backtrace.dup + l.backtrace = tok.backtrace @cursource << l lasteol = false else @@ -331,7 +331,7 @@ class ExeFormat end if lname = @locallabels_fwd.delete('endinstr') l = Label.new(lname) - l.backtrace = tok.backtrace.dup + l.backtrace = tok.backtrace @cursource << l end end @@ -377,7 +377,7 @@ class ExeFormat @lexer.unreadtok ntok end raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol - @cursource << Align.new(e, fillwith, tok.backtrace.dup) + @cursource << Align.new(e, fillwith, tok.backtrace) when '.pad' @lexer.skip_space @@ -394,12 +394,12 @@ class ExeFormat @lexer.unreadtok ntok end raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol - @cursource << Padding.new(fillwith, tok.backtrace.dup) + @cursource << Padding.new(fillwith, tok.backtrace) when '.offset' e = Expression.parse(@lexer) raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol - @cursource << Offset.new(e, tok.backtrace.dup) + @cursource << Offset.new(e, tok.backtrace) when '.padto' e = Expression.parse(@lexer) @@ -418,7 +418,7 @@ class ExeFormat @lexer.unreadtok ntok end raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol - @cursource << Padding.new(fillwith, tok.backtrace.dup) << Offset.new(e, tok.backtrace.dup) + @cursource << Padding.new(fillwith, tok.backtrace) << Offset.new(e, tok.backtrace) else @cpu.parse_parser_instruction(self, tok) @@ -441,15 +441,15 @@ class ExeFormat break end end - Data.new(type, arr, 1, tok.backtrace.dup) + Data.new(type, arr, 1, tok.backtrace) end def parse_data_data(type) raise ParseError, 'need data content' if not tok = @lexer.readtok if tok.type == :punct and tok.raw == '?' - Data.new type, :uninitialized, 1, tok.backtrace.dup + Data.new type, :uninitialized, 1, tok.backtrace elsif tok.type == :quoted - Data.new type, tok.value, 1, tok.backtrace.dup + Data.new type, tok.value, 1, tok.backtrace else @lexer.unreadtok tok raise tok, 'invalid data' if not i = Expression.parse(@lexer) @@ -470,10 +470,10 @@ class ExeFormat end end raise ntok, 'syntax error, ) expected' if not ntok = @lexer.readtok or ntok.type != :punct or ntok.raw != ')' - Data.new type, content, count, tok.backtrace.dup + Data.new type, content, count, tok.backtrace else @lexer.unreadtok ntok - Data.new type, i, 1, tok.backtrace.dup + Data.new type, i, 1, tok.backtrace end end end @@ -664,7 +664,7 @@ class Expression l = lexer.program.cursource.last if not l.kind_of? Label l = Label.new(lexer.program.new_label('instr_start')) - l.backtrace = tok.backtrace.dup + l.backtrace = tok.backtrace lexer.program.cursource << l end tok.value = l.name @@ -672,7 +672,7 @@ class Expression l = lexer.program.cursource.first if not l.kind_of? Label l = Label.new(lexer.program.new_label('section_start')) - l.backtrace = tok.backtrace.dup + l.backtrace = tok.backtrace lexer.program.cursource.unshift l end tok.value = l.name diff --git a/lib/metasm/metasm/parse_c.rb b/lib/metasm/metasm/parse_c.rb index 4417135b7c..9492896216 100644 --- a/lib/metasm/metasm/parse_c.rb +++ b/lib/metasm/metasm/parse_c.rb @@ -276,6 +276,7 @@ module C case tok.raw when ';'; break when ',' + when '}'; parser.unreadtok(tok); break else raise tok, '"," or ";" expected' end end @@ -647,7 +648,7 @@ module C raise tok || parser, '"(" expected' if not tok = parser.skipspaces or tok.type != :punct or tok.raw != '(' raise tok, 'expr expected' if not expr = CExpression.parse(parser, scope) or not expr.type.arithmetic? raise tok || parser, '")" expected' if not tok = parser.skipspaces or tok.type != :punct or tok.raw != ')' - raise tok || parser, '";" expected' if not tok = parser.skipspaces or tok.type != :punct or tok.raw != ';' + parser.checkstatementend(tok) new expr, body end @@ -821,7 +822,7 @@ module C end raise tok || parser, '")" expected' if not tok or tok.type != :punct or tok.raw != ')' ret.parse_attributes(parser) - raise tok || parser, '";" expected' if not tok = parser.skipspaces or tok.type != :punct or tok.raw != ';' + parser.checkstatementend(tok) ret end end @@ -1076,8 +1077,13 @@ module C @lexer.define_weak('__STDC__') @lexer.define_weak('__const', 'const') @lexer.define_weak('__signed', 'signed') + @lexer.define_weak('__signed__', 'signed') @lexer.define_weak('__volatile', 'volatile') - @lexer.nodefine_strong('__REDIRECT_NTH') # booh gnu + if not @lexer.definition['__builtin_constant_p'] + # magic macro to check if its arg is an immediate value + @lexer.define_weak('__builtin_constant_p', '0') + @lexer.definition['__builtin_constant_p'].args = [Preprocessor::Token.new([])] + end @lexer.nodefine_strong('alloca') # TODO __builtin_alloca @lexer.hooked_include['stddef.h'] = <> 1)) > 0 # char == unsigned char @@ -2707,11 +2721,11 @@ EOH r.join("\n") end - # returns a string containing the C definition of the toplevel function funcname, with its dependencies - def dump_definition(funcname) + # returns a string containing the C definition(s) of toplevel functions, with their dependencies + def dump_definition(*funcnames) oldst = @toplevel.statements @toplevel.statements = [] - dump_definitions([@toplevel.symbol[funcname]]) + dump_definitions(funcnames.map { |f| @toplevel.symbol[f] }) ensure @toplevel.statements = oldst end @@ -3373,7 +3387,7 @@ EOH r, dep = @rexpr.dump(scope, r, dep) when Block r.last << '(' - r, dep = Statement.dump(scope, r, dep) + r, dep = Statement.dump(@rexpr, scope, r, dep) r.last << ' )' when Label r.last << '&&' << @rexpr.name @@ -3423,7 +3437,7 @@ EOH else r, dep = CExpression.dump(@lexpr, scope, r, dep, (@lexpr.kind_of? CExpression and @lexpr.lexpr and @lexpr.op != @op)) r.last << ' ' << @op.to_s << ' ' - r, dep = CExpression.dump(@rexpr, scope, r, dep, (@rexpr.kind_of? CExpression and @rexpr.lexpr and @rexpr.op != @op)) + r, dep = CExpression.dump(@rexpr, scope, r, dep, (@rexpr.kind_of? CExpression and @rexpr.lexpr and @rexpr.op != @op and @rexpr.op != :funcall)) end end r.last << ')' if brace and @op != :'->' and @op != :'.' and @op != :'[]' and (@op or @rexpr.kind_of? CExpression) diff --git a/lib/metasm/metasm/preprocessor.rb b/lib/metasm/metasm/preprocessor.rb index 93ab2016f1..e1c92b08b9 100644 --- a/lib/metasm/metasm/preprocessor.rb +++ b/lib/metasm/metasm/preprocessor.rb @@ -551,6 +551,7 @@ class Preprocessor def ungetchar @pos = @ungetcharpos @lineno = @ungetcharlineno + nil end # returns true if no more data is available @@ -562,6 +563,7 @@ class Preprocessor # lifo def unreadtok(tok) @queue << tok if tok + nil end # calls readtok_nopp and handles preprocessor directives @@ -622,17 +624,88 @@ class Preprocessor def readtok_nopp return @queue.pop unless @queue.empty? - tok = Token.new((@backtrace.map { |bt| bt[0, 2] } + [@filename, @lineno]).flatten) + nbt = [] + @backtrace.each { |bt| nbt << bt[0] << bt[1] } + tok = Token.new(nbt << @filename << @lineno) case c = getchar when nil return nil when ?', ?" # read quoted string value - tok.type = :quoted - delimiter = c + readtok_nopp_str(tok, c) + when ?a..?z, ?A..?Z, ?0..?9, ?$, ?_ + tok.type = :string + raw = tok.raw << c + loop do + case c = getchar + when nil; ungetchar; break # avoids 'no method "coerce" for nil' warning + when ?a..?z, ?A..?Z, ?0..?9, ?$, ?_ + raw << c + else ungetchar; break + end + end + + when ?\ , ?\t, ?\r, ?\n, ?\f + tok.type = ((c == ?\ || c == ?\t) ? :space : :eol) + raw = tok.raw << c + loop do + case c = getchar + when nil; break + when ?\ , ?\t + when ?\n, ?\f, ?\r; tok.type = :eol + else break + end + raw << c + end + ungetchar + + when ?/ + raw = tok.raw << c + # comment + case c = getchar + when ?/ + # till eol + tok.type = :eol + raw << c + while c = getchar + raw << c + break if c == ?\n + end + when ?* + tok.type = :space + raw << c + seenstar = false + loop do + raise tok, 'unterminated c++ comment' if not c = getchar + raw << c + case c + when ?*; seenstar = true + when ?/; break if seenstar # no need to reset seenstar, already false + else seenstar = false + end + end + else + # just a slash + ungetchar + tok.type = :punct + end + + else + tok.type = :punct tok.raw << c + end + + tok + end + + # we just read a ' or a ", read until the end of the string + # tok.value will contain the raw string (with escapes interpreted etc) + def readtok_nopp_str(tok, delimiter) + tok.type = :quoted + tok.raw << delimiter tok.value = '' + c = nil loop do raise tok, 'unterminated string' if not c = getchar tok.raw << c @@ -685,72 +758,10 @@ class Preprocessor end end - when ?a..?z, ?A..?Z, ?0..?9, ?$, ?_ - tok.type = :string - tok.raw << c - loop do - case c = getchar - when nil; ungetchar; break # avoids 'no method "coerce" for nil' warning - when ?a..?z, ?A..?Z, ?0..?9, ?$, ?_ - tok.raw << c - else ungetchar; break - end - end - - when ?\ , ?\t, ?\r, ?\n, ?\f - tok.type = :space - tok.raw << c - loop do - case c = getchar - when nil; break - when ?\ , ?\t - when ?\n, ?\f, ?\r; tok.type = :eol - else break - end - tok.raw << c - end - ungetchar - tok.type = :eol if tok.raw.index(?\n) or tok.raw.index(?\f) - - when ?/ - tok.raw << c - # comment - case c = getchar - when ?/ - # till eol - tok.type = :eol - tok.raw << c - while c = getchar - tok.raw << c - break if c == ?\n - end - when ?* - tok.type = :space - tok.raw << c - seenstar = false - loop do - raise tok, 'unterminated c++ comment' if not c = getchar - tok.raw << c - case c - when ?*; seenstar = true - when ?/; break if seenstar # no need to reset seenstar, already false - else seenstar = false - end - end - else - # just a slash - ungetchar - tok.type = :punct - end - - else - tok.type = :punct - tok.raw << c - end - tok end + # defines a simple preprocessor macro (expands to 0 or 1 token) # does not check overwriting def define(name, value=nil, from=caller.first) diff --git a/lib/metasm/metasm/render.rb b/lib/metasm/metasm/render.rb index 8f4057fa23..aeb7183f5e 100644 --- a/lib/metasm/metasm/render.rb +++ b/lib/metasm/metasm/render.rb @@ -65,9 +65,24 @@ end class Expression include Renderable attr_accessor :render_info - def render - l, r = [@lexpr, @rexpr].map { |e| - if e.kind_of? Integer + + # this is an accessor to @@render_int, the lambda used to render integers > 10 + # usage: Expression.render_int = lambda { |e| '0x%x' % e } + # or Expression.render_int { |e| '0x%x' % e } + # XXX the returned string should be suitable for inclusion in a label name etc + def self.render_int(&b) + if b + @@render_int = b + else + @@render_int + end + end + def self.render_int=(p) + @@render_int = p + end + @@render_int = nil + + def render_integer(e) if render_info and @render_info[:char] ee = e v = [] @@ -78,7 +93,7 @@ class Expression v.reverse! if @render_info[:char] == :big if not v.empty? and v.all? { |c| c < 0x7f } # XXX endianness - next "'" + v.pack('C*').inspect.gsub("'") { '\\\'' }[1...-1] + "'" + return "'" + v.pack('C*').inspect.gsub("'") { '\\\'' }[1...-1] + "'" end end if e < 0 @@ -86,18 +101,23 @@ class Expression e = -e end if e < 10; e = e.to_s + elsif @@render_int + e = @@render_int[e] else e = '%xh' % e e = '0' << e unless (?0..?9).include? e[0] end e = '-' << e if neg - end e - } - nosq = {:* => [:*], :+ => [:+, :-, :*], :- => [:+, :-, :*]} - l = ['(', l, ')'] if @lexpr.kind_of? Expression and not nosq[@op].to_a.include?(@lexpr.op) - nosq[:-] = [:*] - r = ['(', r, ')'] if @rexpr.kind_of? Expression and not nosq[@op].to_a.include?(@rexpr.op) + end + + NOSQ1 = NOSQ2 = {:* => [:*], :+ => [:+, :-, :*], :- => [:+, :-, :*]} + NOSQ2[:-] = [:*] + def render + l = @lexpr.kind_of?(Integer) ? render_integer(@lexpr) : @lexpr + r = @rexpr.kind_of?(Integer) ? render_integer(@rexpr) : @rexpr + l = ['(', l, ')'] if @lexpr.kind_of? Expression and (not oa = NOSQ1[@op] or not oa.include?(@lexpr.op)) + r = ['(', r, ')'] if @rexpr.kind_of? Expression and (not oa = NOSQ2[@op] or not oa.include?(@rexpr.op)) op = @op if l or @op != :+ if op == :+ r0 = [r].flatten.first diff --git a/lib/metasm/metasm/x86_64/opcodes.rb b/lib/metasm/metasm/x86_64/opcodes.rb index feec83a817..fd97fdbd95 100644 --- a/lib/metasm/metasm/x86_64/opcodes.rb +++ b/lib/metasm/metasm/x86_64/opcodes.rb @@ -42,6 +42,7 @@ class X86_64 @opcode_list.delete_if { |o| o.args.include? :modrmmmx or # mmx is dead! o.args.include? :regmmx or # movd + o.args.include? :regfp or # no fpu beyond this line o.name == 'loadall' or o.name == 'arpl' } diff --git a/lib/metasm/misc/bottleneck.rb b/lib/metasm/misc/bottleneck.rb index dcf052bf2c..65babeb6cb 100644 --- a/lib/metasm/misc/bottleneck.rb +++ b/lib/metasm/misc/bottleneck.rb @@ -4,34 +4,58 @@ # Licence is LGPL, see LICENCE in the top-level directory # A script to help finding performance bottlenecks: -# ruby-prof myscript.rb +# +# $ ruby-prof myscript.rb # => String#+ gets called 50k times and takes 30s -# LOGCALLER='String#+' ruby -r log_caller myscript.rb +# +# $ LOGCALLER='String#+' ruby -r bottleneck myscript.rb # => String#+ called 40k times from: # stuff.rb:42 in Myclass#uglymethod from # stuff.rb:32 in Myclass#initialize +# # now you know what to rewrite -def log_caller(cls, meth, histlen=-1) - malias = meth.to_s.gsub(/[^a-z0-9_]/i, '') + '_log_caller' - mcntr = '$' + meth.to_s.gsub(/[^a-z0-9_]/i, '') + '_counter' + +def log_caller(cls, meth, singleton=false, histlen=nil) + histlen ||= ENV.fetch('LOGCALLER_MAXHIST', 16).to_i + dec_meth = 'm_' + meth.to_s.gsub(/[^\w]/) { |c| c.unpack('H*')[0] } + malias = dec_meth + '_log_caller' + mcntr = '$' + dec_meth + '_counter' eval < true, '#' => false }[sg] + cls = cls.split('::').inject(::Object) { |o, cst| o.const_get(cst) } + log_caller(cls, meth, sg) +} diff --git a/lib/metasm/samples/dynamic_ruby.rb b/lib/metasm/samples/dynamic_ruby.rb index b81752f2c0..ef9f1efd5b 100644 --- a/lib/metasm/samples/dynamic_ruby.rb +++ b/lib/metasm/samples/dynamic_ruby.rb @@ -12,36 +12,9 @@ require 'metasm' module Metasm -module RubyHack - CACHEDIR = File.expand_path('~/.metasm/jit_cache/') - # basic C defs for ruby internals - 1.8 only ! - RUBY_H = <flags >> FL_USHIFT) & 0xff) - -extern VALUE rb_cObject; -extern VALUE rb_eRuntimeError; -#define Qfalse ((VALUE)0) -#define Qtrue ((VALUE)2) -#define Qnil ((VALUE)4) -#define FIX2LONG(x) (((long)x) >> 1) - -VALUE rb_uint2inum(unsigned long); -unsigned long rb_num2ulong(VALUE); - -VALUE rb_str_new(const char* ptr, long len); // alloc + memcpy + 0term - -int rb_intern(char *); -VALUE rb_funcall(VALUE recv, int id, int nargs, ...); -VALUE rb_const_get(VALUE, int); -VALUE rb_raise(VALUE, char*); -void rb_define_method(VALUE, char *, VALUE (*)(), int); -void rb_define_singleton_method(VALUE, char *, VALUE (*)(), int); -int rb_to_id(VALUE); -struct node* rb_method_node(VALUE klass, int id); -VALUE rb_str_new(char*, int); - - -// TODO setup those vars auto or define a standard .import/.export (elf/pe/macho) -#ifdef METASM_TARGET_ELF -asm .global "rb_cObject" undef type=NOTYPE; // TODO fix elf encoder to not need this -asm .global "rb_eRuntimeError" undef type=NOTYPE; -#endif EOS NODETYPE = [ :method, :fbody, :cfunc, :scope, :block, @@ -105,116 +49,20 @@ EOS :memo, :ifunc, :dsym, :attrasgn, :last ] + new_api_c 'void rb_define_method(uintptr_t, char *, uintptr_t (*)(), int)' + new_api_c 'void *rb_method_node(uintptr_t, unsigned id)' - # create and load a ruby module that allows - # to use a ruby string as the binary code implementing a ruby method - # enable the use of .load_binary_method(class, methodname, string) - def self.load_bootstrap - c_source = <ptr; - mprotect(raw & 0xfffff000, ((raw+RString(rawcode)->len+0xfff) & 0xfffff000) - (raw&0xfffff000), 7); // RWX - rb_define_method(klass, RString(methname)->ptr, RString(rawcode)->ptr, FIX2LONG(nparams)); - return Qtrue; -} - -static VALUE memory_read(VALUE self, VALUE addr, VALUE len) -{ - return rb_str_new((char*)rb_num2ulong(addr), (int)rb_num2ulong(len)); -} - -static VALUE memory_write(VALUE self, VALUE addr, VALUE val) -{ - char *src = RString(val)->ptr; - char *dst = (char*)rb_num2ulong(addr); - int len = RString(val)->len; - while (len--) - *dst++ = *src++; - return val; -} - -static VALUE memory_read_int(VALUE self, VALUE addr) -{ - return rb_uint2inum(*(unsigned long*)rb_num2ulong(addr)); -} - -static VALUE memory_write_int(VALUE self, VALUE addr, VALUE val) -{ - *(unsigned long*)rb_num2ulong(addr) = rb_num2ulong(val); - return val; -} - -extern void *dlsym(int handle, char *symname); -#define RTLD_DEFAULT 0 -asm .global dlsym undef; - -static VALUE dl_dlsym(VALUE self, VALUE symname) -{ - return rb_uint2inum((unsigned)dlsym(RTLD_DEFAULT, RString(symname)->ptr)); -} - -static VALUE get_method_node_ptr(VALUE self, VALUE klass, VALUE id) -{ - return rb_uint2inum((unsigned)rb_method_node(klass, rb_to_id(id))); -} - -static VALUE id2ref(VALUE self, VALUE id) -{ - return rb_num2ulong(id); -} - -int Init_metasm_binload(void) -{ - VALUE metasm = rb_const_get(rb_cObject, rb_intern("Metasm")); - VALUE rubyhack = rb_const_get(metasm, rb_intern("RubyHack")); - rb_define_singleton_method(rubyhack, "set_class_method_raw", set_class_method_raw, 4); - rb_define_singleton_method(rubyhack, "memory_read", memory_read, 2); - rb_define_singleton_method(rubyhack, "memory_write", memory_write, 2); - rb_define_singleton_method(rubyhack, "memory_read_int", memory_read_int, 1); - rb_define_singleton_method(rubyhack, "memory_write_int", memory_write_int, 2); - rb_define_singleton_method(rubyhack, "get_method_node_ptr", get_method_node_ptr, 2); - rb_define_singleton_method(rubyhack, "dlsym", dl_dlsym, 1); - rb_define_singleton_method(rubyhack, "id2ref", id2ref, 1); - return 0; -} -asm .global Init_metasm_binload; - -asm .soname "metasm_binload"; -asm .nointerp; -asm .pt_gnu_stack rw; -EOS - - `mkdir -p #{CACHEDIR}` if not File.directory? CACHEDIR - stat = File.stat(__FILE__) # may be relative, do it before chdir - Dir.chdir(CACHEDIR) { - if not File.exist? 'metasm_binload.so' or File.stat('metasm_binload.so').mtime < stat.mtime - compile_c(c_source, ELF).encode_file('metasm_binload.so') - end - require 'metasm_binload' - } - # TODO Windows support - # TODO PaX support (write + mmap, in user-configurable dir?) +class << self + def set_class_method_raw(klass, meth, code, nparams) + memory_perm(str_ptr(code), code.length, 'rwx') + rb_define_method(rb_obj_to_value(klass), meth, code, nparams) end - def self.cpu - # TODO check runtime environment etc - @cpu ||= Ia32.new + def get_method_node_ptr(klass, meth) + raise "#{klass.inspect} is not a class" if not klass.kind_of? Module + rb_method_node(rb_obj_to_value(klass), meth.to_sym.to_i) end - def self.compile_c(c_src, exeformat=Shellcode) - exeformat.compile_c(cpu, c_src) - end - - load_bootstrap - # sets up rawopcodes as the method implementation for class klass # rawopcodes must implement the expected ABI or things will break horribly # this method is VERY UNSAFE, and breaks everything put in place by the ruby interpreter @@ -223,11 +71,12 @@ EOS # -2 self, arg_ary # -1 argc, VALUE*argv, self # >=0 self, arg0, arg1.. - def self.set_method_binary(klass, methodname, raw, nargs=-2) + def set_method_binary(klass, methodname, raw, nargs=nil) + nargs ||= klass.instance_method(methodname).arity if raw.kind_of? EncodedData - baseaddr = memory_read_int((raw.data.object_id << 1) + 12) + baseaddr = str_ptr(raw.data) bd = raw.binding(baseaddr) - raw.reloc_externals.uniq.each { |ext| bd[ext] = dlsym(ext) or raise "unknown symbol #{ext}" } + raw.reloc_externals.uniq.each { |ext| bd[ext] = sym_addr(0, ext) or raise "unknown symbol #{ext}" } raw.fixup(bd) raw = raw.data end @@ -236,17 +85,21 @@ EOS end # same as load_binary_method but with an object and not a class - def self.set_object_method_binary(obj, *a) + def set_singleton_method_binary(obj, *a) set_method_binary((class << obj ; self ; end), *a) end - def self.object_pointer(obj) - (obj.object_id << 1) & 0xffffffff + def read_method_ast(klass, meth) + read_node get_method_node_ptr(klass, meth) end - def self.read_node(ptr, cur=nil) - return if ptr == 0 + def read_singleton_method_ast(klass, meth) + klass = (class << klass ; self ; end) + read_method_ast(klass, meth) + end + def read_node(ptr, cur=nil) + return if ptr == 0 or ptr == 4 type = NODETYPE[(memory_read_int(ptr) >> 11) & 0xff] v1 = memory_read_int(ptr+8) @@ -266,40 +119,51 @@ EOS when :if [type, read_node(v1), read_node(v2), read_node(v3)] when :cfunc + v2 = {0xffffffff => -1, 0xfffffffe => -2, 0xffffffffffffffff => -1, 0xfffffffffffffffe => -2}[v2] || v2 [type, {:fptr => v1, # c func pointer :arity => v2}] when :scope - [type, {:localnr => memory_read_int(v1), # nr of local vars (+2 for $_/$~) - :cref => v2}, # node, starting point for const resolution + [type, {:localnr => (v1 != 0 && v1 != 4 ? memory_read_int(v1) : 0), # nr of local vars (+2 for $_/$~) + :cref => read_node(v2)[1..-1]}, # node, starting point for const/@@var resolution read_node(v3)] + when :cref + cur = nil if cur and cur[0] != type + cur ||= [type] + cur << rb_value_to_obj(v1) if v1 != 0 and v1 != 4 + n = read_node(v3, cur) + raise "block->next = #{n.inspect}" if n and n[0] != type + cur when :call, :fcall, :vcall - # TODO check fcall/vcall - ret = [type, read_node(v1), v2.id2name] + [type, read_node(v1), v2.id2name, read_node(v3)] + when :dstr + ret = [type, [:str, rb_value_to_obj(v1)]] if args = read_node(v3) raise "#{ret.inspect} with args != array: #{args.inspect}" if args[0] != :array ret.concat args[1..-1] end ret when :zarray - [:array, []] + [:array] when :lasgn [type, v3, read_node(v2)] when :iasgn, :dasgn, :dasgn_curr, :gasgn, :cvasgn [type, v1.id2name, read_node(v2)] when :masgn - [type, read_node(v1), read_node(v2)] # multiple assignment: a, b = 42 / lambda { |x, y| }.call(1, 2) + # multiple assignment: a, b = 42 / lambda { |x, y| }.call(1, 2) + # v3 = remainder storage (a, b, *c = ary => v3=c) + [type, read_node(v1), read_node(v2), read_node(v3)] when :attrasgn [type, ((v1 == 1) ? :self : read_node(v1)), v2.id2name, read_node(v3)] when :lvar [type, v3] - when :ivar, :dvar, :gvar, :cvar, :const + when :ivar, :dvar, :gvar, :cvar, :const, :attrset [type, v1.id2name] when :str # cannot use _id2ref here, probably the parser does not use standard alloced objects s = memory_read(memory_read_int(v1+12), memory_read_int(v1+16)) [type, s] when :lit - [type, id2ref(v1)] + [type, rb_value_to_obj(v1)] when :args # specialcased by rb_call0, invalid in rb_eval cnt = v3 # nr of required args, copied directly to local_vars opt = read_node(v1) # :block to execute for each missing arg / with N optargs specified, skip N 1st statements @@ -313,7 +177,14 @@ EOS [type] when :redo, :retry [type] - when :case, :when + when :case + # [:case, var_test, [:when, cnd, action, [:when, cnd2, action2, else]]] + # => [:case, var_test, [:when, cnd, action], [:when, cnd2, action], else] + cs = [type, read_node(v1), read_node(v2)] + cs << cs[-1].pop while cs[-1][0] == :when and cs[-1][3] + cs + when :when + # [:when, [:array, [test]], then, else] [type, read_node(v1), read_node(v2), read_node(v3)] when :iter # save a block for the following funcall @@ -321,161 +192,1560 @@ EOS body = read_node(v2) # the body statements (multi -> :block) subj = read_node(v3) # the stuff which is passed the block, probably a :call [type, args, body, subj] - when :while + when :while, :until [type, read_node(v1), read_node(v2), v3] - when :return, :break, :next + when :return, :break, :next, :defined [type, read_node(v1)] + when :to_ary + [type, read_node(v1)] + when :colon2 + [type, read_node(v1), v2.id2name] when :colon3 # ::Stuff [type, v2.id2name] + when :method + [type, v1, read_node(v2), v3] + when :alias + [type, v1, v2, v3] # ? + when :evstr + [type, read_node(v2)] + when :dot2, :dot3 + [type, read_node(v1), read_node(v2)] + when :splat + [type, read_node(v1)] + when :argscat + [type, read_node(v1), read_node(v2), v3] + when :block_pass + # [args, block, receiver]: foo(bar, &baz) => [:bpass, [:array, bar], [:lvar, baz], [:call, 'foo', bar]] (args in v1&v3!) + [type, read_node(v1), read_node(v2), read_node(v3)] + when :block_arg + [type, v1.id2name, v2, v3] + when :ensure + [type, read_node(v1), v2, read_node(v3)] else puts "unhandled #{type.inspect}" [type, v1, v2, v3] end end - - def self.[](a, l=nil) - if a.kind_of? Range - memory_read(a.begin, a.end-a.begin+(a.exclude_end? ? 0 : 1)) - elsif l - memory_read(a, l) - else - memory_read_int(a) - end - end - - def self.[]=(a, l, v=nil) - l, v = v, l if not v - if a.kind_of? Range - memory_write(a.begin, v) - elsif l - memory_write(a, v) - else - memory_write_int(a, v) - end - end - - def self.compile_ruby(klass, meth) - ptr = get_method_node_ptr(klass, meth) - ast = read_node(ptr) - require 'pp' - pp ast - return if not c = ruby_ast_to_c(ast) - puts c - raw = compile_c(c).encoded - set_method_binary(klass, meth, raw, klass.instance_method(meth).arity) - end - - def self.ruby_ast_to_c(ast) - return if ast[0] != :scope - cp = cpu.new_cparser - cp.parse RUBY_H - cp.parse 'void meth(VALUE self) { }' - cp.toplevel.symbol['meth'].type.type = cp.toplevel.symbol['VALUE'] - scope = cp.toplevel.symbol['meth'].initializer - RubyCompiler.new(cp).compile(ast, scope) - cp.dump_definition('meth') - end +end # class << self end -class RubyCompiler - def initialize(cp) - @cp = cp +# a ruby2c C generator for use in the current ruby interpreter +# generates C suitable for shellcode compilation & insertion in the current interpreter +# has hardcoded addresses etc +class RubyLiveCompiler + attr_accessor :cp + + RUBY_H = < 0 and (args[2] or args[3]) + compile_args_m2(func, args) + else + # fixed arity = args[1]: VALUE func(VALUE self, VALUE local_2, VALUE local_3) + args[1].times { |i| + v = C::Variable.new("local_#{i+2}", value) + @scope.symbol[v.name] = v + func.type.args << v + } + end + end + + # update func prototype to reflect arity -1 + # VALUE func(int argc, VALUE *argv, VALUE self) + def compile_args_m1(func, args) + c = C::Variable.new("arg_c", C::BaseType.new(:int, :unsigned)) + v = C::Variable.new("arg_v", C::Pointer.new(value)) + @scope.symbol[c.name] = c + @scope.symbol[v.name] = v + func.type.args.unshift v + func.type.args.unshift c + + args[1].times { |i| + local(i+2, C::CExpression[v, :'[]', [i]]) + } + + if args[2] + # [:block, [:lasgn, 2, [:lit, 4]]] + raise Fail, "unhandled vararglist #{args.inspect}" if args[2][0] != :block + args[2][1..-1].each_with_index { |a, i| + raise Fail, "unhandled arg #{a.inspect}" if a[0] != :lasgn + cnd = C::CExpression[c, :>, i] + thn = C::CExpression[local(a[1], :none), :'=', [v, :'[]', [i]]] + els = C::Block.new(@scope) + ast_to_c(a, els, false) + @scope.statements << C::If.new(cnd, thn, els) + } + end + + if args[3] + raise Fail, "unhandled vararglist3 #{args.inspect}" if args[3][0] != :lasgn + skiplen = args[1] + args[2].length - 1 + alloc = fcall('rb_ary_new4', [c, :-, [skiplen]], [v, :+, [skiplen]]) + local(args[3][1], C::CExpression[[c, :>, skiplen], :'?:', [alloc, fcall('rb_ary_new')]]) + end + end + + # update func prototype to reflect arity -2 + # VALUE func(VALUE self, VALUE arg_array) + def compile_args_m2(func, args) + v = C::Variable.new("arglist", value) + @scope.symbol[v.name] = v + func.type.args << v + + args[1].times { |i| + local(i+2, fcall('rb_ary_shift', v)) + } + + # populate arguments with default values + if args[2] + # [:block, [:lasgn, 2, [:lit, 4]]] + raise Fail, "unhandled vararglist #{args.inspect}" if args[2][0] != :block + args[2][1..-1].each { |a| + raise Fail, "unhandled arg #{a.inspect}" if a[0] != :lasgn + t = C::CExpression[local(a[1], :none), :'=', fcall('rb_ary_shift', v)] + e = C::Block.new(@scope) + ast_to_c([:lasgn, a[1], a[2]], e, false) + @scope.statements << C::If.new(rb_ary_len(v), t, e) + } + end + + if args[3] + raise Fail, "unhandled vararglist3 #{args.inspect}" if args[3][0] != :lasgn + local(args[3][1], C::CExpression[v]) + end + end + + # compile a case/when + # create a real C switch() for Fixnums, and put the others === in the default case + # XXX will get the wrong order for "case x; when 1; when Fixnum; when 3;" ... + def compile_case(ast, scope, want_value) + # this generates + # var = stuff_to_test() + # if (var & 1) + # switch (var >> 1) { + # case 12: + # stuff(); + # break; + # default: + # goto default_case; + # } + # else + # default_case: + # if (var == true.object_id || rb_test(rb_funcall(bla, '===', var))) + # foo(); + # else { + # default(); + # } + # + if want_value == true + ret = get_new_tmp_var('case', want_value) + want_value = ret + elsif want_value + ret = want_value + end + + var = ast_to_c(ast[1], scope, want_value || true) + if not var.kind_of? C::Variable + ret ||= get_new_tmp_var('case', want_value) + scope.statements << C::CExpression[ret, :'=', var] + var = ret + end + + # the scope to put all case int in + body_int = C::Block.new(scope) + # the scope to put the if (cs === var) cascade + body_other_head = body_other = nil + default = nil + + ast[2..-1].each { |cs| + if cs[0] == :when + raise Fail if cs[1][0] != :array + + # numeric case, add a case to body_int + if cs[1][1..-1].all? { |cd| cd[0] == :lit and (cd[1].kind_of? Fixnum or cd[1].kind_of? Range) } + cs[1][1..-1].each { |cd| + if cd[1].kind_of? Range + b = cd[1].begin + e = cd[1].end + e -= 1 if cd[1].exclude_end? + raise Fail unless b.kind_of? Integer and e.kind_of? Integer + body_int.statements << C::Case.new(b, e, nil) + else + body_int.statements << C::Case.new(cd[1], nil, nil) + end + } + cb = C::Block.new(scope) + v = ast_to_c(cs[2], cb, want_value) + cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret + cb.statements << C::Break.new + body_int.statements << cb + + # non-numeric (or mixed) case, add if ( cs === var ) + else + cnd = nil + cs[1][1..-1].each { |cd| + if (cd[0] == :lit and (cd[1].kind_of?(Fixnum) or cd[1].kind_of?(Symbol))) or + [:nil, :true, :false].include?(cd[0]) + # true C equality + cd = C::CExpression[var, :==, ast_to_c(cd, scope)] + else + # own block for ast_to_c to honor lazy evaluation + tb = C::Block.new(scope) + test = rb_test(rb_funcall(ast_to_c(cd, tb), '===', var), tb) + # discard own block unless needed + if tb.statements.empty? + cd = test + else + tb.statements << test + cd = C::CExpression[tb, value] + end + end + cnd = (cnd ? C::CExpression[cnd, :'||', cd] : cd) + } + cb = C::Block.new(scope) + v = ast_to_c(cs[2], cb, want_value) + cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret + + fu = C::If.new(cnd, cb, nil) + + if body_other + body_other.belse = fu + else + body_other_head = fu + end + body_other = fu + end + + # default case statement + else + cb = C::Block.new(scope) + v = ast_to_c(cs, cb, want_value) + cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret + default = cb + end + } + + # if we use the value of the case, we must add an 'else: nil' + if want_value and not default + default = C::Block.new(scope) + default.statements << C::CExpression[ret, :'=', rb_nil] + end + + # assemble everything + scope.statements << + if body_int.statements.empty? + if body_other + body_other.belse = default + body_other_head + else + raise Fail, "empty case? #{ast.inspect}" if not default + default + end + else + if body_other_head + @default_label_cnt ||= 0 + dfl = "default_label_#{@default_label_cnt += 1}" + body_other_head = C::Label.new(dfl, body_other_head) + body_int.statements << C::Case.new('default', nil, C::Goto.new(dfl)) + body_other.belse = default if default + end + body_int = C::Switch.new(C::CExpression[var, :>>, 1], body_int) + C::If.new(C::CExpression[var, :&, 1], body_int, body_other_head) + end + + ret + end + + # create a C::CExpr[toplevel.symbol[name], :funcall, args] + # casts int/strings in arglist to CExpr + def fcall(fname, *arglist) + args = arglist.map { |a| (a.kind_of?(Integer) or a.kind_of?(String)) ? [a] : a } + fv = @cp.toplevel.symbol[fname] + raise "need prototype for #{fname}!" if not fv + C::CExpression[fv, :funcall, args] + end + + # the VALUE typedef def value @cp.toplevel.symbol['VALUE'] end - def local(n) - @scope.symbol["local_#{n}"] + # declare a new function variable + # no initializer if init == :none + def declare_newvar(name, initializer) + v = C::Variable.new(name, value) + v.initializer = initializer if initializer != :none + @scope.symbol[v.name] = v + @scope.statements << C::Declaration.new(v) + v end + # return a string suitable for use as a variable name + # hexencode any char not in [A-z0-9_] + def escape_varname(n) + n.gsub(/[^\w]/) { |c| c.unpack('H*')[0] } + end + + # retrieve or create a local var + # pass :none to avoid initializer + def get_var(name, initializer=:none) + name = escape_varname(name) + @scope.symbol[name] ||= declare_newvar(name, initializer || rb_nil) + end + + # create a new temporary variable + # XXX put_var ? + def get_new_tmp_var(base=nil, var=nil) + return var if var.kind_of? C::Variable + @tmp_var_id ||= 0 + get_var("tmp_#{"#{base}_" if base}#{@tmp_var_id += 1}") + end + + # retrieve/create a new local variable with optionnal initializer + def local(n, init=nil) + get_var "local_#{n}", init + end + + # retrieve/create a new dynamic variable (block argument/variable) + # pass :none to avoid initializer + def dvar(n, init=nil) + get_var "dvar_#{n}", init + end + + # retrieve self (1st func arg) + def rb_self + @scope.symbol['self'] + end + + # returns a CExpr casting expr to a VALUE* + def rb_cast_pvalue(expr, idx) + C::CExpression[[[expr], C::Pointer.new(value)], :'[]', [idx]] + end + + # retrieve the current class, from self->klass + # XXX will segfault with self.kind_of? Fixnum/true/false/nil/sym + def rb_selfclass + rb_cast_pvalue(rb_self, 1) + end + + def rb_nil + C::CExpression[[nil.object_id], value] + end + def rb_true + C::CExpression[[true.object_id], value] + end + def rb_false + C::CExpression[[false.object_id], value] + end + + # call rb_intern on a string def rb_intern(n) - C::CExpression[@cp.toplevel.symbol['rb_intern'], :funcall, [n]] + # use the current interpreter's value + C::CExpression[n.to_sym.to_i] end + # create a rb_funcall construct def rb_funcall(recv, meth, *args) - C::CExpression[@cp.toplevel.symbol['rb_funcall'], :funcall, [recv, rb_intern(meth), [args.length], *args]] + fcall('rb_funcall', recv, rb_intern(meth), args.length, *args) end - def ast_to_c(ast, scope) + # ruby bool test of a var + # assigns to a temporary var, and check against false/nil + def rb_test(expr, scope) + if nil.object_id == 0 or false.object_id == 0 # just to be sure + nf = nil.object_id | false.object_id + C::CExpression[[expr, :|, nf], :'!=', nf] + else + if expr.kind_of? C::Variable + tmp = expr + else + tmp = get_new_tmp_var('test') + scope.statements << C::CExpression[tmp, :'=', expr] + end + C::CExpression[[tmp, :'!=', rb_nil], :'&&', [tmp, :'!=', rb_false]] + end + end + + # generate C code to raise a RuntimeError, reason + def rb_raise(reason, cls='rb_eRuntimeError') + fcall('rb_raise', rb_global(cls), reason) + end + + # return a C expr equivallent to TYPE(expr) == type for non-immediate types + # XXX expr evaluated 3 times + def rb_test_class_type(expr, type) + C::CExpression[[[expr, :>, [7]], :'&&', [[expr, :&, [3]], :==, [0]]], :'&&', [[rb_cast_pvalue(expr, 0), :&, [0x3f]], :'==', [type]]] + end + + # return a C expr equivallent to TYPE(expr) == T_ARRAY + def rb_test_class_ary(expr) + rb_test_class_type(expr, 9) + end + # ARY_PTR(expr) + def rb_ary_ptr(expr, idx=nil) + p = C::CExpression[[rb_cast_pvalue(expr, 4)], C::Pointer.new(value)] + idx ? C::CExpression[p, :'[]', [idx]] : p + end + # ARY_LEN(expr) + def rb_ary_len(expr) + rb_cast_pvalue(expr, 2) + end + + # TYPE(expr) == T_STRING + def rb_test_class_string(expr) + rb_test_class_type(expr, 7) + end + # STR_PTR(expr) + def rb_str_ptr(expr, idx=nil) + p = C::CExpression[[rb_cast_pvalue(expr, 3)], C::Pointer.new(C::BaseType.new(:char))] + idx ? C::CExpression[p, :'[]', [idx]] : p + end + # STR_LEN(expr) + def rb_str_len(expr) + rb_cast_pvalue(expr, 2) + end + + def rb_test_class_hash(expr) + rb_test_class_type(expr, 0xb) + end + + # returns a static pointer to the constant + def rb_const(constname, owner = resolve_const_owner(constname)) + raise Fail, "no dynamic constant resolution #{constname}" if not owner + cst = owner.const_get(constname) + C::CExpression[[RubyHack.rb_obj_to_value(cst)], value] + end + + # compile a :masgn + def rb_masgn(ast, scope, want_value) + raise Fail, "masgn with no rhs #{ast.inspect}" if not ast[2] + raise Fail, "masgn with no lhs array #{ast.inspect}" if not ast[1] or ast[1][0] != :array + if not want_value and ast[2][0] == :array and not ast[3] and ast[2].length == ast[1].length + rb_masgn_optimized(ast, scope) + return nil.object_id + end + full = get_new_tmp_var('masgn', want_value) + ary = ast_to_c(ast[2], scope, full) + scope.statements << C::CExpression[full, :'=', ary] if full != ary + ast[1][1..-1].each_with_index { |e, i| + raise Fail, "weird masgn lhs #{e.inspect} in #{ast.inspect}" if e[-1] != nil + # local_42 = full[i] + e = e.dup + e[-1] = [:rb2cstmt, rb_ary_ptr(full, i)] + ast_to_c(e, scope, false) + } + if ast[3] + raise Fail, "weird masgn lhs #{e.inspect} in #{ast.inspect}" if ast[3][-1] != nil + # local_28 = full[12..-1].to_a + e = ast[3].dup + e[-1] = [:call, [:call, [:rb2cvar, full.name], '[]', [:array, [:dot2, [:lit, ast[1].length-1], [:lit, -1]]]], 'to_a'] + ast_to_c(e, scope, false) + end + + full + end + + def rb_global(cname) + @cp.toplevel.symbol[cname] + end + + # compile an optimized :masgn with rhs.length == lhs.length (no need of a ruby array) + def rb_masgn_optimized(ast, scope) + vars = [] + ast[2][1..-1].each { |rhs| + var = get_new_tmp_var('masgn_opt') + vars << var + r = ast_to_c(rhs, scope, var) + scope.statements << C::CExpression[var, :'=', r] if var != r + } + ast[1][1..-1].each { |lhs| + var = vars.shift + lhs = lhs.dup + raise Fail, "weird masgn lhs #{lhs.inspect} in #{ast.inspect}" if lhs[-1] != nil + lhs[-1] = [:rb2cvar, var.name] + ast_to_c(lhs, scope, false) + } + end + + # the recursive AST to C compiler + # may append C statements to scope + # returns the C::CExpr holding the VALUE of the current ruby statement + # want_value is an optionnal hint as to the returned VALUE is needed or not + # if want_value is a C::Variable, the statements should try to populate this var instead of some random tmp var + # eg to simplify :if encoding unless we have 'foo = if 42;..' + def ast_to_c(ast, scope, want_value = true) ret = case ast.to_a[0] when :block - ast[1..-1].map { |a| ast_to_c(a, scope) }.last - when :lasgn - l = local(ast[1]) - scope.statements << C::CExpression[l, :'=', ast_to_c(ast[2], scope)] - l + if ast[1] + ast[1..-2].each { |a| ast_to_c(a, scope, false) } + ast_to_c(ast.last, scope, want_value) + end + when :lvar local(ast[1]) + when :lasgn + if scope == @scope + l = local(ast[1], :none) + else + # w = 4 if false ; p w => should be nil + l = local(ast[1]) + end + st = ast_to_c(ast[2], scope, l) + scope.statements << C::CExpression[l, :'=', st] if st != l + l + when :dvar + dvar(ast[1]) + when :dasgn_curr + l = dvar(ast[1]) + st = ast_to_c(ast[2], scope, l) + scope.statements << C::CExpression[l, :'=', st] if st != l + l + when :ivar + fcall('rb_ivar_get', rb_self, rb_intern(ast[1])) + when :iasgn + if want_value + tmp = get_new_tmp_var("ivar_#{ast[1]}", want_value) + scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)] + scope.statements << fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), tmp) + tmp + else + scope.statements << fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), ast_to_c(ast[2], scope)) + end + when :cvar + fcall('rb_cvar_get', rb_selfclass, rb_intern(ast[1])) + when :cvasgn + if want_value + tmp = get_new_tmp_var("cvar_#{ast[1]}", want_value) + scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)] + scope.statements << fcall('rb_cvar_set', rb_selfclass, rb_intern(ast[1]), tmp, rb_false) + tmp + else + scope.statements << fcall('rb_cvar_set', rb_selfclass, rb_intern(ast[1]), ast_to_c(ast[2], scope), rb_false) + end + when :gvar + fcall('rb_gv_get', ast[1]) + when :gasgn + if want_value + tmp = get_new_tmp_var("gvar_#{ast[1]}", want_value) + scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)] + scope.statements << fcall('rb_gv_set', ast[1], tmp) + tmp + else + scope.statements << fcall('rb_gv_set', ast[1], ast_to_c(ast[2], scope)) + end + when :attrasgn # foo.bar= 42 (same as :call, except for return value) + recv = ast_to_c(ast[1], scope) + raise Fail, "unsupported #{ast.inspect}" if not ast[3] or ast[3][0] != :array or ast[3].length != 2 + arg = ast_to_c(ast[3][1], scope) + if want_value + tmp = get_new_tmp_var('call', want_value) + scope.statements << C::CExpression[tmp, :'=', arg] + end + scope.statements << rb_funcall(recv, ast[2], arg) + tmp + + when :rb2cvar # hax, used in vararg parsing + get_var(ast[1]) + when :rb2cstmt + ast[1] + + when :block_arg + local(ast[3], fcall('rb_block_proc')) + when :lit case ast[1] when Symbol - rb_intern(ast[1]) + # XXX ID2SYM + C::CExpression[[rb_intern(ast[1].to_s), :<<, 8], :|, 0xe] + when Range + fcall('rb_range_new', ast[1].begin.object_id, ast[1].end.object_id, ast[1].exclude_end? ? 0 : 1) else # true/false/nil/fixnum ast[1].object_id end + when :self + rb_self when :str - C::CExpression[@cp.toplevel.symbol['rb_str_new'], :funcall, [ast[1], [ast[1].length]]] - when :iter - b_args, b_body, b_recv = ast[1, 3] - if b_recv[0] == :call and b_recv[2] == 'times' # TODO check its Fixnum#times - recv = ast_to_c(b_recv[1], scope) - cntr = C::Variable.new("cntr", C::BaseType.new(:int)) # TODO uniq name etc - cntr.initializer = C::CExpression[[0]] - init = C::Block.new(scope) - init.symbol[cntr.name] = cntr - body = C::Block.new(init) - scope.statements << C::For.new(init, C::CExpression[cntr, :<, [recv, :>>, 1]], C::CExpression[:'++', cntr], body) - body.symbol[cntr.name] = cntr - ast_to_c(b_body, body) - recv + fcall('rb_str_new2', ast[1]) + when :array + tmp = get_new_tmp_var('ary', want_value) + scope.statements << C::CExpression[tmp, :'=', fcall('rb_ary_new')] + ast[1..-1].each { |e| + scope.statements << fcall('rb_ary_push', tmp, ast_to_c(e, scope)) + } + tmp + when :hash + raise Fail, "bad #{ast.inspect}" if ast[1][0] != :array + tmp = get_new_tmp_var('hash', want_value) + scope.statements << C::CExpression[tmp, :'=', fcall('rb_hash_new')] + ki = nil + ast[1][1..-1].each { |k| + if not ki + ki = k else - puts "unsupported #{ast.inspect}" - nil.object_id + scope.statements << fcall('rb_hash_aset', tmp, ast_to_c(ki, scope), ast_to_c(k, scope)) + ki = nil end - when :call - f = rb_funcall(ast_to_c(ast[1], scope), ast[2], *ast[3..-1].map { |a| ast_to_c(a, scope) }) - case ast[2] - when '+', '-' - tmp = C::Variable.new('tmp', value) - if not scope.symbol_ancestors['tmp'] - scope.symbol['tmp'] = tmp - scope.statements << C::Declaration.new(tmp) + } + tmp + + when :iter + if v = optimize_iter(ast, scope, want_value) + return v end - a1 = [ast_to_c(ast[1], scope), C::BaseType.new(:int)] - a3 = [ast_to_c(ast[3], scope), C::BaseType.new(:int)] - scope.statements << - C::If.new(C::CExpression[[a1, :&, a3], :&, 1], # XXX overflow to Bignum - C::CExpression[tmp, :'=', [a1, ast[2].to_sym, [a3, :-, [1]]]], - C::CExpression[tmp, :'=', f]) + # for full support of :iter, we need access to the interpreter's ruby_block private global variable in eval.c + # we can find it by analysing rb_block_given_p, but this won't work with a static precompiled rubyhack... + # even with access to ruby_block, there we would need to redo PUSH_BLOCK, create a temporary dvar list, + # handle [:break, lol], and do all the stack magic reused in rb_yield (probably incl setjmp etc) + raise Fail, "unsupported iter #{ast[3].inspect} { | #{ast[1].inspect} | #{ast[2].inspect} }" + + when :call, :vcall, :fcall + if v = optimize_call(ast, scope, want_value) + return v + end + recv = ((ast[0] == :call) ? ast_to_c(ast[1], scope) : rb_self) + if not ast[3] + f = rb_funcall(recv, ast[2]) + elsif ast[3][0] == :array + args = ast[3][1..-1].map { |a| ast_to_c(a, scope) } + f = rb_funcall(recv, ast[2], *args) + elsif ast[3][0] == :splat + args = ast_to_c(ast[3], scope) + if not args.kind_of? C::Variable + tmp = get_new_tmp_var('args', want_value) + scope.statements << C::CExpression[tmp, :'=', args] + args = tmp + end + f = fcall('rb_funcall3', recv, rb_intern(ast[2]), rb_ary_len(args), rb_ary_ptr(args)) + # elsif ast[3][0] == :argscat + else + raise Fail, "unsupported #{ast.inspect}" + end + if want_value + tmp ||= get_new_tmp_var('call', want_value) + scope.statements << C::CExpression[tmp, :'=', f] tmp else + scope.statements << f f end - when nil, :nil, :args - nil.object_id + + when :if, :when + if ast[0] == :when and ast[1][0] == :array + cnd = nil + ast[1][1..-1].map { |cd| rb_test(ast_to_c(cd, scope), scope) }.each { |cd| + cnd = (cnd ? C::CExpression[cnd, :'||', cd] : cd) + } + else + cnd = rb_test(ast_to_c(ast[1], scope), scope) + end + + tbdy = C::Block.new(scope) + ebdy = C::Block.new(scope) if ast[3] or want_value + + if want_value + tmp = get_new_tmp_var('if', want_value) + thn = ast_to_c(ast[2], tbdy, tmp) + tbdy.statements << C::CExpression[tmp, :'=', thn] if tmp != thn + if ast[3] + els = ast_to_c(ast[3], ebdy, tmp) else - puts "unsupported #{ast.inspect}" + # foo = if bar ; baz ; end => nil if !bar + els = rb_nil + end + ebdy.statements << C::CExpression[tmp, :'=', els] if tmp != els + else + ast_to_c(ast[2], tbdy, false) + ast_to_c(ast[3], ebdy, false) + end + + scope.statements << C::If.new(cnd, tbdy, ebdy) + + tmp + + when :while, :until + pib = @iter_break + @iter_break = nil # XXX foo = while ()... + + body = C::Block.new(scope) + if ast[3] == 0 # do .. while(); + ast_to_c(ast[2], body, false) + end + t = nil + e = C::Break.new + t, e = e, t if ast[0] == :until + body.statements << C::If.new(rb_test(ast_to_c(ast[1], body), body), t, e) + if ast[3] != 0 # do .. while(); + ast_to_c(ast[2], body, false) + end + scope.statements << C::For.new(nil, nil, nil, body) + + @iter_break = pib nil.object_id + + when :and, :or, :not + # beware lazy evaluation ! + tmp = get_new_tmp_var('and', want_value) + v1 = ast_to_c(ast[1], scope, tmp) + # and/or need that tmp has the actual v1 value (returned when shortcircuit) + scope.statements << C::CExpression[tmp, :'=', v1] if v1 != tmp + v1 = tmp + case ast[0] + when :and + t = C::Block.new(scope) + v2 = ast_to_c(ast[2], t, tmp) + t.statements << C::CExpression[tmp, :'=', v2] if v2 != tmp + when :or + e = C::Block.new(scope) + v2 = ast_to_c(ast[2], e, tmp) + e.statements << C::CExpression[tmp, :'=', v2] if v2 != tmp + when :not + t = C::CExpression[tmp, :'=', rb_false] + e = C::CExpression[tmp, :'=', rb_true] + end + scope.statements << C::If.new(rb_test(v1, scope), t, e) + tmp + when :return + scope.statements << C::Return.new(ast_to_c(ast[1], scope)) + nil.object_id + when :break + if @iter_break + v = (ast[1] ? ast_to_c(ast[1], scope, @iter_break) : nil.object_id) + scope.statements << C::CExpression[@iter_break, :'=', [[v], value]] if @iter_break != v + end + scope.statements << C::Break.new + nil.object_id + + when nil, :args + nil.object_id + when :nil + rb_nil + when :false + rb_false + when :true + rb_true + when :const + rb_const(ast[1]) + when :colon2 + if cst = check_const(ast[1]) + rb_const(ast[2], cst) + else + fcall('rb_const_get', ast_to_c(ast[1], scope), rb_intern(ast[2])) + end + when :colon3 + rb_const(ast[1], ::Object) + when :defined + case ast[1][0] + when :ivar + fcall('rb_ivar_defined', rb_self, rb_intern(ast[1][1])) + else + raise Fail, "unsupported #{ast.inspect}" + end + when :masgn + # parallel assignment: put everything in an Array, then pop everything back? + rb_masgn(ast, scope, want_value) + + when :evstr + fcall('rb_obj_as_string', ast_to_c(ast[1], scope)) + when :dot2, :dot3 + fcall('rb_range_new', ast_to_c(ast[1], scope), ast_to_c(ast[2], scope), ast[0] == :dot2 ? 0 : 1) + when :splat + fcall('rb_Array', ast_to_c(ast[1], scope)) + when :to_ary + fcall('rb_ary_to_ary', ast_to_c(ast[1], scope)) + when :dstr + # dynamic string: "foo#{bar}baz" + tmp = get_new_tmp_var('dstr') + scope.statements << C::CExpression[tmp, :'=', fcall('rb_str_new2', ast[1][1])] + ast[2..-1].compact.each { |s| + if s[0] == :str # directly append the char* + scope.statements << fcall('rb_str_cat2', tmp, s[1]) + else + scope.statements << fcall('rb_str_append', tmp, ast_to_c(s, scope)) + end + } + tmp + when :case + compile_case(ast, scope, want_value) + when :ensure + # TODO + ret = ast_to_c(ast[1], scope, want_value) + ast_to_c(ast[3], scope, false) + ret + else + raise Fail, "unsupported #{ast.inspect}" + end + + if want_value + ret = C::CExpression[[ret], value] if ret.kind_of? Integer or ret.kind_of? String + ret + end + end + + # optional optimization of a call (eg a == 1, c+2, ...) + # return nil for normal rb_funcall, or a C::CExpr to use as retval. + def optimize_call(ast, scope, want_value) + ce = C::CExpression + op = ast[2] + int = C::BaseType.new(:ptr) # signed VALUE + args = ast[3][1..-1] if ast[3] and ast[3][0] == :array + arg0 = args[0] if args and args[0] + + if arg0 and arg0[0] == :lit and arg0[1].kind_of? Fixnum + # optimize 'x==42', 'x+42', 'x-42' + o2 = arg0[1] + return if not %w[== > < >= <= + -].include? op + if o2 < 0 and ['+', '-'].include? op + # need o2 >= 0 for overflow detection + op = {'+' => '-', '-' => '+'}[op] + o2 = -o2 + return if not o2.kind_of? Fixnum # -0x40000000 + end + + int_v = o2.object_id + recv = ast_to_c(ast[1], scope) + tmp = get_new_tmp_var('opt', want_value) + if not recv.kind_of? C::Variable + scope.statements << ce[tmp, :'=', recv] + recv = tmp + end + + case op + when '==' + # XXX assume == only return true for full equality: if not Fixnum, then always false + # which breaks 1.0 == 1 and maybe others, but its ok + scope.statements << C::If.new(ce[recv, :'==', [int_v]], ce[tmp, :'=', rb_true], ce[tmp, :'=', rb_false]) + when '>', '<', '>=', '<=' + # do the actual comparison on signed >>1 if both Fixnum + t = C::If.new( + ce[[[[recv], int], :>>, [1]], op.to_sym, [[[int_v], int], :>>, [1]]], + ce[tmp, :'=', rb_true], + ce[tmp, :'=', rb_false]) + # fallback to actual rb_funcall + e = ce[tmp, :'=', rb_funcall(recv, op, o2.object_id)] + scope.statements << C::If.new(ce[recv, :&, 1], t, e) + when '+' + e = ce[recv, :+, [int_v-1]] # overflow to Bignum ? + cnd = ce[[recv, :&, [1]], :'&&', [[[recv], int], :<, [[e], int]]] + t = ce[tmp, :'=', e] + e = ce[tmp, :'=', rb_funcall(recv, op, o2.object_id)] + scope.statements << C::If.new(cnd, t, e) + when '-' + e = ce[recv, :-, [int_v-1]] + cnd = ce[[recv, :&, [1]], :'&&', [[[recv], int], :>, [[e], int]]] + t = ce[tmp, :'=', e] + e = ce[tmp, :'=', rb_funcall(recv, op, o2.object_id)] + scope.statements << C::If.new(cnd, t, e) + end + tmp + + # Symbol#== + elsif arg0 and arg0[0] == :lit and arg0[1].kind_of? Symbol and op == '==' + s_v = ast_to_c(arg0, scope) + tmp = get_new_tmp_var('opt', want_value) + recv = ast_to_c(ast[1], scope, tmp) + if not recv.kind_of? C::Variable + scope.statements << ce[tmp, :'=', recv] + recv = tmp + end + + scope.statements << C::If.new(ce[recv, :'==', [s_v]], ce[tmp, :'=', rb_true], ce[tmp, :'=', rb_false]) + tmp + + elsif arg0 and op == '<<' + tmp = get_new_tmp_var('opt', want_value) + recv = ast_to_c(ast[1], scope, tmp) + arg = ast_to_c(arg0, scope) + if recv != tmp + scope.statements << ce[tmp, :'=', recv] + recv = tmp + end + + ar = fcall('rb_ary_push', recv, arg) + st = fcall('rb_str_concat', recv, arg) + oth = rb_funcall(recv, op, arg) + oth = ce[tmp, :'=', oth] if want_value + scope.statements << C::If.new(rb_test_class_ary(recv), ar, + C::If.new(rb_test_class_string(recv), st, oth)) + tmp + + elsif arg0 and args.length == 1 and op == '[]' + return if ast[1][0] == :const # Expression[42] + tmp = get_new_tmp_var('opt', want_value) + recv = ast_to_c(ast[1], scope, tmp) + if not recv.kind_of? C::Variable + scope.statements << ce[tmp, :'=', recv] + recv = tmp + end + + idx = get_new_tmp_var('idx') + arg = ast_to_c(arg0, scope, idx) + if not arg.kind_of? C::Variable + scope.statements << ce[idx, :'=', arg] + arg = idx + end + idx = ce[[idx], int] + + ar = C::Block.new(scope) + ar.statements << ce[idx, :'=', [[[arg], int], :>>, [1]]] + ar.statements << C::If.new(ce[idx, :<, [0]], ce[idx, :'=', [idx, :+, rb_ary_len(recv)]], nil) + ar.statements << C::If.new(ce[[idx, :<, [0]], :'||', [idx, :>=, [[rb_ary_len(recv)], int]]], + ce[tmp, :'=', rb_nil], + ce[tmp, :'=', rb_ary_ptr(recv, idx)]) + st = C::Block.new(scope) + st.statements << ce[idx, :'=', [[[arg], int], :>>, [1]]] + st.statements << C::If.new(ce[idx, :<, [0]], ce[idx, :'=', [idx, :+, rb_str_len(recv)]], nil) + st.statements << C::If.new(ce[[idx, :<, [0]], :'||', [idx, :>=, [[rb_str_len(recv)], int]]], + ce[tmp, :'=', rb_nil], + ce[tmp, :'=', [[[[rb_str_ptr(recv, idx), :&, [0xff]], :<<, [1]], :|, [1]], value]]) + hsh = ce[tmp, :'=', fcall('rb_hash_aref', recv, arg)] + oth = ce[tmp, :'=', rb_funcall(recv, op, arg)] + scope.statements << C::If.new(rb_test_class_hash(recv), hsh, + C::If.new(ce[[arg, :&, 1], :'&&', rb_test_class_ary(recv)], ar, + C::If.new(ce[[arg, :&, 1], :'&&', rb_test_class_string(recv)], st, oth))) + tmp + + elsif ast[1] and not arg0 and op == 'empty?' + tmp = get_new_tmp_var('opt', want_value) + recv = ast_to_c(ast[1], scope, tmp) + if not recv.kind_of? C::Variable + scope.statements << ce[tmp, :'=', recv] + recv = tmp + end + + scope.statements << C::If.new(rb_test_class_ary(recv), + C::If.new(rb_ary_len(recv), + ce[tmp, :'=', rb_false], + ce[tmp, :'=', rb_true]), + ce[tmp, :'=', rb_funcall(recv, op)]) + tmp + + elsif ast[1] and not arg0 and op == 'pop' + tmp = get_new_tmp_var('opt', want_value) + recv = ast_to_c(ast[1], scope, tmp) + if not recv.kind_of? C::Variable + scope.statements << ce[tmp, :'=', recv] + recv = tmp + end + + t = fcall('rb_ary_pop', recv) + e = rb_funcall(recv, op) + if want_value + t = ce[tmp, :'=', t] + e = ce[tmp, :'=', e] + end + scope.statements << C::If.new(rb_test_class_ary(recv), t, e) + + tmp + + elsif ast[1] and op == 'kind_of?' and arg0 and (arg0[0] == :const or arg0[0] == :colon3) + # TODO check const maps to toplevel when :const + test = + case arg0[1] + when 'Symbol' + tmp = get_new_tmp_var('kindof', want_value) + ce[[ast_to_c(ast[1], scope, tmp), :'&', [0xf]], :'==', [0xe]] + #when 'Numeric', 'Integer' + when 'Fixnum' + tmp = get_new_tmp_var('kindof', want_value) + ce[ast_to_c(ast[1], scope, tmp), :'&', [0x1]] + when 'Array' + rb_test_class_ary(ast_to_c(ast[1], scope)) + when 'String' + rb_test_class_string(ast_to_c(ast[1], scope)) + else return + end +puts "shortcut may be incorrect for #{ast.inspect}" if arg0[0] == :const + tmp ||= get_new_tmp_var('kindof', want_value) + scope.statements << C::If.new(test, ce[tmp, :'=', rb_true], ce[tmp, :'=', rb_false]) + tmp + + elsif not ast[1] or ast[1] == [:self] + optimize_call_static(ast, scope, want_value) + end + end + + # return ptr, arity + # ptr is a CExpr pointing to the C func implementing klass#method + def get_cfuncptr(klass, method, singleton=false) + cls = singleton ? (class << klass ; self ; end) : klass + ptr = RubyHack.get_method_node_ptr(cls, method) + return if ptr == 0 + ftype = RubyHack::NODETYPE[(RubyHack.memory_read_int(ptr) >> 11) & 0xff] + return if ftype != :cfunc + fast = RubyHack.read_node(ptr) + arity = fast[1][:arity] + fptr = fast[1][:fptr] + + fproto = C::Function.new(value, []) + case arity + when -1; fproto.args << C::Variable.new(nil, C::BaseType.new(:int)) << C::Variable.new(nil, C::Pointer.new(value)) << C::Variable.new(nil, value) + when -2; fproto.args << C::Variable.new(nil, value) << C::Variable.new(nil, value) + else (arity+1).times { fproto.args << C::Variable.new(nil, value) } + end + + C::CExpression[[fptr], C::Pointer.new(fproto)] + end + + # call C funcs directly + # assume private function calls are not virtual and hardlink them here + def optimize_call_static(ast, scope, want_value) + arity = method_arity(ast[2]) rescue return + if ast[2].to_s == @meth.to_s + # self is recursive + fptr = @cur_cfunc + else + fptr = get_cfuncptr(@klass, ast[2], @meth_singleton) + return if not fptr + end + + c_arglist = [] + + if not ast[3] + args = [] + elsif ast[3][0] == :array + args = ast[3][1..-1] + elsif ast[3][0] == :splat + args = ast_to_c(ast[3], scope) + if arity != -2 and !args.kind_of?(C::Variable) + tmp = get_new_tmp_var('arg') + scope.statements << C::CExpression[tmp, :'=', args] + args = tmp + end + case arity + when -2 + c_arglist << rb_self << args + when -1 + c_arglist << [rb_ary_len(args)] << rb_ary_ptr(args) << rb_self + else + cnd = C::CExpression[rb_ary_len(args), :'!=', [arity]] + scope.statements << C::If.new(cnd, rb_raise("#{arity} args expected", 'rb_eArgumentError'), nil) + + c_arglist << rb_self + arity.times { |i| c_arglist << rb_ary_ptr(args, i) } + end + arity = :canttouchthis + else return # TODO + end + + case arity + when :canttouchthis + when -2 + arg = get_new_tmp_var('arg') + scope.statements << C::CExpression[arg, :'=', fcall('rb_ary_new')] + args.each { |a| + scope.statements << fcall('rb_ary_push', arg, ast_to_c(a, scope)) + } + c_arglist << rb_self << arg + + when -1 + case args.length + when 0 + argv = C::CExpression[[0], C::Pointer.new(value)] + when 1 + val = ast_to_c(args[0], scope) + if not val.kind_of? C::Variable + argv = get_new_tmp_var('argv') + scope.statements << C::CExpression[argv, :'=', val] + val = argv + end + argv = C::CExpression[:'&', val] + else + argv = get_new_tmp_var('argv') + argv.type = C::Array.new(value, args.length) + args.each_with_index { |a, i| + val = ast_to_c(a, scope) + scope.statements << C::CExpression[[argv, :'[]', [i]], :'=', val] + } + end + c_arglist << [args.length] << argv << rb_self + + else + c_arglist << rb_self + args.each { |a| + va = get_new_tmp_var('arg') + val = ast_to_c(a, scope, va) + scope.statements << C::CExpression[va, :'=', val] if val != va + c_arglist << va + } + end + + f = C::CExpression[fptr, :funcall, c_arglist] + if want_value + ret = get_new_tmp_var('ccall', want_value) + scope.statements << C::CExpression[ret, :'=', f] + ret + else + scope.statements << f + end + end + + def optimize_iter(ast, scope, want_value) + b_args, b_body, b_recv = ast[1, 3] + + old_ib = @iter_break + if want_value + # a new tmpvar, so we can overwrite it in 'break :foo' + @iter_break = get_new_tmp_var('iterbreak') + else + @iter_break = nil + end + + if b_recv[0] == :call and b_recv[2] == 'reverse_each' + # convert ary.reverse_each to ary.reverse.each + b_recv = b_recv.dup + b_recv[1] = [:call, b_recv[1], 'reverse'] + b_recv[2] = 'each' + elsif b_recv[0] == :call and b_recv[2] == 'each_key' + # convert hash.each_key to hash.keys.each + b_recv = b_recv.dup + b_recv[1] = [:call, b_recv[1], 'keys'] + b_recv[2] = 'each' + end + + # loop { } + if b_recv[0] == :fcall and b_recv[2] == 'loop' + body = C::Block.new(scope) + ast_to_c(b_body, body) + scope.statements << C::For.new(nil, nil, nil, body) + + # int.times { |i| } + elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'times' + limit = get_new_tmp_var('limit') + recv = ast_to_c(b_recv[1], scope, limit) + scope.statements << C::If.new(C::CExpression[:'!', [recv, :&, 1]], rb_raise('only Fixnum#times handled'), nil) + if want_value + scope.statements << C::CExpression[@iter_break, :'=', recv] + end + scope.statements << C::CExpression[limit, :'=', [recv, :>>, 1]] + cntr = get_new_tmp_var('cntr') + cntr.type = C::BaseType.new(:int, :unsigned) + body = C::Block.new(scope) + if b_args and b_args[0] == :dasgn_curr + body.statements << C::CExpression[dvar(b_args[1]), :'=', [[cntr, :<<, 1], :|, 1]] + end + ast_to_c(b_body, body) + scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, limit], C::CExpression[:'++', cntr], body) + + # ary.each { |e| } + elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'each' and b_args and + b_args[0] == :dasgn_curr + ary = get_new_tmp_var('ary') + recv = ast_to_c(b_recv[1], scope, ary) + scope.statements << C::CExpression[ary, :'=', recv] if ary != recv + scope.statements << C::If.new(rb_test_class_ary(ary), nil, rb_raise('only Array#each { |e| } handled')) + if want_value + scope.statements << C::CExpression[@iter_break, :'=', ary] + end + cntr = get_new_tmp_var('cntr') + cntr.type = C::BaseType.new(:int, :unsigned) + body = C::Block.new(scope) + if b_args and b_args[0] == :dasgn_curr + body.statements << C::CExpression[dvar(b_args[1]), :'=', [rb_ary_ptr(ary), :'[]', [cntr]]] + end + ast_to_c(b_body, body) + scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, rb_ary_len(ary)], C::CExpression[:'++', cntr], body) + + # ary.find { |e| } + elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'find' and b_args and + b_args[0] == :dasgn_curr + ary = get_new_tmp_var('ary') + recv = ast_to_c(b_recv[1], scope, ary) + scope.statements << C::CExpression[ary, :'=', recv] if ary != recv + scope.statements << C::If.new(rb_test_class_ary(ary), nil, rb_raise('only Array#find { |e| } handled')) + if want_value + scope.statements << C::CExpression[@iter_break, :'=', rb_nil] + end + cntr = get_new_tmp_var('cntr') + cntr.type = C::BaseType.new(:int, :unsigned) + body = C::Block.new(scope) + if b_args and b_args[0] == :dasgn_curr + body.statements << C::CExpression[dvar(b_args[1]), :'=', [rb_ary_ptr(ary), :'[]', [cntr]]] + end + # same as #each up to this point (except default retval), now add a 'if (body_value) break ary[cntr];' + # XXX 'find { next true }' + + found = ast_to_c(b_body, body) + t = C::Block.new(body) + t.statements << C::CExpression[@iter_break, :'=', rb_ary_ptr(ary, cntr)] + t.statements << C::Break.new + body.statements << C::If.new(rb_test(found, body), t, nil) + + scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, rb_ary_len(ary)], C::CExpression[:'++', cntr], body) + + # ary.map { |e| } + elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'map' and b_args and + b_args[0] == :dasgn_curr + ary = get_new_tmp_var('ary') + recv = ast_to_c(b_recv[1], scope, ary) + scope.statements << C::CExpression[ary, :'=', recv] if ary != recv + scope.statements << C::If.new(rb_test_class_ary(ary), nil, rb_raise('only Array#map { |e| } handled')) + if want_value + scope.statements << C::CExpression[@iter_break, :'=', fcall('rb_ary_new')] + end + cntr = get_new_tmp_var('cntr') + cntr.type = C::BaseType.new(:int, :unsigned) + body = C::Block.new(scope) + if b_args and b_args[0] == :dasgn_curr + body.statements << C::CExpression[dvar(b_args[1]), :'=', [rb_ary_ptr(ary), :'[]', [cntr]]] + end + # same as #each up to this point (except default retval), now add a '@iter_break << body_value' + # XXX 'next' unhandled + + val = ast_to_c(b_body, body) + body.statements << fcall('rb_ary_push', @iter_break, val) + + scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, rb_ary_len(ary)], C::CExpression[:'++', cntr], body) + + else + @iter_break = old_ib + return + end + + ret = @iter_break + @iter_break = old_ib + ret || nil.object_id + end +end + +# a ruby2c C generator for use in the any ruby interpreter (generates C suitable for use as a standard Ruby extension) +class RubyStaticCompiler < RubyLiveCompiler + # add a new ruby function to the current @cp + def self.compile(klass, *methlist) + @rcp ||= new + methlist.each { |meth| + ast = RubyHack.read_method_ast(klass, meth) + @rcp.compile(ast, klass, meth) + } + self + end + + def self.compile_singleton(klass, *methlist) + @rcp ||= new + methlist.each { |meth| + ast = RubyHack.read_singleton_method_ast(klass, meth) + @rcp.compile(ast, klass, meth, true) + } + self + end + + def self.dump + < + # const_Bar = rb_const_get(rb_cObject, rb_intern("Bar")); + # const_Bar_Baz = rb_const_get(const_Bar, rb_intern("Baz")); + # const_Bar_Baz_FOO = rb_const_get(const_Bar_Baz, rb_intern("FOO")); + # use rb_const(nil, class) to get a pointer to a class/module + def rb_const(constname, owner = resolve_const_owner(constname)) + raise Fail, "no dynamic constant resolution #{constname}" if not owner + + @const_value ||= { [::Object, 'Object'] => rb_global('rb_cObject') } + + k = ::Object + v = nil + cname = owner.name + cname += '::' + constname if constname + cname.split('::').each { |n| + kk = k.const_get(n) + if not v = @const_value[[k, n]] + # class A ; end ; B = A => B.name => 'A' + vn = "const_#{escape_varname((k.name + '::' + n).sub(/^Object::/, '').gsub('::', '_'))}" + vi = fcall('rb_const_get', rb_const(nil, k), fcall('rb_intern', n)) + v = declare_newtopvar(vn, vi) + # n wont be reused, so do not alloc a global intern_#{n} for this + @const_value[[k, n]] = v + end + k = kk + } + v + end + + # TODO remove this when the C compiler is fixed + def rb_global(cname) + C::CExpression[:*, @cp.toplevel.symbol[cname]] + end + + def get_cfuncptr(klass, method, singleton=false) + # is it a func we have in the current cparser ? + if ptr = @compiled_func_cache[[klass, method.to_s, singleton]] + return ptr + end + + # check if it's a C or ruby func in the current interpreter + cls = singleton ? (class << klass ; self ; end) : klass + ptr = RubyHack.get_method_node_ptr(cls, method) + return if ptr == 0 + ftype = RubyHack::NODETYPE[(RubyHack.memory_read_int(ptr) >> 11) & 0xff] + return if ftype != :cfunc + + # ok, so assume it will be the same next time + n = escape_varname "fptr_#{klass.name}#{singleton ? '.' : '#'}#{method}".gsub('::', '_') + if not v = @cp.toplevel.symbol[n] + v = get_cfuncptr_dyn(klass, method, singleton, n) + end + + v + end + + def get_cfuncptr_dyn(klass, method, singleton, n) + arity = singleton ? klass.method(method).arity : klass.instance_method(method).arity + fproto = C::Function.new(value, []) + case arity + when -1; fproto.args << C::Variable.new(nil, C::BaseType.new(:int)) << C::Variable.new(nil, C::Pointer.new(value)) << C::Variable.new(nil, value) + when -2; fproto.args << C::Variable.new(nil, value) << C::Variable.new(nil, value) + else (arity+1).times { fproto.args << C::Variable.new(nil, value) } + end + + if not ptr = init.symbol['ptr'] + ptr = C::Variable.new('ptr', C::Pointer.new(C::BaseType.new(:int))) + init.symbol[ptr.name] = ptr + init.statements << C::Declaration.new(ptr) + end + + cls = rb_const(nil, klass) + cls = fcall('rb_singleton_class', cls) if singleton + init.statements << C::CExpression[ptr, :'=', fcall('rb_method_node', cls, rb_intern(method))] + + # dynamically recheck that klass#method is a :cfunc + cnd = C::CExpression[[:'!', ptr], :'||', [[[[ptr, :'[]', [0]], :>>, [11]], :&, [0xff]], :'!=', [RubyHack::NODETYPE.index(:cfunc)]]] + init.statements << C::If.new(cnd, rb_raise("CFunc expected at #{klass}#{singleton ? '.' : '#'}#{method}"), nil) + + vi = C::CExpression[[ptr, :'[]', [1]], C::Pointer.new(fproto)] + declare_newtopvar(n, vi, C::Pointer.new(fproto)) + end + + if defined? $trace_rbfuncall and $trace_rbfuncall + # dynamic trace of all rb_funcall made from our module + def rb_funcall(recv, meth, *args) + if not defined? @rb_fcid + @cp.parse <ptr, RString(str)->len); + doit(VAL2INT(count), STR_PTR(str), STR_LEN(str)); return count; } -void doit(int count, char *str, int strlen) { - asm(#{src_asm.inspect}); -} +void doit(int count, char *str, int strlen) { asm(#{src_asm.inspect}); } EOS -m = Metasm::RubyHack.compile_c(src).encode_string + class Foo + end -o = Object.new -Metasm::RubyHack.set_object_method_binary(o, 'bar', m, 2) + m = Metasm::RubyHack.compile_c(src).encoded -puts "test1" -o.bar(4, "blabla\n") -puts "test2" -o.bar(2, "foo\n") + Metasm::RubyHack.set_method_binary(Foo, 'bar', m, 2) + + Foo.new.bar(4, "blabla\n") + Foo.new.bar(2, "foo\n") +when :compile_ruby + abort 'need ' if ARGV.empty? + require 'pp' + puts '#if 0' + ARGV.each { |av| + next if not av =~ /^(.*)([.#])(.*)$/ + cls, sg, meth = $1, $2, $3.to_sym + sg = { '.' => true, '#' => false }[sg] + cls = cls.split('::').inject(::Object) { |o, cst| o.const_get(cst) } + if sg + ast = Metasm::RubyHack.read_singleton_method_ast(cls, meth) + cls.method(meth) if not ast # raise NoMethodError + puts ' --- ast ---' + pp ast + Metasm::RubyStaticCompiler.compile_singleton(cls, meth) + else + ast = Metasm::RubyHack.read_method_ast(cls, meth) + cls.instance_method(meth) if not ast + puts ' --- ast ---' + pp ast + Metasm::RubyStaticCompiler.compile(cls, meth) + end + } + puts '', ' --- C ---', '#endif' + puts Metasm::RubyStaticCompiler.dump -when :dump_ruby_ast - -abort 'need args' if ARGV.length != 2 -c = Metasm.const_get(ARGV.shift) -m = ARGV.shift -ptr = Metasm::RubyHack.get_method_node_ptr(c, m) -require 'pp' -pp Metasm::RubyHack.read_node(ptr) when :test_jit - - -class Foo - def bla + class Foo + def bla(x=500) i = 0 - 20_000_000.times { i += 1 } + x.times { i += 16 } i end -end + end -t0 = Time.now -Metasm::RubyHack.compile_ruby(Foo, :bla) -t1 = Time.now -p Foo.new.bla -t2 = Time.now + t0 = Time.now + Metasm::RubyLiveCompiler.compile(Foo, :bla) + t1 = Time.now + ret = Foo.new.bla(0x401_0000) + puts ret.to_s(16), ret.class + t2 = Time.now -puts "compile %.3fs run %.3fs" % [t1-t0, t2-t1] + puts "compile %.3fs run %.3fs" % [t1-t0, t2-t1] + +when :generate_persistent + Metasm::RubyStaticCompiler.compile(Metasm::Preprocessor, :getchar, :ungetchar, :unreadtok, :readtok_nopp_str, :readtok_nopp, :readtok) + Metasm::RubyStaticCompiler.compile(Metasm::Expression, :reduce_rec, :initialize) + Metasm::RubyStaticCompiler.compile_singleton(Metasm::Expression, :[]) + c_src = Metasm::RubyStaticCompiler.dump + File.open('compiledruby.c', 'w') { |fd| fd.puts c_src } if $VERBOSE + puts 'compiling..' +begin ; require 'compiledruby' ; rescue LoadError ; end + # To encode to a different file, you must also rename the Init_compliedruby() function to match the lib name + Metasm::ELF.compile_c(Metasm::Ia32.new, c_src).encode_file('compiledruby.so') + puts 'ruby -r metasm -r compiledruby ftw' end end diff --git a/lib/metasm/samples/lindebug.rb b/lib/metasm/samples/lindebug.rb index 82286dda25..653620bd12 100644 --- a/lib/metasm/samples/lindebug.rb +++ b/lib/metasm/samples/lindebug.rb @@ -258,7 +258,7 @@ class LinDebug text << ' ' x += r.length + 11 } - text << (' '*(@console_width-x)) << "\n" << ' ' + text << (' '*([@console_width-x, 0].max)) << "\n" << ' ' x = 1 %w[esi edi ebp esp].each { |r| text << Color[:changed] if @rs.regs_cache[r] != @rs.oldregs[r] @@ -281,7 +281,7 @@ class LinDebug text << ' ' x += 2 } - text << (' '*(@console_width-x)) << "\n" + text << (' '*([@console_width-x, 0].max)) << "\n" end def updatecode @@ -342,7 +342,7 @@ class LinDebug if di text << if addr == @rs.regs_cache['eip'] - "*#{di.instruction}".ljust(@console_width-37) + "*#{di.instruction}".ljust([@console_width-37, 0].max) else " #{di.instruction}" << Ansi::ClearLineAfter end @@ -368,7 +368,7 @@ class LinDebug text << Color[:border] title = @rs.findsymbol(addr) pre = [@console_width-100, 6].max - post = @console_width - (pre + title.length + 2) + post = [@console_width - (pre + title.length + 2), 0].max text << Ansi.hline(pre) << ' ' << title << ' ' << Ansi.hline(post) << Color[:normal] << "\n" cnt = @win_data_height diff --git a/lib/metasm/tests/dynldr.rb b/lib/metasm/tests/dynldr.rb index 5ebf87b05a..4c00cc9600 100644 --- a/lib/metasm/tests/dynldr.rb +++ b/lib/metasm/tests/dynldr.rb @@ -14,5 +14,16 @@ class TestDynldr < Test::Unit::TestCase d.new_api_c('int memcpy(char*, char*, int)') d.memcpy(str, "9999", 2) assert_equal('9934', str) + + c_src = <