;; ;; linux_logo in m88k assembler 0.33 ;; ;; by Vince Weaver ;; ;; assemble with "as -o ll.o ll.m88k.s" ;; link with "ld -o ll ll.o" ;; m88k - a short-lived RISC chip made by Motorola after ;; the m68k but before the PPC ;; No Linux port is ever likely to happen, so we use NetBSD ;; under gxemul ;; m88k features (88110 specifically) ;; - 32 integer registers, r0-r31, r0 = zero, r1 = branch target ;; - 32 80-bit FP registers, x0-x31, x0 = zero ;; - GPU (!) instructions, mainly for packing pixels ;; - Target Instruction Cache (TIC) caches insns at branch target ;; thus if predicted can run those w/o stalling! ;; - Both normal TLB and a "block-sized" software managed TLB ;; - issues in order, can complete out-of-order ;; - 8kB L1 I and D caches, 2-way ;; - aligned load/stores only ;; - can be big or little endian (big endian is default) ;; - immediates can be signed or unsigned, set by software? ;; - 3 operand instructions ;; - branch delay slot is optional (!) ;; - static branch prediction based on eq0, gt0, etc ;; flags - C = carry ;; instructions ;; - integer : add, addu (add.ci add.co add.cio various carry) ;; cmp (returns a 16-bit value with various flags set) ;; divs ;; divu (.d makes it a 64-bit division using 2 reg pair) ;; muls, mulu (mulu.d is a 64-bit result) ;; sub, subu (similar carry/borrow to add) ;; - bit : clr, ext (extract bit field), extu ;; ff0 (find first 0) 0 is LSB, 31 MSB 32= not found ;; ff1 (find first 1) ;; mak (make bit field) select bits from a register ;; rot (rotate) rotates right ;; set (set bit field) ;; - logical : and (and.c complements, and.u does upper 16 bits) ;; mask, ;; or (or.c complements, or.u does upper 16 bits) ;; xor (xor.c complements, xor.u does upper 16 bits) ;; - GPU : padd, padds, pcmp, pmul, ppack, prot, psub, psubs, punpk ;; - branches : bb0 (branch if selected bit is 0. bb0.n has delay slot) ;; bb1 (branch if selected set is 1. bb1.n has delay slot) ;; bcnd (eq0, ne0, gt0, lt0, ge0, le0) also .n option ;; br (branch always) also a .n option ;; bsr (branch subroutine). saved in r1. .n option ;; illop (illegal op), jmp, jsr ;; rte, tb0, tb1, tbnd, tcnd (trap instructions) ;; - load/store : ld .b .bu .h .hu .d ;; either a reg or a 16-bit constant added to source ;; in scaled mode (surrounded in brackets) it is ;; shifted by the size of the opreation ;; lda, ldcr (control reg) ;; st (similar to ld) ;; stcr, xcr, xmem (exchange reg and mem) ;; - FPU : fadd, fcmp, fcmpu, fcvt, fdiv, fldcr ;; flt (convert int to fp), fmul, fsqrt (software implemented), fstcr ;; fsub, fxcr, int (round floating point to int) ;; mov, nint (round fp to nearest int), trnc (truncate fp to int) ;; ;; - NO SHIFT INSTRUCTIONS. ;; for "asr rx,y" use "ext rx,rx,32" ;; for "lsr rx,y" use "extu rx,rx,32" ;; for "lsl rx,y" use "mak rx,rx,32" ;; p 353 begins instruction reference ;; r0 = constant 0 ;; r1 = return address ;; r2-r9 = called proc parameters ;; r10-r13 = scratch ;; r14-r25 = callee preserved ;; r26-r29 = reserved by linker ;; r30 = frame pointer ;; r31 = stack pointer ;; openBSD ;; use ktrace/kdump instead of strace ;; system calls are done with "tb0 0,0,0x80" ;; syscall number in r13 ;; arguments in r2-r9 ;; return value in r2 (which is why r2 has result of execve at start?) ;; URGH! OpenBSD expects a sycall to look like this ;; tb0 0,0,0x80 ;; br error ;; jmp r1 ;; It modifies the trap return to skip an instruction if no error ;; So you have to waste an instruction if you are not handling errors /* OPTIMIZATION */ /* + 8192 = Initial code, straight port from PPC code */ /* + 6008 = Merge data and text segments */ /* + 4096 = strip. aout format wants page size multiple */ /* + 1376 = don't count trailing zeros toward filesize */ /* + 1352 = we are smaller than 64k so pointers are only 16-bit */ /* this saves us 4 bytes for each pointer load */ /* + 1344 = some minor tweaks to the LZSS decompression code */ /* + 1320 = make pointers 16 bit with the first line code */ /* + 1284 = make pointers 16 bit with the middle and last line */ /* + 1264 = complete 16-bit pointer conversion */ /* + 1260 = have num_to_ascii fall through to strcat */ /* + 1248 = make CTL_KERN the common case for sysctl */ /* + 1240 = remove "hello world" test string that I had forgotten */ ;; Sycscalls ;; /usr/include/sys/syscall.h .equ SYSCALL_EXIT, 1 .equ SYSCALL_READ, 3 .equ SYSCALL_WRITE, 4 .equ SYSCALL_OPEN, 5 .equ SYSCALL_CLOSE, 6 .equ SYSCALL_SYSCTL, 202 ;; sysctls ;; /usr/include/sys/sysctl.h .equ CTL_KERN,1 .equ CTL_HW,6 .equ KERN_OSTYPE,1 .equ KERN_OSRELEASE,2 .equ KERN_HOSTNAME,10 .equ KERN_OSVERSION,27 .equ HW_PHYSMEM,5 .equ STDIN, 0 .equ STDOUT,1 .equ STDERR,2 .include "logo.include" .globl start .text .align 4 start: or r0,r0,r0 /* first 2 instructions ignored? */ or r0,r0,r0 /* I must be doing something wrong */ /* ========================= */ /* PRINT LOGO /* ========================= */ ;; LZSS decompression algorithm implementation ;; by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989 ;; optimized some more by Vince Weaver /* or.u r18,r0,hi16(out_buffer) */ /* put out_buffer in r18 */ /* or r18,r18,lo16(out_buffer) */ or r18,r0,lo16(out_buffer) /* fits in just 16-bits */ or r16,r18,r0 /* copy out_buffer to r16 */ or r21,r0,lo16(text_buf) /* put text_buf in r21 */ or r9,r0,lo16(logo) /* r9 points to logo */ or r12,r0,lo16(logo_end) /* r12 points to end of the logo */ or r8,r0,(N-F) /* r8 is R (N-F) */ decompression_loop: ld.bu r10,r9,r0 /* load in a byte */ addu r9,r9,1 /* increment pointer */ or r11,r10,0xff00 /* copy to r11 and load top as */ /* a hackish 8-bit counter */ test_flags: cmp r10,r12,r9 /* have we reached the end? */ bb1 2,r10,done_logo /* if eq (bit 2 set) then exit */ mask r13,r11,0x1 /* get the bottom bit */ extu r11,r11,32<1> /* logical shift right */ bcnd ne0,r13,discrete_char /* if low bit set, discrete_char */ offset_length: ld.bu r10,r9,r0 /* load in 16-bit little endian */ ld.bu r24,r9,1 /* in two byte chunks */ addu r9,r9,2 mak r24,r24,32<8> /* re-arrange to be big-endian */ or r24,r24,r10 extu r15,r24,32<10> /* P_BITS is 10 */ addu r15,r15,THRESHOLD+1 /* r15 = (r10>>P_BITS)+THRESHOLD+1 */ /* == match_length */ output_loop: mask r24,r24,(POSITION_MASK<<8+0xff) /* mask it */ ld.bu r10,r21,r24 /* load byte from txt_buf[] */ addu r24,r24,1 /* increment pointer */ store_byte: st.b r10,r16,r0 /* store byte to output */ addu r16,r16,1 /* increment pointer */ st.b r10,r21,r8 /* store into text_buf[r] */ addu r8,r8,1 /* increment r */ mask r8,r8,(N-1) /* mask to handle oflo */ subu r15,r15,1 /* decrement out count */ bcnd ne0,r15,output_loop /* loop until r15 is zero */ mask r13,r11,0xff00 /* test to see if we are done byte */ bcnd ne0,r13,test_flags br decompression_loop discrete_char: ld.bu r10,r9,r0 /* load a byte */ addu r9,r9,1 /* increment pointer */ or r15,r0,1 /* set length to 1 */ br store_byte done_logo: or r3,r18,r0 /* restore out_buffer */ bsr write_stdout /* and print the logo */ or r14,r18,r0 /* restore out_buffer to r14 */ /*==========================*/ /* PRINT VERSION */ /*==========================*/ first_line: /* OpenBSD does not have a UNAME syscall */ /* We have to use sysctl instead */ or r11,r0,KERN_OSTYPE bsr run_sysctl_kern or r16,r0,lo16(sysctl_info) bsr strcat /* os-name from sysctl "OpenBSD" */ or r16,r0,lo16(ver_string) bsr strcat /* source is " Version " */ or r11,r0,KERN_OSRELEASE bsr run_sysctl_kern or r16,r0,lo16(sysctl_info) bsr strcat /* version from sysctl "4.4" */ or r16,r0,lo16(compiled_string) bsr strcat /* source is ", Compiled " */ or r11,r0,KERN_OSVERSION bsr run_sysctl_kern or r16,r0,lo16(sysctl_info) bsr strcat /* compiled info */ or r16,r0,lo16(linefeed) bsr strcat /* source is "\n" */ bsr center_and_print /* write it to screen */ /*===============================*/ /* Middle-Line */ /*===============================*/ middle_line: or r14,r18,r0 /* restore out buffer */ /*================================*/ /* Load /proc/cpuinfo into buffer */ /*================================*/ /* There is no /proc/cpuinfo on OpenBSD */ /* So we fake it. We could do this all */ /* with sysctls, but I want the exe size */ /* to be comparable with Linux. So */ /* instead we parse the output of sysctl */ or r13,r0,SYSCALL_OPEN /* open() */ or r2,r0,lo16(cpuinfo) /* '/proc/cpuinfo' */ or r3,r0,r0 /* O_RDONLY */ tb0 0,r0,0x80 /* syscall. fd in r2. */ br exit /* exit if error */ or r8,r2,r0 /* save our fd for later */ or r13,r0,SYSCALL_READ /* read() */ or r3,r0,lo16(disk_buffer) or r4,r0,32768 /* assume less than 32k */ tb0 0,r0,0x80 br exit /* exit if error */ or r2,r0,r8 /* restore fd */ or r13,r0,SYSCALL_CLOSE /* close() */ tb0 0,r0,0x80 br exit /* exit if error */ /*================*/ /* Number of CPUs */ /*================*/ number_of_cpus: /* Assume 1 CPU for now, even though people have run SMP m88k*/ or r16,r0,lo16(one) bsr strcat /* One */ /*=========*/ /* MHz */ /*=========*/ print_mhz: or.u r2,r0,('o'<<8)+'d' or r2,r2,('e'<<8)+'l' /* find model */ or r3,r0,',' or r4,r0,'M' bsr find_string or r16,r0,lo16(megahertz) bsr strcat /* MHz */ /*=============*/ /* Chip Name */ /*=============*/ chip_name: or.u r2,r0,('o'<<8)+'d' or r2,r2,('e'<<8)+'l' /* find model */ or r3,r0,'=' /* start at = */ or r4,r0,',' /* grab until , */ bsr find_string or r16,r0,lo16(comma) bsr strcat /* ", " */ /*========*/ /* RAM */ /*========*/ ram: or r10,r0,CTL_HW or r11,r0,HW_PHYSMEM bsr run_sysctl or r3,r0,lo16(sysctl_info) ld r2,r3,r0 /* load bytes of RAM into r2 */ extu r2,r2,32<20> /* divide by 2^20 to get MB */ or r5,r0,r0 /* set to write using strcat */ bsr num_to_ascii /* convert to ascii */ or r16,r0,lo16(ram_comma) bsr strcat /* "M RAM, " */ /*============*/ /* Bogomips */ /*============*/ bogomips: /* OpenBSD has no concept of Bogomips... */ bsr center_and_print /* center and print it */ /*=================================*/ /* Print Host Name */ /*=================================*/ last_line: or r14,r18,r0 /* restore out buffer */ or r11,r0,KERN_HOSTNAME bsr run_sysctl_kern or r16,r0,lo16(sysctl_info) bsr strcat /* hostname */ bsr center_and_print or r3,r0,lo16(default_colors) bsr write_stdout /* restore the default colors */ /*================================*/ /* Exit */ /*================================*/ exit: or r2,r0,r0 /* exit value of zero */ or r13,r0,SYSCALL_EXIT /* put the exit syscall in r13 */ tb0 0,r0,0x80 /* and exit */ /*=================================*/ /* FIND_STRING */ /*=================================*/ /* r3 is char to start at */ /* r4 is char to end at */ /* r2 is the 4-char to look for */ /* r5,r6,r7 trashed */ find_string: or r5,r0,lo16(disk_buffer) /* Look in disk buffer */ find_loop: ld.bu r6,r5,0 /* load first byte */ mak r7,r6,32<8> ld.bu r6,r5,1 /* load second byte */ or r7,r7,r6 mak r7,r7,32<8> ld.bu r6,r5,2 /* load third byte */ or r7,r7,r6 mak r7,r7,32<8> ld.bu r6,r5,3 /* load fourth byte */ or r7,r7,r6 addu r5,r5,1 /* increment pointer */ bcnd eq0,r6,done /* if null, we are done */ cmp r7,r2,r7 /* compare with out 4 char string */ bb1 3,r7,find_loop /* if (ne==3), keep looping */ /* if we get this far, we matched */ find_start_char: ld.bu r6,r5,0 /* repeat till we find r3 char */ addu r5,r5,1 bcnd eq0,r6,done /* if null, we are done */ cmp r7,r6,r3 bb1 3,r7,find_start_char /* if no match, repeat */ store_loop: ld.bu r6,r5,r0 /* load character */ addu r5,r5,1 bcnd eq0,r6,done /* if null, bail */ cmp r7,r6,r4 /* is it end char? */ bb1 2,r7,almost_done /* if so, finish */ st.b r6,r14,r0 /* if not store and continue */ addu r14,r14,1 br store_loop almost_done: st.b r0,r14,r0 /* replace last value with null */ done: jmp r1 /* return */ /*==============================*/ /* center_and_print */ /*==============================*/ /* r14 is end of buffer */ /* r18 is start of buffer */ /* r19 is saved link register */ /* r10,r11 trashed */ center_and_print: or r19,r1,r0 /* back up return address */ subu r10,r14,r18 /* calc length of buffer */ cmp r11,r10,80 /* see if we are >80 */ bb1 7,r11,done_center /* see if ge (bit 7) */ or r3,r0,lo16(escape) bsr write_stdout /* print escape character */ or r11,r0,80 /* 80 column screen */ subu r11,r11,r10 /* subtract strlen */ extu r2,r11,32<1> /* divide by two */ or r5,r0,1 /* print to stdout */ bsr num_to_ascii /* print number */ or r3,r0,lo16(C) bsr write_stdout /* print C character */ /* ansi for shift right is ^[numC */ done_center: or r3,r18,r0 /* point to string to print */ or r1,r19,r0 /* restore link register */ /* write_stdout returns for us */ /*================================*/ /* WRITE_STDOUT */ /*================================*/ /* r3 has string */ /* r2,r4,r13 trashed */ write_stdout: or r4,r0,r0 /* string length counter */ strlen_loop: ld.bu r2,r3,r4 /* get byte from (r3+r4) */ addu r4,r4,1 /* increment counter */ bcnd ne0,r2,strlen_loop /* if not zero keep counting */ subu r4,r4,1 /* adjust to not count final 0 */ or r13,r0,SYSCALL_WRITE /* write syscall */ or r2,r0,STDOUT /* write to stdout */ tb0 0,r0,0x80 /* syscall */ br exit /* exit if error */ jmp r1 /* return */ /*==============================*/ /* run_sysctl */ /*==============================*/ /* r10,r11 = sysctl number */ /* size is hard-coded 256 */ /* result goes to sysctl_info */ run_sysctl_kern: or r10,r0,CTL_KERN /* setup more common CTL_KERN */ run_sysctl: or r2,r0,lo16(sysctl_num1)/* arg1: point r2 to sysctl_num1 */ st r10,r2,r0 /* save r10 to sysctl_num1 */ st r11,r2,4 /* save r11 to sysctl_num2 */ or r3,r0,0x2 /* arg2: set size to 2 */ addu r4,r2,8 /* arg3: point r4 to sysctl_info */ addu r5,r2,256 /* arg4: point r5 to sysctl_size */ or r10,r0,256 /* sysctl_size is 256 */ st r10,r5,r0 /* save to memory */ or r6,r0,r0 /* arg5: NULL */ or r7,r0,r0 /* arg6: 0 */ or r13,r0,SYSCALL_SYSCTL /* set sysctl syscall */ tb0 0,r0,0x80 br exit /* exit if error */ jmp r1 /* return */ /*===============================*/ /* Num to Ascii */ /*===============================*/ /* num is in r2 */ /* r5=0 strcat, otherwise stdout */ /* r5,r6,r7,r8 trashed */ num_to_ascii: or r7,r0,lo16(num_to_ascii_buff+9) /* the end of a backwards growing */ /* 10 byte long buffer. */ div_by_10: or r4,r2,r0 /* copy to r4 */ divu r2,r2,10 /* divide r3 by 10, store in r3 */ mulu r8,r2,10 /* find remainder. 1st q*dividend */ subu r6,r4,r8 /* then subtract from original = R */ addu r6,r6,0x30 /* convert remainder to ascii */ st.b r6,r7,r0 /* Store to backwards buffer */ subu r7,r7,1 /* decrement backwards buffer */ bcnd ne0,r2,div_by_10 /* was quotient zero? */ /* if not keep dividing */ write_out: addu r7,r7,1 /* fix to point at buffer */ bcnd eq0,r5,strcat_num /* if r5 is 0 then skip ahead */ stdout_num: or r3,r7,r0 /* point to our buffer */ br write_stdout /* stdout will return for us */ jmp r1 /* return */ strcat_num: or r16,r7,r0 /* point to the beginning */ /*================================*/ /* strcat */ /*================================*/ /* r10 = scratch */ /* r16 = source */ /* r14 = destination */ strcat: strcat_loop: ld.bu r10,r16,r0 /* load a byte from [r16] */ st.b r10,r14,r0 /* store a byte to [r14] */ addu r14,r14,1 addu r16,r16,1 bcnd ne0,r10,strcat_loop /* loop if not zero */ subu r14,r14,1 /* point to one less than null */ jmp r1 /* return */ /*==========================================================================*/ /* .data */ /*==========================================================================*/ data_begin: ver_string: .ascii " Version \0" compiled_string: .ascii ", Compiled \0" linefeed: .ascii "\n\0" one: .ascii "One\0" megahertz: .ascii "MHz \0" comma: .ascii ", \0" ram_comma: .ascii "M RAM, " bogo_total: .ascii "Unknown Bogomips Total\n\0" default_colors: .ascii "\033[0m\n\n\0" escape: .ascii "\033[\0" C: .ascii "C\0" .ifdef FAKE_PROC cpuinfo: .ascii "proc/cpu.m88k\0" .else cpuinfo: .ascii "/proc/cpuinfo\0" .endif .include "logo.lzss_new" /*==========================================================================*/ ;;.bss /*==========================================================================*/ .lcomm bss_begin,0 .lcomm num_to_ascii_buff,10 .lcomm text_buf, (N+F-1) /* These buffers must follow each other */ .lcomm out_buffer,16384 .lcomm sysctl_num1,4 .lcomm sysctl_num2,4 .lcomm sysctl_info,256 .lcomm sysctl_info_size,4 .lcomm disk_buffer,32768