@
@  linux_logo in ARM assembler 0.25
@
@  Originally by
@       Vince Weaver
@
@  Crazy size-optimization hacks by
@       Stephan Walter
@
@  assemble with     "as -o ll.o ll.arm.s"
@  link with         "ld -o ll ll.o"
@
@  Syntax: GNU as, ARM (32-bit) instruction set.
@  Syscalls use the old "swi SYSCALL_BASE+number" convention (see notes below).

@ NOTE(review): N, F, THRESHOLD, P_BITS and POSITION_MASK are used below but
@ not defined in this file; presumably they come from this include - confirm.
.include "logo.include"

@ ARM has 31 registers, (only lower 16 visible to userspace)
@     r0-r7 are unbanked, always the same.  r8-r14 change depending on mode
@     + r0-r12 are general purpose
@     + r13 = stack pointer
@     + r14 = link register
@     + r15 = program counter
@       reading r15 in general gives you current PC+8 (exceptions for STM and STR)
@ 6 Status registers (only one visible in userspace)
@     - NZCVQ (Negative, Zero, Carry, oVerflow, saturate)
@     suffix most instructions with one of these to act on the condition codes:
@       EQ, NE (equal/not equal)  CS, CC (carry set/clear)
@       MI, PL (minus/plus)       VS, VC (overflow set/clear)
@       HI, LS (unsigned higher/lower-or-same)
@       GE, LT (greater-or-equal, less than)
@       GT, LE (greater than, less-than-or-equal)
@       AL (always)
@
@ comment character is a @   (a # only starts a comment at the start of a line)
@ gas supports "nop"=mov r0,r0
@ ldr = load register
@ adr reg,label = load label into addr (via pc-relative add or sub)
@ adrl = like above, but always an 8-byte instr
@ every instruction has a condition code
@ THUMB instructions... will do both.
@ ALU ops can also do a shift (no dedicated shift instr)
@ Shift goes last.  LSL, LSR (logical shift left/right)
@                   ASR (arithmetic shift right)
@                   ROR (rotate right)
@                   RRX (rotate right by one with extend, i.e. through carry)
@                   Carry out is the last value shifted out, or C if no value shifted
@
@ load store addressing modes for load word/store word/ unsigned byte
@   [ r , #+/- 12bitoffset]             = load as expected
@   [ r , +/- reg ]                     = load as expected
@   [ r , +/- reg, shift #shift amt ]   = load as expected
@   [ r , #+/- 12bitoffset]!            = pre-index / load+off, then if CC then write back new index
@   [ r , +/- reg ]!                    = pre-index / load+off, then if CC then write back new index
@   [ r , +/- reg, shift #shift amt]!   = pre-index / load+off, then if CC then write back new index
@   [ r ], #+/- 12bitoffset             = post-index / load, then if CC write back base+off
@   [ r ] , +/- reg                     = post-index / load, then if CC write back base+off
@   [ r ] , +/- reg, shift #shift amt   = post-index / load, then if CC write back base+off
@ for halfword, signed bytes you only get an 8bit offset and some other caveats
@
@ Constants are 8 bits, optionally rotated by an even amount
@ The PC is a gp register and can be written to by any ALU op
@ multiply with accumulate option, mul two numbers together, add in third, store to 4th
@ no support for unaligned memory access
@ Addressing modes include pre/post increment that can
@    save back the updated address to a register
@ Also, multiple registers worth of data can be read/stored at once
@
@ Syscalls.. old way is swi SYSBASE+SYSCALLNUM with args in r0-r6
@            New EABI way, syscall num is in r7, do a "swi 0"
@            for EABI you need kernel support and gcc > 4.0.0?
@
@ TODO - see if we can optimize with condition codes!

@ offsets into the results returned by the uname syscall
.equ U_SYSNAME,0
.equ U_NODENAME,65
.equ U_RELEASE,65*2
.equ U_VERSION,(65*3)
.equ U_MACHINE,(65*4)
.equ U_DOMAINNAME,65*5

@ offset into the results returned by the sysinfo syscall
.equ S_TOTALRAM,16

@ Syscalls
.equ SYSCALL_BASE,      0x900000
.equ SYSCALL_EXIT,      1
.equ SYSCALL_READ,      3
.equ SYSCALL_WRITE,     4
.equ SYSCALL_OPEN,      5
.equ SYSCALL_CLOSE,     6
.equ SYSCALL_SYSINFO,   116
.equ SYSCALL_UNAME,     122

@
.equ STDIN,0
.equ STDOUT,1
.equ STDERR,2

@ Local constants (previously bare numbers in the code)
.equ O_RDONLY,0                 @ flags argument for open()
.equ DISK_BUF_SIZE,4096         @ bytes read from /proc/cpuinfo
.equ CENTER_WIDTH,81            @ 80 columns, +1 so the trailing \n is not counted
.equ ASCII_BUF_SIZE,12          @ 10 decimal digits + NUL, plus index 0 left unused

@ Register roles that hold for the whole program:
@     r11 = address of data_begin   (strings are addressed as r11+offset)
@     r12 = address of bss_begin    (buffers are addressed as r12+offset)
@     r10 = write cursor into out_buffer while a text line is being built

        .text
        .globl _start
_start:
        ldr     r11,data_addr
        ldr     r12,bss_addr

        @=========================
        @ PRINT LOGO
        @=========================

@ LZSS decompression algorithm implementation
@ by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989
@ optimized some more by Vince Weaver
@
@     r1 = output pointer into out_buffer
@     r2 = R, current write index into text_buf
@     r3 = read pointer into the compressed logo,  r8 = end of the logo
@     r5 = flag byte in bits 0-7, with 0xff above it acting as an 8-step counter
@     r6 = bytes left to emit for the current item,  r7 = match position
@     r9 = text_buf

        ldr     r1,out_addr                     @ buffer we are printing to
        mov     r2,#(N-F)                       @ R
        add     r3,r11,#(logo-data_begin)       @ r3 points to logo data
        ldr     r8,logo_end_addr                @ r8 points to logo end
        ldr     r9,text_addr                    @ r9 points to text buf

decompression_loop:
        ldrb    r4,[r3],#+1                     @ load a flag byte, increment pointer
        mov     r5,#0xff                        @ load top as a hackish 8-bit counter
        orr     r5,r4,r5,LSL #8                 @ shift 0xff left by 8 and or in the byte we loaded

test_flags:
        cmp     r3,r8                           @ have we reached the end?
        bhs     done_logo                       @ if so, exit (unsigned: these are addresses)

        movs    r5,r5,lsr #1                    @ shift bottom bit into carry flag
        bcs     discrete_char                   @ if set, we jump to discrete char

offset_length:
        ldrb    r0,[r3],#+1                     @ load a byte, increment pointer
        ldrb    r4,[r3],#+1                     @ load a byte, increment pointer
                                                @ we can't load halfword as no unaligned loads on arm
        orr     r4,r0,r4,LSL #8                 @ merge back into 16 bits
                                                @ this has match_length and match_position
        mov     r7,r4                           @ copy r4 to r7
                                                @ no need to mask r7, as we do it
                                                @ by default in output_loop
        mov     r0,#(THRESHOLD+1)
        add     r6,r0,r4,LSR #(P_BITS)          @ r6 = (r4 >> P_BITS) + THRESHOLD + 1
                                                @                       (=match_length)

output_loop:
        ldr     r0,pos_mask                     @ mask does not fit an immediate, load it
        and     r7,r7,r0                        @ mask it
        ldrb    r4,[r9,r7]                      @ load byte from text_buf[]
        add     r7,r7,#1                        @ advance pointer in text_buf

store_byte:
        strb    r4,[r1],#+1                     @ store a byte, increment pointer
        strb    r4,[r9,r2]                      @ store a byte to text_buf[r]
        add     r2,r2,#1                        @ r++
        mov     r0,#(N)
        sub     r0,r0,#1                        @ N-1 built in two steps
        and     r2,r2,r0                        @ mask r

        subs    r6,r6,#1                        @ decrement count
        bne     output_loop                     @ repeat until the whole item is emitted

        tst     r5,#0xff00                      @ are the top (counter) bits 0?
        bne     test_flags                      @ if not, flag bits remain: keep using them
        b       decompression_loop              @ otherwise load a new flag byte

discrete_char:
        ldrb    r4,[r3],#+1                     @ load a byte, increment pointer
        mov     r6,#1                           @ we set r6 to one so byte
                                                @ will be output once
        b       store_byte                      @ and store it

@ end of LZSS code

done_logo:
        ldr     r1,out_addr                     @ buffer we are printing to
        bl      write_stdout                    @ print the logo

        @==========================
        @ PRINT VERSION
        @==========================
first_line:
        add     r0,r12,#(uname_info-bss_begin)  @ uname struct
        swi     SYSCALL_BASE+SYSCALL_UNAME      @ do syscall

        add     r1,r12,#(uname_info-bss_begin)  @ os-name from uname "Linux"
        ldr     r10,out_addr                    @ point r10 to out_buffer
        bl      strcat                          @ call strcat

        add     r1,r11,#(ver_string-data_begin) @ source is " Version "
        bl      strcat                          @ call strcat

        add     r1,r12,#((uname_info-bss_begin)+U_RELEASE)
                                                @ version from uname, ie "2.6.20"
        bl      strcat                          @ call strcat

        add     r1,r11,#(compiled_string-data_begin)
                                                @ source is ", Compiled "
        bl      strcat                          @ call strcat

        add     r1,r12,#((uname_info-bss_begin)+U_VERSION)
                                                @ compiled date
        bl      strcat                          @ call strcat

        mov     r3,#0xa
        strb    r3,[r10],#+1                    @ store a linefeed, increment pointer
        mov     r3,#0                           @ explicit zero: do not depend on uname's r0
        strb    r3,[r10]                        @ NUL terminate; r10 stays on the NUL so the
                                                @ length seen by center_and_print matches
                                                @ what strcat leaves for the other lines

        bl      center_and_print                @ center and print

        @===============================
        @ Middle-Line
        @===============================
middle_line:
        @=========
        @ Load /proc/cpuinfo into buffer
        @=========

        ldr     r10,out_addr                    @ point r10 to out_buffer

        add     r0,r11,#(cpuinfo-data_begin)    @ '/proc/cpuinfo'
        mov     r1,#O_RDONLY
        swi     SYSCALL_BASE+SYSCALL_OPEN       @ syscall, result comes back in r0
                                                @ (not checked: a failed open leaves
                                                @ disk_buffer as it was)
        mov     r5,r0                           @ save our fd
        ldr     r1,disk_addr
        mov     r2,#DISK_BUF_SIZE               @ 4096 is maximum size of proc file ;)
        swi     SYSCALL_BASE+SYSCALL_READ

        mov     r0,r5
        swi     SYSCALL_BASE+SYSCALL_CLOSE      @ close (to be correct)

        @=============
        @ Number of CPUs
        @=============
number_of_cpus:
        add     r1,r11,#(one-data_begin)        @ cheat.  Who has an SMP arm?
        bl      strcat

        @=========
        @ MHz
        @=========
print_mhz:
        @ the arm system I have does not report MHz

        @=========
        @ Chip Name
        @=========
chip_name:
        mov     r0,#'s'
        mov     r1,#'o'
        mov     r2,#'r'
        mov     r3,#' '
        bl      find_string                     @ find 'sor\t: ' and grab up to ' '

        add     r1,r11,#(processor-data_begin)  @ print " Processor, "
        bl      strcat

        @========
        @ RAM
        @========

        add     r0,r12,#(sysinfo_buff-bss_begin)
        swi     SYSCALL_BASE+SYSCALL_SYSINFO    @ sysinfo() syscall

        ldr     r3,[r12,#((sysinfo_buff-bss_begin)+S_TOTALRAM)]
                                                @ size in bytes of RAM
        movs    r3,r3,lsr #20                   @ divide by 1024*1024 to get M
        adc     r3,r3,#0                        @ round using the last bit shifted out

        mov     r0,#1                           @ 1 = append to out_buffer
        bl      num_to_ascii

        add     r1,r11,#(ram_comma-data_begin)  @ print 'M RAM, '
        bl      strcat                          @ call strcat

        @========
        @ Bogomips
        @========

        mov     r0,#'I'
        mov     r1,#'P'
        mov     r2,#'S'
        mov     r3,#'\n'
        bl      find_string

        add     r1,r11,#(bogo_total-data_begin)
        bl      strcat                          @ print bogomips total

        bl      center_and_print                @ center and print

        @=================================
        @ Print Host Name
        @=================================
last_line:
        ldr     r10,out_addr                    @ point r10 to out_buffer

        add     r1,r12,#((uname_info-bss_begin)+U_NODENAME)
                                                @ host name from uname()
        bl      strcat                          @ call strcat

        bl      center_and_print                @ center and print

        add     r1,r11,#(default_colors-data_begin)
                                                @ restore colors, print a few linefeeds
        bl      write_stdout

        @================================
        @ Exit
        @================================
exit:
        mov     r0,#0                           @ result is zero
        swi     SYSCALL_BASE+SYSCALL_EXIT       @ and exit


        @=================================
        @ FIND_STRING
        @=================================
        @ Searches disk_buffer for the three bytes r0,r1,r2, skips to just past
        @ the next ':' and one more byte, then copies bytes to out_buffer up to
        @ and including the first byte equal to r3.  That last byte is then
        @ replaced by a NUL and r10 is left pointing at the NUL.
        @ If the search reaches a zero byte first, nothing is copied.
        @
        @ In:    r0,r1,r2 = string to find
        @        r3       = char to end at
        @        r10      = write cursor in out_buffer
        @ Out:   r10      = updated write cursor
        @ Trash: r5, r7, flags; r0 is zeroed when a match was copied

find_string:
        ldr     r7,disk_addr                    @ look in cpuinfo buffer
find_loop:
        ldrb    r5,[r7],#+1                     @ load a byte, increment pointer
        cmp     r5,r0                           @ compare against first byte
        ldrb    r5,[r7]                         @ load next byte
        cmpeq   r5,r1                           @ if first byte matched, comp this one
        ldrb    r5,[r7,#+1]                     @ load next byte
        cmpeq   r5,r2                           @ if first two matched, comp this one
        beq     find_colon                      @ if all 3 matched, we are found

        cmp     r5,#0                           @ are we at EOF? (tests the third byte)
        beq     done                            @ if so, done

        b       find_loop

find_colon:
        ldrb    r5,[r7],#+1                     @ load a byte, increment pointer
        cmp     r5,#':'
        bne     find_colon                      @ repeat till we find colon

        add     r7,r7,#1                        @ skip the space

store_loop:
        ldrb    r5,[r7],#+1                     @ load a byte, increment pointer
        strb    r5,[r10],#+1                    @ store a byte, increment pointer
        cmp     r5,r3
        bne     store_loop

almost_done:
        mov     r0,#0
        strb    r0,[r10,#-1]!                   @ replace last value with NUL,
                                                @ r10 now points at that NUL
done:
        bx      lr                              @ return (bx, not blx: blx would rewrite lr)

        @================================
        @ strcat
        @================================
        @ Copies the NUL-terminated string at r1 to r10, including the NUL,
        @ then steps r10 back so it points at the NUL just written.
        @
        @ In:    r1  = value to cat
        @        r10 = output buffer cursor
        @ Out:   r10 = cursor on the new terminating NUL, r1 = past source NUL
        @ Trash: r3, flags

strcat:
        ldrb    r3,[r1],#+1                     @ load a byte, increment pointer
        strb    r3,[r10],#+1                    @ store a byte, increment pointer
        cmp     r3,#0                           @ is it zero?
        bne     strcat                          @ if not loop
        sub     r10,r10,#1                      @ point to one less than null
        bx      lr                              @ return

        @==============================
        @ center_and_print
        @==============================
        @ Prints the line held in out_buffer.  If the line is shorter than
        @ CENTER_WIDTH, first emits ESC [ <n> C with n = (CENTER_WIDTH-length)/2
        @ rounded up, which moves the cursor right so the line is centered.
        @ Lines that are too long are printed as they are, with no escape
        @ sequence in front of them.
        @
        @ In:    out_buffer = NUL-terminated string to center
        @        r10        = address of that string's NUL (length = r10-out_buffer)
        @ Trash: r0-r4, r6-r8, flags
        @ Falls through into write_stdout, which performs the final return.

center_and_print:
        stmfd   sp!,{lr}                        @ store return address on stack

        ldr     r2,out_addr                     @ point r2 to out_buffer
        sub     r2,r10,r2                       @ get length by subtracting

        rsbs    r6,r2,#CENTER_WIDTH             @ reverse subtract!  r6=81-length
                                                @ we use 81 to not count ending \n
                                                @ (rsbs: the flags must be set here)
        ble     done_center                     @ if nothing to pad, don't center

        add     r1,r11,#(escape-data_begin)     @ we want to output ^[[
        bl      write_stdout                    @ r6 survives this call

        movs    r3,r6,lsr #1                    @ divide by 2
        adc     r3,r3,#0                        @ round up using the bit shifted out
        mov     r0,#0                           @ print to stdout
        bl      num_to_ascii                    @ print number of spaces

        add     r1,r11,#(C-data_begin)          @ we want to output C
        bl      write_stdout

done_center:
        ldr     r1,out_addr                     @ point r1 to out_buffer
        ldmfd   sp!,{lr}                        @ restore return address from stack
                                                @ fall through: write_stdout prints the
                                                @ line and returns to our caller

        @================================
        @ WRITE_STDOUT
        @================================
        @ Writes the NUL-terminated string at r1 to STDOUT.
        @ An empty string results in a zero-length write.
        @
        @ In:    r1 = string
        @ Out:   r0 = result of the write syscall
        @ Trash: r0, r2, r3, flags
        @ write_stdout_we_know_size: alternate entry, r1 = buffer, r2 = byte count

write_stdout:
        mov     r2,#0                           @ clear count
str_loop1:
        ldrb    r3,[r1,r2]                      @ look at byte number r2
        cmp     r3,#0
        addne   r2,r2,#1                        @ count it if it is not the NUL
        bne     str_loop1                       @ repeat till zero

write_stdout_we_know_size:
        mov     r0,#STDOUT                      @ print to stdout
        swi     SYSCALL_BASE+SYSCALL_WRITE      @ run the syscall
        bx      lr                              @ return

        @#############################
        @ num_to_ascii
        @#############################
        @ Converts r3 to decimal text in ascii_buffer (built backwards from the
        @ end of the buffer) and then either prints it or appends it.
        @
        @ In:    r3 = value to print (unsigned)
        @        r0 = 0=stdout, 1=strcat
        @        r10 = output cursor (only used, and advanced, when r0 != 0)
        @ Trash: r1, r3, r4, r7, r8, flags, plus whatever the routine it
        @        tail-branches to (strcat or write_stdout) trashes

num_to_ascii:
        stmfd   sp!,{r10,lr}                    @ store r10 and return address on stack
        add     r10,r12,#(ascii_buffer-bss_begin)
        add     r10,r10,#(ASCII_BUF_SIZE-2)     @ last digit slot; the byte after it is
                                                @ never written and stays 0 in .bss,
                                                @ which terminates the string
        mov     r4,#10                          @ we'll be dividing by 10
div_by_10:
        bl      divide                          @ Q=r7, R=r8
        add     r8,r8,#0x30                     @ convert to ascii
        strb    r8,[r10],#-1                    @ store a byte, decrement pointer
        adds    r3,r7,#0                        @ move Q in for next divide, update flags
        bne     div_by_10                       @ if Q not zero, loop

write_out:
        add     r1,r10,#1                       @ adjust pointer to the first digit
        ldmfd   sp!,{r10,lr}                    @ restore r10 and return address

        cmp     r0,#0
        bne     strcat                          @ if 1, strcat
        b       write_stdout                    @ else, print to stdout

        @===================================================
        @ Divide - because ARM has no hardware int divide
        @ yes this is an awful algorithm, but simple
        @ and uses few registers
        @===================================================
        @ Unsigned division by repeated subtraction.
        @
        @ In:    r3 = numerator, r4 = denominator (must not be 0: would never end)
        @ Out:   r7 = quotient,  r8 = remainder
        @ Trash: flags only (r3 and r4 are preserved)

divide:
        mov     r7,#0                           @ zero out quotient
        mov     r8,r3                           @ remainder starts as the numerator
divide_loop:
        cmp     r8,r4                           @ does the denominator still fit?
        subhs   r8,r8,r4                        @ if so take it away...
        addhs   r7,r7,#1                        @ ...and count it (flags untouched)
        bhs     divide_loop                     @ same flags as the cmp above
        bx      lr                              @ return

@ Literal pool: absolute addresses loaded PC-relative by the code above
bss_addr:       .word bss_begin
data_addr:      .word data_begin
out_addr:       .word out_buffer
disk_addr:      .word disk_buffer
logo_end_addr:  .word logo_end
pos_mask:       .word ((POSITION_MASK<<8)+0xff)
text_addr:      .word text_buf

@===========================================================================
@       section .data
@===========================================================================
.data
data_begin:
ver_string:             .ascii  " Version \0"
compiled_string:        .ascii  ", Compiled \0"
processor:              .ascii  " Processor, \0"
ram_comma:              .ascii  "M RAM, \0"
bogo_total:             .ascii  " Bogomips Total\n\0"

default_colors:         .ascii  "\033[0m\n\n\0"
escape:                 .ascii  "\033[\0"
C:                      .ascii  "C\0"

cpuinfo:                .ascii  "/proc/cpuinfo\0"

one:                    .ascii  "One \0"

@ NOTE(review): the labels logo and logo_end are used above but not defined in
@ this file; presumably this include provides them - confirm.
.include        "logo.lzss_new"

@============================================================================
@       section .bss
@============================================================================
.bss
bss_begin:
.lcomm uname_info,(65*6)
.lcomm sysinfo_buff,(64)
.lcomm ascii_buffer,ASCII_BUF_SIZE
.lcomm text_buf, (N+F-1)

.lcomm disk_buffer,DISK_BUF_SIZE        @ we cheat!!!!
.lcomm out_buffer,16384

        @ see /usr/src/linux/include/linux/kernel.h