#
#  linux_logo in s390 assembler 0.28
#
#  by
#       Vince Weaver
#
#  assemble with     "as -o ll.o ll.s390.s"
#  link with         "ld -o ll ll.o"

#  Note - we assume decimal and string hardware is installed.
#         This might not be true on older machines.
#
#  gas's addressing is a bit odd.  Labels start from the beginning of
#  the segment, rather than being IP relative.  So I use %r1 as an
#  offset reg.

# register ABI
#    r0,r1   = general purpose
#    r2,r3   = params, return values
#    r4-r6   = params
#    r7-r11  = local
#    r12     = GOT pointer
#    r13     = literal pool pointer
#    r14     = return address
#    r15     = stack pointer

# Hardware Summary
#    + 16 32-bit integer registers (r0-r15)
#    + 4-16 64-bit floating point registers
#    + PSW (Program Status Word) has PC and other info
#    + can address 31-bit or 24-bit address spaces
#    variable instruction length (16, 32 or 48 bits)
#    big-endian

# addressing modes.  not orthogonal:
#    reg/reg
#    immediate
#    crazy complicated combinations
#
#    B=Base  (value in register)
#    X=index (value in a register)
#    D=12-bit immediate

# Luckily can avoid using EBCDIC

# Syscalls
#    values in r2-r6
#    svc NUM to execute
#    result returned in r2

# assembly instructions
#    note, can you tell this is a CISC architecture?
#
#    ar, a, ah, ahi  (add, add halfword, add halfword immediate)
#    nr, n, ni, nc   (and)
#    bal, balr       (branch and link) [use branch and save instead?]
#    basr, basm, bsm (branch and save)
#    bcr, bc         (branch on condition)
#    bctr, bct       (branch on count) [like x86 loop instruction]
#    bxh, bxle       (branch on index high, branch on index low or equal)
#    bras            (branch relative and save)
#    brc             (branch relative on condition)
#    brct            (branch relative on count)
#    brxh, brxle     (branch relative on index high / index low or equal)
#    cksm            (checksum)
#    cr, c           (compare) condition codes 0=equal, 1=first low, 2=first high
#    cfc             (compare and form codeword) [crazy instruction for sorting?]
#    cs, cds         (compare and swap)
#    ch, chi         (compare halfword)
#    clr, cl, cli, clc (compare logical)
#    clm             (compare logical under mask)
#    clcl            (compare logical long) 64-bit compare
#    clcle           (compare logical long extended)
#    clst            (compare logical string) - compare up to 256 bytes
#    cuse            (compare until substring equal)
#    cvb, cvd        (convert to binary/decimal)
#    cuutf, cutfu    (convert to unicode/utf-8)
#    dr, d           (divide)
#    xr, x, xi, xc   (xor)
#    ex              (execute) - create an instruction and execute it
#    ic              (insert character) - insert 8 bits into a register
#    icm             (insert character under mask)
#    lr, l           (load)
#    la, lae         (load address, load address extended)
#    ltr             (load and test) - move register and set condition register
#    lcr             (load and complement) - negate a value
#    lh, lhi         (load halfword/immediate)
#    lm              (load multiple) - load consecutive mem into registers
#    lnr             (load negative) - negative of absolute value loaded
#    lpr             (load positive) - absolute value
#    mc              (monitor call)
#    mvi, mvc, mvcin, mvcl, mvcle
#                    (move/move inverse/move long/move long extended)
#    mvn             (move numerics)
#    mvpg            (move page) - move a whole 4kb page
#    mvst x,y        (move string) -
#                    copy string at y to x.
#                    r0 holds the terminating character
#    mvo, mvz        (move with offset / move zones)
#    mr, m, mh, mhi  (multiply / multiply halfword / multiply halfword immediate)
#    msr, ms         (multiply single)
#    or, o, oi, oc   (or)
#    pack
#    plo             (perform locked operation)
#    srst            (search string)
#    sla, sll, sldl  (shift left single / single logical / double)
#    sra, srl, srdl, srda (shift right single / logical / double)
#    st              (store)
#    stc, stcm       (store char, store char under mask)
#    sth             (store halfword)
#    stm             (store multiple)
#    sr, s           (subtract)
#    sh              (subtract halfword)
#    slr, sl         (subtract logical)
#    svc             (supervisor call)
#    ts              (test and set)
#    tm, tmh, tml    (test under mask, high, low)
#    tr, trt         (translate, translate and test)
#    unpk            (unpack)
#    upt             (update tree)
#    a whole raft of decimal instructions
#    a whole raft of floating point instructions
#    a whole raft of operating-system level instructions

# OPTIMIZATIONS (size history, in bytes)
#    + 1168 - first draft
#    + 1156 - remove unused code, minor ops
#    + 1140 - use "bctr" to end loops instead of subtract/compare
#    + 1096 - make num_to_ascii use divide instead of decimal opcodes
#    + 1092 - make r9 be a constant reg equalling 1
#    + 1088 - remove some more unused variables
#    + 1104 - make it properly print s if SMP system
#    + 1096 - replace out_buffer with r8

.include "logo.include"

# Byte offsets of the fields inside the buffer filled by the uname
# syscall; each field is 65 bytes wide.
.equ U_SYSNAME,(65*0)
.equ U_NODENAME,(65*1)
.equ U_RELEASE,(65*2)
.equ U_VERSION,(65*3)
.equ U_MACHINE,(65*4)
.equ U_DOMAINNAME,(65*5)

# Byte offset of the total-RAM field inside the sysinfo syscall result
.equ S_TOTALRAM,16

# Syscall numbers (operand of "svc")
.equ SYSCALL_EXIT,1
.equ SYSCALL_READ,3
.equ SYSCALL_WRITE,4
.equ SYSCALL_OPEN,5
.equ SYSCALL_CLOSE,6
.equ SYSCALL_STAT,106
.equ SYSCALL_SYSINFO,116
.equ SYSCALL_UNAME,122

# Standard file descriptors
.equ STDIN,0
.equ STDOUT,1
.equ STDERR,2

	.globl _start
_start:
	#=========================
	# PRINT LOGO
	#=========================

	# Work out where this segment was loaded.  basr with a zero
	# second operand does not branch; it only drops the address of
	# the next instruction (label "base") into r1.  "la %r2,base"
	# yields base's offset from the start of the segment, so the
	# difference is the segment's load address.  Every later
	# "label(%r1)" operand depends on r1 keeping this value.
	basr	%r1,0			# r1 = run-time address of "base"
base:
	la	%r2,base		# r2 = segment-relative offset of "base"
	sr	%r1,%r2			# r1 = load address of the segment

	# LZSS
# decompression algorithm implementation
# by Vince Weaver based on LZSS.C by Haruhiko Okumura 1989
#
# Expands the compressed logo (logo..logo_end) into out_buffer and
# prints it.  N, F, P_BITS, THRESHOLD and POSITION_MASK are not defined
# in this file; presumably they come from logo.include.
#
# Register roles inside this block:
#    r1  = segment load address (set in _start, never changed)
#    r2  = flag bits still to be consumed (tested from the low end)
#    r3  = scratch mask
#    r4  = byte being emitted
#    r5  = bytes left in the current run (1 for a literal)
#    r6  = read position inside text_buf for a back-reference
#    r7  = scratch
#    r8  = flag bits left before a new flag byte must be fetched
#    r9  = constant 1 (also relied on by the rest of the program)
#    r10 = "r", the write position inside text_buf
#    r11 = address of text_buf
#    r12 = input pointer, kept ONE BELOW the next input byte
#    r13 = logo_end-1, the end marker in the same "minus one" convention
#    r14 = output pointer into out_buffer
#
# Why "minus one": lh fetches two bytes big-endian, so loading from
# (address-1) puts the wanted byte into the low 8 bits of the register,
# which is where stc stores from and where the 0xff mask looks.

	lhi	%r10,(N-F)		# load "r"
	la	%r11,text_buf(%r1)	# point %r11 to text_buf
	la	%r12,logo-1(%r1)	# point %r12 one below logo (see above)
	la	%r13,logo_end-1(%r1)	# point %r13 one below logo_end
	la	%r14,out_buffer(%r1)	# point %r14 to output buffer
	lhi	%r9,1			# constant 1 in %r9

decompression_loop:
	lhi	%r8,8			# re-load shift counter: 8 flag bits per byte
	lh	%r2,0(0,%r12)		# load flags; the flag byte is the low byte
	ar	%r12,%r9		# increment input pointer

test_flags:
	cr	%r12,%r13		# see if we've reached the end
	je	done_logo		# if so, exit
	tml	%r2,1			# test lowest flag bit (sets condition code)
	srl	%r2,1			# shift right our flags; the branch below
					# still acts on the tml result, so srl must
					# stay between the two
	jne	discrete_char		# bit set -> next input byte is a literal

offset_length:
					# have to do two loads because
					# we are big-endian
					# there is probably an obscure
					# opcode that will do this for us
	lh	%r6,0(0,%r12)		# load a byte (into the low 8 bits)
	lhi	%r3,0xff		# load mask
	nr	%r6,%r3			# get only bottom byte
	lh	%r7,1(0,%r12)		# load next byte
	ahi	%r12,2			# increment pointer past both bytes
	nr	%r7,%r3			# mask off bottom byte
	sll	%r7,8			# shift
	or	%r6,%r7			# and combine as one 16 bit value
					# (first byte low, second byte high)
	lr	%r5,%r6			# length is (hw>>P_BITS)+THRESHOLD+1
	srl	%r5,P_BITS
	ahi	%r5,(THRESHOLD+1)

output_loop:
	lhi	%r3,((POSITION_MASK<<8)+0xff)
	nr	%r6,%r3			# mask to get position
	lh	%r4,0(%r6,%r11)		# load from text_buf[pos] (two bytes)
	srl	%r4,8			# shift to get actual byte; here the wanted
					# byte is the FIRST of the two loaded
	ar	%r6,%r9			# increment position

store_byte:
	stc	%r4,0(0,%r14)		# store byte to output
	ar	%r14,%r9		# increment output pointer
	stc	%r4,0(%r10,%r11)	# store byte to text_buf[r]
	ar	%r10,%r9		# increment r
	lhi	%r3,(N-1)		# load mask
	nr	%r10,%r3		# mask r with (N-1) so it wraps
	bct	%r5,output_loop(%r1)	# subtract 1 from run count, loop if not 0
	bct	%r8,test_flags(%r1)	# subtract 1 from flag-bit count and test
					# the next bit if any are left
	j	decompression_loop	# all 8 bits used: fetch a new flag byte

discrete_char:
	lh	%r4,0(0,%r12)		# load the literal byte (low 8 bits)
	ar	%r12,%r9		# increment input pointer
	lr	%r5,%r9			# set count (r5) to 1
	j	store_byte		# share the store/advance code above

done_logo:
	la	%r3,out_buffer(%r1)	# point to beginning of buffer
	lr	%r8,%r3			# keep out_buffer's address in r8; the rest
					# of the program uses r8 as "start of the
					# output line"
	bras	%r14,write_stdout	# and print it

	#==========================
# PRINT VERSION
	#==========================
	# Builds "<sysname> Version <release>, Compiled <version>\n" in the
	# output buffer (r8) and prints it centred.
	# r10 is loaded with the address of strcat here and is used as the
	# strcat call target for the rest of the program.
print_version:
	la	%r10,strcat(%r1)	# point r10 to strcat()
	lr	%r13,%r8		# destination = start of output buffer
	la	%r2,uname_info(%r1)	# r2 = buffer for the uname syscall
	lr	%r12,%r2		# U_SYSNAME is 0, so this also points
					# to the os-name from uname ("Linux")
	svc	SYSCALL_UNAME		# uname syscall
	basr	%r14,%r10		# strcat

	la	%r12,ver_string(%r1)	# source is " Version "
	basr	%r14,%r10		# strcat

	la	%r12,uname_info+U_RELEASE(%r1)	# release from uname, e.g. "2.4.1"
	basr	%r14,%r10		# strcat

	la	%r12,compiled_string(%r1)	# source is ", Compiled "
	basr	%r14,%r10		# strcat

	la	%r12,uname_info+U_VERSION(%r1)	# compiled date
	basr	%r14,%r10		# strcat

	lhi	%r4,0x0a00		# "\n\0" as one big-endian halfword
	sth	%r4,0(0,%r13)		# append linefeed and nul

	bras	%r14,center_and_print	# center and print

	#===============================
	# Middle-Line
	#===============================
middle_line:

	#=========
	# Load /proc/cpuinfo into buffer
	#=========
start_cpuinfo:
	la	%r2,cpuinfo(%r1)	# '/proc/cpuinfo'
	sr	%r3,%r3			# 0 = O_RDONLY
	svc	SYSCALL_OPEN		# open().  fd (or a negative error) in %r2

	ltr	%r6,%r2			# save the fd in r6 and test its sign
	jnm	cpuinfo_opened		# not negative: the open succeeded

	# open() failed.  Without the file every later find_string would
	# search an empty buffer and run off its end, so give up cleanly:
	# restore the terminal colours and exit with status 1.
	la	%r3,default_colors(%r1)
	bras	%r14,write_stdout
	lhi	%r2,1
	svc	SYSCALL_EXIT

cpuinfo_opened:
	l	%r3,disk_buffer_p(%r1)	# r3 = address of disk_buffer
	lhi	%r4,4096		# 4096 is maximum size of proc file ;)
	svc	SYSCALL_READ		# read()

	lr	%r2,%r6			# fd again
	svc	SYSCALL_CLOSE		# close (to be correct)
end_cpuinfo:

	#=============
	# Number of CPUs
	#=============
	# find_string copies the text after "sors...: " up to and including
	# the newline into temp_ascii, leaving r13 on that newline.
	#
	# A single digit 1..4 is printed through the four-entry "ordinal"
	# table ("One ", "Two ", ...).  Anything else (0, 5..9, or more than
	# one digit) would index past the table, so the digits themselves
	# are printed instead, followed by a space.
	#
	# r2 leaves this section equal to 1 only when there is exactly one
	# processor; the plural 's' logic further down depends on that.

	la	%r13,temp_ascii(%r1)	# destination for the count text
	la	%r4,search_sors(%r1)	# search for "sors"
	bras	%r14,find_string	# temp_ascii = "<digits>\n", r13 -> '\n'

	la	%r12,temp_ascii(%r1)	# r12 -> first digit (fallback source)
	lhi	%r4,0x2000		# " \0" as one big-endian halfword
	sth	%r4,0(%r13)		# turn "<digits>\n" into "<digits> "

	lr	%r4,%r13
	sr	%r4,%r12		# r4 = number of digits
	lhi	%r2,2			# assume "more than one processor"
	cr	%r4,%r9			# exactly one digit?
	jne	print_count		# no: print the digits as they are

	ic	%r2,0(%r12)		# r2 = the digit character
	lhi	%r3,0xf
	nr	%r2,%r3			# mask off (convert from ascii)
	lr	%r4,%r2
	ahi	%r4,-1			# decrement (arrays start at 0)
	lhi	%r3,3			# highest valid index into ordinal
	clr	%r4,%r3			# unsigned, so digit 0 (index -1) is
	jh	print_count		# rejected together with 5..9
	sla	%r4,2			# shift over (32bit pointer)
	l	%r12,ordinal(%r4,%r1)	# r12 = ordinal[index]

print_count:
	lr	%r13,%r8		# point to beginning of output buffer
	basr	%r14,%r10		# call strcat

	#=========
	# MHz
	#=========
	# No MHz detection on this architecture

	#=========
	# Chip Name
	#=========
	la	%r4,search_r_id(%r1)	# Look for "vendor_id"
	bras	%r14,find_string	# append its value to the output line

	la	%r12,processor(%r1)	# source is ' Processor'
# print ' Processor' (r12 was pointed at that string just above)
	basr	%r14,%r10		# call strcat

	la	%r12,s_comma(%r1)	# point to 's, '
	bct	%r2,no_s(%r1)		# r2 = processor count: count-1 != 0 means
					# more than one, so keep the 's'
	ar	%r12,%r9		# exactly one processor: point past the s
no_s:
	basr	%r14,%r10		# call strcat

	#========
	# RAM
	#========
	la	%r2,sysinfo_buff(%r1)	# do sysinfo() syscall
	svc	SYSCALL_SYSINFO

	l	%r7,sysinfo_buff+S_TOTALRAM(%r1)	# size in bytes of RAM
	srl	%r7,20			# divide by 1024*1024 to get M.
					# The byte count is unsigned, so this has
					# to be a LOGICAL shift: an arithmetic one
					# smears the top bit and turns 2 GB or
					# more into a huge bogus number.

	bras	%r14,num_to_ascii	# convert r7 to ascii, result at *r5

	lr	%r12,%r5		# point to ascii result
	basr	%r14,%r10		# call strcat

	la	%r12,ram_comma(%r1)	# print 'M RAM, '
	basr	%r14,%r10		# call strcat

	#========
	# Bogomips
	#========
	la	%r4,search_bogo(%r1)	# Grab number of bogomips
	bras	%r14,find_string	# append it to the output line

	la	%r12,bogo_total(%r1)	# source is " Bogomips Total"
	basr	%r14,%r10		# call strcat

	bras	%r14,center_and_print	# center the line and print it

	#=================================
	# Print Host Name
	#=================================
print_host_name:
	lr	%r13,%r8		# point to beginning of buffer
	la	%r12,uname_info+U_NODENAME(%r1)	# host name from uname()
	basr	%r14,%r10		# call strcat

	bras	%r14,center_and_print

	la	%r3,default_colors(%r1)	# restore the default colors
	bras	%r14,write_stdout

	#================================
	# Exit
	#================================
	xr	%r2,%r2			# exit(0)
	svc	SYSCALL_EXIT		# and exit

	#================================
	# strcat
	#================================
	# Copies the nul-terminated string at r12 to r13.
	#   in:   r13 = destination, r12 = source, r14 = return address
	#   out:  r13 = address of the nul just written, so a following
	#         call appends; r12 is advanced by the copy as well
	#   r0 = trashed (set to 0, the terminator mvst looks for)
strcat:
	sr	%r0,%r0			# nul is our end of line for mvst
move_loop:
	mvst	%r13,%r12		# move (copy) string from r12 to r13
	bc	3,move_loop(%r1)	# loop if the CPU ended early and
					# we need to continue
	basr	0,%r14			# return

	#=================================
	# NUM_TO_ASCII
	#=================================
	# r7 is input
	# output returned in *r5
	# r0, r4, r6 trashed

	# I tried to make an excessively clever version of this
	# using the "cvd"  (convert to decimal)
	#           "unpk" (unpack)
	#       and "ed"   (display)
	# opcodes, but it ended up being much longer than
	# the RISC version.
num_to_ascii:
	# Converts the non-negative number in r7 to decimal text.
	# Digits are written backwards, ending at temp_ascii+7; the string
	# is terminated by whatever is at temp_ascii+8 (presumably still
	# the zero that .lcomm storage starts out with).
	#   in:   r7 = value, r14 = return address
	#   out:  r5 = address of the first digit, r7 = 0
	#   r0, r4, r6 trashed; relies on r9 == 1
	la	%r5,temp_ascii+7(%r1)	# r5 -> slot for the last digit
	lhi	%r4,10			# dividing by 10
div_by_10:
	xr	%r6,%r6			# high half of the r6:r7 dividend = 0
	dr	%r6,%r4			# divide r6r7/r4
					# q is in r7, remainder in r6
	ahi	%r6,0x30		# convert to ascii
	stc	%r6,0(%r5)		# store
	ahi	%r5,-1			# update pointer
	xr	%r0,%r0
	cr	%r7,%r0			# are we at the end?
	jne	div_by_10		# if not, loop
	ar	%r5,%r9			# fix pointer to point to beginning
	basr	0,%r14			# return

	#=================================
	# FIND_STRING
	#=================================
	# Finds the first place in the cpuinfo buffer that matches the
	# 4-byte pattern at r4, skips ahead to the ':' that follows, then
	# copies the text starting two bytes after the ':' up to and
	# including the newline to r13.
	#   in:   r4  = address of the 4-byte search pattern
	#         r13 = destination
	#         r14 = return address
	#   out:  r13 = address of the copied newline
	#   r0, r5, r6, r7 trashed; relies on r9 == 1
find_string:
	# originally wanted to use "cuse" (compare until substring equal)
	# instruction, but it turns out that's not as much a search
	# as it is looking for matches at identical offsets into two strings

	l	%r5,0(%r4)		# 4-byte string in r5
	l	%r6,disk_buffer_p(%r1)	# point to cpuinfo
find_loop:
	l	%r7,0(%r6)		# load 4 bytes from cpuinfo
	ar	%r6,%r9			# increment pointer
	cr	%r5,%r7			# do we match search pattern?
	jne	find_loop		# if not, keep searching
					# (should check for EOF as well)

					# if we get this far, we matched

	lhi	%r0,':'			# search for colon
find_colon:
	# srst treats its first register as the address where the search
	# must stop.  r5 still holds the four pattern characters, which
	# as an address lies far away, so in practice the search is
	# unbounded.  When the ':' is found its address replaces r5.
	srst	%r5,%r6			# repeat till we find colon
	jo	find_colon		# cc 3: the CPU stopped early, resume
					# (same retry strcat and write_stdout do)
	ahi	%r5,2			# point after colon and space

	lhi	%r0,'\n'		# grab till end of line
store_loop:
	mvst	%r13,%r5		# append the value to the output
	jo	store_loop		# cc 3: copy not finished yet, resume
done:
	basr	0,%r14			# return

	#==============================
	# center_and_print
	#==============================
	# Prints the line held in the output buffer (r8), centred on an
	# 80 column screen by first emitting ESC [ <n> C.
	#   in:   r8  = start of the line (nul terminated)
	#         r13 = end of the line (as left by strcat / the caller)
	#         r14 = return address
	#   r15 is used to hold the return address across the inner calls.
	#   Everything num_to_ascii and write_stdout trash is trashed.
	#
	# The length is checked BEFORE anything is printed.  Emitting the
	# "ESC [" first and then skipping the rest for an over-long line
	# would leave a half-finished escape sequence in front of the
	# text, and the terminal would swallow its first characters.
center_and_print:
	lr	%r15,%r14		# save return address

	lr	%r3,%r8			# point to beginning of buffer
	sr	%r13,%r3		# calculate length
					# (r13=r13-r3)
	lhi	%r7,80			# load in 80
	cr	%r13,%r7		# compare r13 with r7
	bc	2,done_center(%r1)	# if r13 higher, print the line as it
					# is (r3 already points at it)

	la	%r3,escape(%r1)		# point to escape
	bras	%r14,write_stdout	# print to stdout (r7/r13 survive this)

	sr	%r7,%r13		# subtract size from 80
	sra	%r7,1			# then divide by 2

	bras	%r14,num_to_ascii	# call num_to_ascii
	lr	%r3,%r5			# r3 = the digits
	bras	%r14,write_stdout	# print shift to stdout

	la	%r3,C(%r1)		# point to the 'C' that ends the escape
	bras	%r14,write_stdout	# print to stdout

	lr	%r3,%r8			# point to beginning of buffer
done_center:
	lr	%r14,%r15		# restore return address
					# and fall through into write_stdout,
					# which returns for us

	#================================
	# WRITE_STDOUT
	#================================
	# Writes the nul-terminated string at r3 to stdout.
	#   in:   r3 = string, r14 = return address
	#   out:  r2 = result of the write syscall
	#   r0, r2, r4, r5 trashed
write_stdout:
	lhi	%r2,STDOUT		# write to STDOUT
strlen:
	sr	%r0,%r0			# clear r0: the byte to search for is nul
	sr	%r4,%r4			# srst takes r4 as the address where the
					# search must stop.  Whatever the caller
					# left in r4 could cut the search short
					# (or never let it finish), so pin it to
					# 0, which an upward search from the
					# string does not reach.
	lr	%r5,%r3			# save r3; r5 is the moving search pointer
strlen_loop:
	srst	%r4,%r5			# search for 0, starting at r5
					# and storing result in r4
	brc	1,strlen_loop		# cc 3 only: the cpu may stop checking
					# early (after 256 bytes or so), so we
					# loop again if it times out
	sr	%r4,%r3			# subtract original pointer from the
					# address of the nul to get the length
	svc	SYSCALL_WRITE		# run the syscall
	basr	0,%r14			# return

#===========================================================================
#	section .data
#===========================================================================
# Everything below is addressed as label(%r1).

ver_string:		.ascii	" Version \0"
compiled_string:	.ascii	", Compiled \0"
processor:		.ascii	" Processor\0"
s_comma:		.ascii	"s, \0"		# entered one byte in to skip the 's'
ram_comma:		.ascii	"M RAM, \0"
bogo_total:		.ascii	" Bogomips Total\n\0"

default_colors:		.ascii	"\033[0m\n\n\0"
escape:			.ascii	"\033[\0"
C:			.ascii	"C\0"

cpuinfo:		.ascii	"/proc/cpuinfo\0"

# Table of pointers to the spelled-out processor counts; four entries,
# index = count - 1.
ordinal:		.long	one,two,three,four

# 4-byte patterns handed to find_string (no terminator needed)
search_sors:		.ascii	"sors"
search_bogo:		.ascii	"bogo"
search_r_id:		.ascii	"r_id"

one:			.ascii	"One \0"
two:			.ascii	"Two \0"
three:			.ascii	"Three \0"
four:			.ascii	"Four \0"

.include "logo.lzss_new"

# Address of disk_buffer, fetched with "l" where the buffer is needed
disk_buffer_p:		.long	disk_buffer

#============================================================================
#	section .bss
#============================================================================
#.bss

.lcomm	temp_ascii,16			# scratch for num_to_ascii / CPU count
.lcomm	uname_info,(65*6)		# six 65-byte fields from uname
.lcomm	sysinfo_buff,(64)
	# urgh get above from /usr/src/linux/include/asm/stat.h
	# not glibc

.lcomm	text_buf,(N+F-1)		# LZSS sliding window
.lcomm	out_buffer,16384		# decompressed logo, then each text line

.lcomm	disk_buffer,4096		# we cheat!!!!