# # linux_logo in PDP-11 assembler 0.34 # # Vince Weaver # # assemble with "as -o ll.o ll.pdp11.s" # link with "ld -o ll ll.o" # # We use a cross-compiled pdp11-dec-aout binutils for assembling # 2.19 and earlier has a bug with index/index deferred mode # I use a patched version, the bug has been reported to binutils # # Architectural info # # + We assume a pdp-11/40 with EIS instructions available # + 64k address space (more is possible with MMU) # + 8 gp 16-bit regs, r0-r7. r6=stack, r7=pc # + optional fpu, ac0-ac3 # + Status registers Z,N,C,V # + Little-endian (Actually PDP-endian for 32-bit vals) # + no unaligned memory accesses # leis (limited eis) and eis (extended instruction set) are # instructions later added to the instruction set # Addressing modes: # + Single operand - one operand # + Double operand - two operands (source and destination) # + Direct Addressing # - Register - Rx # - Auto-inc - (Rx)+ - register used, then incremented (1 for byte,2 word) # - Auto-dec - -(Rx) - register decremented, then used # - Index - N(Rx) - offset used against Rx # + Indirect Addressing # - Register - @Rx or (Rx) - reg contains address of op # - Auto-inc - @(Rn)+ - reg used as pointer, then inc by 2 # - Auto-dec - @-(Rn) - reg dec by 2, then used as pointer # - Index - @N(Rx) - X is added to Rn and that is used as pointer # + PC based # - Immediate - #n - the immediate value follows immediately inline # - Absolute - @#A - the immediate value follows after, used as pointer # - Relative - A - address calculated relative to PC # - Relative Indirect - @A - address calculated relative to PC, use as point # Instruction summary # CLR, CLRB - clear a destination # COM, COMB - complement destination # INC, INCB - increment # DEC, DECB - decrement # NEG, NEGB - two's complement # TST, TSTB - set condition codes # ASR, ASRB - arith shift right (into carry) # ASL, ASLB - arith shift left (into carry) # ROR, RORB - rotate right (through carry) # ROL, ROLB - rotate left (through 
carry) # SWAB - swap upper and lower bytes # ADC, ADCB - add with carry # SBC, SBCB - subtract with carry # SXT - (leis) sign extend # MOV, MOVB - move (for movb, sign extends) # CMP, CMPB - compare and set condition codes # ADD - add # SUB - subtract # BIT, BITB - test if bits set, sets condition code # BIC, BICB - clears bits in destination set in source (mask. can make and) # BIS, BISB - or # XOR - (leis) xor # BR - branch unconditional # BNE, BEQ - branch if not equal / equal to zero (Z) # BPL, BMI - branch if plus or minus (N) # BVC, BVS - branch if (V) clear or set # BCC, BCS - branch if carry clear or set # BGE, BLT - branch if ge/lt # BGT, BLE - branch if gt/le # BHI, BLOS - branch if higher, branch if lower or same # BHIS, BLO - branch if higher or same, if lower # JMP - unconditional jump (can be used for vector jumps) # JSR - jump subroutine. pushes old addr on stack. # stack parameters can be read with auto-increment # special JSR PC insn for passing arguments in registers # RTS - return from subroutine # MARK - (leis) marks end of stack, returns can auto-clean up stack size # SOB - (leis) subtract one, branch if not zero # CLN,CLZ,CLV,CLC,CCC,SEN,SEZ,SEV,SEC,SCC - set/clear condition codes. 
#	can be ored together
# MUL  - OPTIONAL eis multiply instruction
# DIV  - OPTIONAL eis divide instruction
# ASH  - OPTIONAL eis shift right or left by more than one
# ASHC - OPTIONAL eis shift right or left of two combined registers

# System calls:
#
# See great resource: http://mdfs.net/Docs/Comp/Unix/pdp11/SYSCalls
#    by J.G.Hartson
#
# + use "trap X" syntax
# + sometimes the parameter is in r0 (such as exit)
# + other times the parameters directly follow inline
# + Sometimes self-modifying code and the indirect syscall are used
# + Return value in r0 if applicable

# Optimizations
# + 938 bytes = Original straight port of the THUMB code
# + 934 bytes = fix up the unaligned load code
# + 930 bytes = remove unnecessary use of r0 in output_loop, store_byte
# + 928 bytes = change mov $1 into an inc because we know existing val 0
# + 922 bytes = keep out_buffer value on top of stack
# + 906 bytes = semi-merge the prints in first-line.  In reality
#               we completely fake it so it could all be one string
#               but that would make the comparison across architectures
#               even more unfair
# + 904 bytes = make exit return 0 (was 5 before for debugging)
# + 900 bytes = replace mov 0 with clrb
# + 892 bytes = pass write_stdout param after the call
# + 890 bytes = make fallthrough from num_to_ascii be strcat
# + 4 bytes less = drop the mask of r4 in offset_length; bisb only
#               looks at the low byte of r4 (total not re-measured)

.include "logo.include"

# Syscalls
.equ SYSCALL_EXIT,	1
.equ SYSCALL_READ,	3
.equ SYSCALL_WRITE,	4
.equ SYSCALL_OPEN,	5
.equ SYSCALL_CLOSE,	6
#
.equ STDIN,0
.equ STDOUT,1
.equ STDERR,2

	.globl _start
_start:
	#=========================
	# PRINT LOGO
	#=========================

# LZSS decompression algorithm implementation
# by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989
# optimized some more by Vince Weaver

	# r0 = flag byte in the low half, 8-bit shift counter in the high half
	# r1 = write pointer into out_buffer
	# r2 = R (write index into text_buf)
	# r3 = read pointer into the compressed logo
	# r4 = byte to output
	# r5 = read index into text_buf
	# counter (memory word) = bytes left to emit for the current item
	#
	# The address of out_buffer stays on top of the stack for the
	# whole program; later sections reload it with "mov (r6),r1".

	mov	$out_buffer,r1		# out_buffer in r1
	mov	r1,-(r6)		# save on stack for later use

	mov	$(N-F),r2		# R is in r2

	mov	$logo,r3		# r3 points to logo data

decompression_loop:
	movb	(r3)+,r0		# load a flag byte, increment
	bis	$0xff00,r0		# top byte of ones = hackish 8-bit counter

test_flags:
	cmp	r3,$logo_end		# have we reached the end?
	beq	done_logo		# if so, exit

	asr	r0			# shift bottom bit into carry flag
	bic	$0x8000,r0		# make it a logical shift
					# (the bcs below still tests the
					#  carry produced by asr)
	bcs	discrete_char		# if C set, we jump to discrete char

offset_length:
	# load an unaligned little-endian word
	# and increment pointer by two
	# this has match_length and match_position

	movb	(r3)+,r4		# load byte1 (low half)
	movb	(r3)+,r5		# load byte2 (high half)
	ash	$0x8,r5			# shift byte2 high, low byte now 0
	bisb	r4,r5			# byte-wide or: only the low byte of
					# r4 is used, so the sign extension
					# movb left in its top half needs no
					# masking
	mov	r5,r4			# copy r5 to r4

					# no need to mask r5, as we do it
					# by default in output_loop

	ash	$-P_BITS,r4		# shift right (negative) p-bits
	bic	$0xffc0,r4		# mask because arith shift
	add	$(THRESHOLD+1),r4
	mov	r4,counter
				# counter = (r4 >> P_BITS) + THRESHOLD + 1
				#                       (=match_length)

output_loop:
	# Position Mask <<8 + 0xff = 0x3ff
	# Inverted 16-bits is 0xfc00
	# not sure why ~ not working here
	bic	$0xfc00,r5		# r5 = r5 & (POSITION_MASK<<8+0xff)

	mov	r5,-(r6)		# push r5 onto stack
	add	$text_buf,r5
	movb	(r5),r4			# load byte from text_buf[]
	mov	(r6)+,r5		# restore r5 from stack
	inc	r5			# advance pointer in text_buf

store_byte:
	movb	r4,(r1)+		# store a byte, increment pointer

	mov	r2,-(r6)		# push r2 onto stack
	add	$text_buf,r2
	movb	r4,(r2)			# store a byte to text_buf[r]
	mov	(r6)+,r2		# restore r2 from stack
	inc	r2			# r++

	# N-1 = 1023 = 0x3ff
	# ~0x3ff = 0xfc00
	bic	$0xfc00,r2		# mask r

	dec	counter			# decrement, repeat if !=0
	bne	output_loop		# sad we can't use SOB only works regs

	bit	$0x100,r0		# any counter bits left in bit 8?
	bne	test_flags		# yes: more flag bits in this byte

	br	decompression_loop	# no: all 8 used, load a new flag byte

discrete_char:
	movb	(r3)+,r4		# load a byte, increment pointer
	inc	counter			# set counter to output once
					# we know it has to be zero here
	br	store_byte		# and store it

# end of LZSS code

done_logo:
	jsr	r2,write_stdout		# print the logo
	.word	out_buffer		# out_buffer is the param

	#==========================
	# PRINT VERSION
	#==========================
first_line:
	mov	(r6),r1			# point r1 to out_buffer

	# UN*X v7 has no uname syscall
	# so fake it up

	# every strcat appends the string at r0 to the line being
	# built at r1

	mov	$os_string,r0		# OS name
	jsr	r2,strcat

	mov	$ver_string,r0		# version
	jsr	r2,strcat

	mov	$compiled_string,r0	# source is ", Compiled "
	jsr	r2,strcat

	mov	$compiled_date,r0	# compiled date
	jsr	r2,strcat

	mov	$linefeed,r0		# source is "\n"
	jsr	r2,strcat

	jsr	r2,center_and_print	# center and print

	#===============================
	# Middle-Line
	#===============================
middle_line:
	mov	(r6),r1			# point r1 to output_buffer

	#=========
	# Load /proc/cpuinfo into buffer
	#=========

	# NOTE(review): the results of open and read are not checked

	trap	SYSCALL_OPEN		# call open
	.word	cpuinfo			# cpuinfo filename
	.word	0			# 0 = O_RDONLY
					# result returned in r0

	mov	r0,r3			# save our fd
	trap	SYSCALL_READ
	.word	disk_buffer
	.word	4096			# size of disk_buffer

	mov	r3,r0			# restore fd
	trap	SYSCALL_CLOSE		# close (to be correct)

	#=============
	# Number of CPUs
	#=============
number_of_cpus:
	mov	$one,r0			# Assume one processor
	jsr	r2,strcat

	#=========
	# MHz
	#=========
print_mhz:
	# We don't have MHz

	#=========
	# Chip Name
	#=========
chip_name:
	mov	$('t'<<8+'y'),r4
	mov	$('p'<<8+'e'),r5
	jsr	r2,find_string		# find 'type\t: ' and grab up to '\n'

	mov	$processor,r0		# print " Processor, "
	jsr	r2,strcat

	#========
	# RAM
	#========
print_ram:
	# not sure how you know how much RAM on pdp-11
	# let's assume 64kB

	# Amount of RAM in r4:r5

	mov	$0x1,r4
	clr	r5			# size in bytes of RAM r4:r5 (0x10000)

	ashc	$-10,r4			# divide by 1024 to get K

	mov	$1,r4			# r4=1 selects strcat output
	jsr	r2,num_to_ascii

	mov	$ram_comma,r0		# print 'K RAM, '
	jsr	r2,strcat		# call strcat

	#========
	# Bogomips
	#========
print_bogomips:
	mov	$('M'<<8+'I'),r4
	mov	$('P'<<8+'S'),r5
	jsr	r2,find_string		# find 'MIPS\t: ' and grab up to '\n'

	mov	$bogo_total,r0		# print bogomips total
	jsr	r2,strcat

	jsr	r2,center_and_print	# center and print

	#=================================
	# Print Host Name
	#=================================
last_line:
	mov	(r6),r1			# copy out_buffer to r1

	mov	$host_string,r0
	jsr	r2,strcat		# print host name

	jsr	r2,center_and_print	# center and print

	jsr	r2,write_stdout		# write_stdout
	.word	default_colors		# restore colors, print a few linefeeds

	#================================
	# Exit
	#================================
exit:
	clr	r0			# exit syscall takes param in r0
	trap	SYSCALL_EXIT		# same "trap X" form as the other traps

	#=================================
	# FIND_STRING
	#=================================
	# Scans disk_buffer for a four-character key, skips ahead to
	# just past the next ':' plus one more byte, then copies
	# everything up to (not including) the next '\n' to the output.
	#
	# r4 = (1st char << 8) + 2nd char of the key
	# r5 = (3rd char << 8) + 4th char of the key
	# r1 = output pointer; text is appended there and a NUL is
	#      stored at the final r1 (r1 is left pointing at the NUL)
	# r0,r3 trashed
	# called with jsr r2, returns with rts r2
	#
	# If a NUL is reached before the key matches, nothing is copied
	# and only the NUL is stored.
	# NOTE(review): once the key has matched, the scans for ':' and
	# '\n' do not stop at a NUL.

find_string:
	mov	$disk_buffer,r3		# look in cpuinfo buffer

find_loop:
	# The four bytes are stored byte-swapped into the words
	# "compare" and "compare2" so each pair can be checked with
	# a single word compare against r4 / r5.
	movb	(r3)+,compare+1
	beq	almost_done		# leave if we hit 0
	mov	r3,-(r6)		# push next r3 on stack
	movb	(r3)+,compare
	movb	(r3)+,compare2+1
	movb	(r3)+,compare2
	mov	(r6)+,r3		# restore r3 from stack

	cmp	r4,@$compare		# see if first 2 bytes match
	bne	find_loop

	cmp	r5,@$compare2		# see if next 2 bytes match
	bne	find_loop

find_colon:
	movb	(r3)+,r0		# load a byte
	cmpb	r0,$':'
	bne	find_colon		# repeat till we find colon

	inc	r3			# skip the space

store_loop:
	cmpb	(r3),$'\n'
	beq	almost_done
	movb	(r3)+,(r1)+		# load/store byte, incrementing both
	br	store_loop

almost_done:
	clrb	(r1)			# replace last value with NUL
done:
	rts	r2			# return

	#==============================
	# center_and_print
	#==============================
	# Prints the line held in out_buffer, horizontally centered.
	#
	# r1 = end of string (points at its terminating NUL)
	# string to center at output_buffer
	# r0,r1,r3,r4,r5 trashed
	# called with jsr r2, returns with rts r2
	#
	# Centering is done by writing "ESC[", the number of columns
	# and "C" in front of the text.

center_and_print:
	sub	$out_buffer,r1		# r1 = length of the line
	neg	r1
	add	$81,r1			# r1 = 81 - length
	bmi	done_center		# if result negative, don't center
					# (tested before anything is written
					#  so that an over-long line is not
					#  preceded by a dangling "ESC[")

	jsr	r2,write_stdout
	.word	escape			# we want to output ^[[

	asr	r1			# divide by 2
	mov	r1,r5			# value to print
	clr	r4			# print to stdout
	jsr	r2,num_to_ascii		# print number of spaces

	jsr	r2,write_stdout		# write_stdout
	.word	C			# writing out "C"

done_center:
	jsr	r2,write_stdout		# write_stdout
	.word	out_buffer		# writing out out_buffer
	rts	r2

	#=============================
	# num_to_ascii
	#=============================
	# Converts r5 to decimal text and either writes it to stdout
	# or appends it to the string at r1.
	#
	# r5 = value to print
	# r4 = 0=stdout, 1=strcat
	# r1 = output pointer (only used when r4 is nonzero)
	# r0,r3,r4,r5 trashed
	# called with jsr r2, returns with rts r2
	#
	# Digits are produced last-first, so they are stored backwards
	# from ascii_buffer+9.  Byte ascii_buffer+9 itself is never
	# written and serves as the terminator; this relies on the
	# .bss buffer starting out as zero.

num_to_ascii:
	mov	r4,-(r6)		# store r4 on stack
	mov	$(ascii_buffer+9),r3	# point to end of our buffer

div_by_10:
	clr	r4			# clear the top of the 32-bit
					# number we are dividing by
	div	$10,r4			# divide by 10
					# Q in r4
					# R in r5
	add	$0x30,r5		# convert R to ascii
	movb	r5,-(r3)		# store a byte, decrement
	mov	r4,r5			# move Q in for next divide, update flags
	bne	div_by_10		# if Q not zero, loop

write_out:
	mov	(r6)+,r4		# restore r4 from stack
	bne	num_strcat		# if r4==1 then strcat

num_stdout:
	mov	r3,output_val		# patch the inline argument word below
	jsr	r2,write_stdout		# jump to stdout
output_val:
	.word	0			# address of first digit goes here
	rts	r2

num_strcat:
	mov	r3,r0
					# fall through to strcat

	#================================
	# strcat
	#================================
	# Appends the NUL-terminated string at r0 to the output at r1.
	#
	# value to cat in r0
	# output buffer in r1
	# on return r1 points at the NUL that was just stored, so the
	# next strcat continues from there; r0 points past the source NUL
	# called with jsr r2, returns with rts r2

strcat:
	movb	(r0)+,(r1)+		# load and store byte, increment both
	bne	strcat			# loop if not zero
	dec	r1			# point r1 back at the terminating NUL
	rts	r2			# return

	#================================
	# WRITE_STDOUT
	#================================
	# Writes a NUL-terminated string to STDOUT.
	#
	# (r2)+: has pointer to string, i.e. the caller places a
	#        ".word string" right after its "jsr r2,write_stdout"
	# r0 trashed
	#
	# The buffer address and byte count are patched into the two
	# words that follow the trap (self-modifying code).

write_stdout:
	mov	(r2),r0			# get string addr from after
					# jump instruction
	mov	r0,write_val		# store in proper place
str_loop:
	tstb	(r0)+			# test if byte is 0
	bne	str_loop		# if not, loop incrementing
	dec	r0			# r0 was stepped past the NUL; back
					# up so the NUL is not written out
	sub	(r2)+,r0		# subtract to get length in r0
					# also point return address
					# to be after our call
	mov	r0,write_count		# move r0 to our count location
	mov	$STDOUT,r0		# fd passed in r0
	trap	SYSCALL_WRITE		# call syscall
write_val:
	.word	0			# pointer to string goes here
write_count:
	.word	2			# count goes here
	rts	r2			# return

#===========================================================================
#	section .data
#===========================================================================
#.data
# (the .data directive above is commented out, so these strings are
#  assembled into whatever section is current here)

os_string:	.asciz	"UN*X "
ver_string:	.asciz	"Version 7"
# --- remaining text fragments for the version line ---
compiled_string:
	.asciz	", Compiled "
compiled_date:
	.asciz	"Fri Jun 8 10:00:00 EDT 1979"
linefeed:
	.asciz	"\n"

# --- text fragments for the hardware line ---
one:
	.asciz	"One "
processor:
	.asciz	" Processor, "
ram_comma:
	.asciz	"K RAM, "
bogo_total:
	.asciz	" Bogomips Total\n"

# --- host name line ---
host_string:
	.asciz	"esw"

# --- terminal control strings ---
default_colors:
	.asciz	"\033[0m\n\n"
escape:
	.asciz	"\033["
C:
	.asciz	"C"

# --- file opened to obtain the cpuinfo text ---
# Defining FAKE_PROC at assembly time selects the local file instead.
.ifndef FAKE_PROC
cpuinfo:
	.asciz	"/proc/cpuinfo"
.else
cpuinfo:
	.asciz	"proc/cp.pdp11"
.endif

# compressed logo data
.include	"logo.lzss_new"

#============================================================================
#	section .bss
#============================================================================
.bss

# word-sized scratch variables
.lcomm	counter,2		# bytes left to emit in the LZSS decoder
.lcomm	compare,2		# find_string: first two bytes under test
.lcomm	compare2,2		# find_string: next two bytes under test

# byte buffers
.lcomm	ascii_buffer,10		# num_to_ascii digit buffer
.lcomm	text_buf, (N+F-1)	# LZSS history buffer
.lcomm	disk_buffer,4096	# we cheat!!!!
.lcomm	out_buffer,16384	# decompressed logo, then each output line