\ ----------------------------------------------------------------------
\ LANGUAGE    : ANS Forth 
\ PROJECT     : CHForth, a Dutch public domain ANS Forth for PC's 
\ DESCRIPTION : Ackerman's function : ack(3,9) ack(2,4)
\ CATEGORY    : Benchmarks 
\ AUTHOR      : Marcel Hendrix, October 9th, 1991
\ LAST CHANGE : October 08, 1994, Coos Haak FOR NEXT changed to DO LOOP
\ LAST CHANGE : May 16th, 1994, Coos Haak 
\ LAST CHANGE : September 8th, 1993 MHX, stacks for iForth 
\ LAST CHANGE : January 18th, 1992 MHX, added main memory stacks for ack(3,9)
\ LAST CHANGE : December 30th, 1991 MHX, made it 330% faster.
\ LAST CHANGE : October 9th, 1991 MHX
\ ----------------------------------------------------------------------



\               (* ************************************ *)
\               (*                                      *)
\               (*      Berkeley standard benchmark     *)
\               (*    Ackerman's function : ack(2, 4)   *)
\               (*    Tests recursion and integer math  *)
\               (*        Repeats 10,000 times          *)
\               (*  Translated to Forth from Modula II  *)
\               (* FC: Marcel Hendrix, April 5th, 1991  *)
\               (*                                      *)
\               (*   Dr Dobb's Journal, September 1988  *)
\               (* Original timings on 6 Mhz Tandon AT: *)
\               (*      FTL              20.71 sec.     *)
\               (*      LogiTech         11.82 sec.     *)
\               (*      TopSpeed         10.19 sec.     *)
\               (*      Stony Brook      12.14 sec.     *)
\               (*                                      *)
\               (* ************************************ *)


        NEEDS -stack

        ?DEF -ackerman [IF] -ackerman [THEN]

        MARKER -ackerman

        PRIVATES


-- Syntactic Sugar

: {             ;                       IMMEDIATE PRIVATE

#10000 CONSTANT MAXITER PRIVATE


        -- The meat

-- Time used on 50 MHz i486 using iForth:                              
        CR .( Enter:  1  ->  straight code using local variables)   ( 1.54s)
        CR .(         2  ->  code using the system stack)           ( 0.99s)
        CR .(         3  ->  code using both data and system stack) ( 0.66s)
        CR .(         4  ->  register optimized version of 3)       ( 0.60s)
        CR .(         5  ->  version 4 with tail recursion eliminated)
        CR .( ? ) KEY DUP EMIT


DUP '1' = [IF]  

-- Direct MODULA II copy, using local variables.
-- Unfortunately, the overhead for LOCAL is _very_ high here.

: }ack          RECURSIVE                       \ <s> <t> --- <n>
                LOCAL t  LOCAL s
                 s IF  t IF { s 1-  { s  t 1- }ack }ack
                       ELSE { s 1-        1        }ack
                      ENDIF
                 ELSE t 1+ 
                ENDIF ; PRIVATE

[THEN]


DUP '2' = [IF]

-- Using the system stack. 
-- Less readable to Forth neophites, but more efficient.

: }ack          RECURSIVE                       \ <s> <t> --- <n>
                ( T) >S  ( S) >S
                 S IF  
                       T IF { S 1-  { S>  S> 1- }ack  }ack  EXIT  
                      ENDIF
                            { S> 1-  -S  1 }ack  EXIT
                ENDIF
                -S S> 1+ ; PRIVATE

[THEN]


DUP '3' = [IF]

-- Only recommended for enthousiasts. The data stack is used, at the expense
-- of somewhat tricky code. 
-- On a 20 MHz T800 the code runs 4 seconds, so we're faster than Pascal 
-- on a 33 MHz '386, even without correcting for clock speed.

: }ack          RECURSIVE                       \ <s> <t> --- <n>
                ( t) >S  ( s=stack)
                DUP IF  
                       S IF { DUP 1-  { SWAP  S> 1- }ack  }ack EXIT  
                      ENDIF
                            { 1- -S  1 }ack  EXIT
                 ENDIF
                DROP S> 1+ ; PRIVATE

[THEN]

DUP '4' = [IF]

-- Again developed from the above. We do this for money, you know.
-- The register prevents costly `deep' stack operations like ROT .
-- Still no assembly language in sight.

-- 0 REGISTER top
0 VALUE top     ( REGISTERs don't exist in CHForth )

: }ack          RECURSIVE                       \ <s> <t> --- <n>
                OVER IF  DUP IF  TO top  DUP 1- SWAP  top 1- }ack  }ack EXIT  
                          ENDIF
                         { DROP 1-  1 }ack  EXIT
                  ENDIF
                NIP 1+ ; PRIVATE

[THEN]  

DUP '5' = [IF]

DOC
(*
 Another try, for the honor.
 Some assembly language, to eliminate tail recursion.
 We are now about 330% faster than with our original approach. And 2 hours 
 older.
 More REGISTERs don't help.
 On a T800 @ 20MHz this code clocks in at 2.672 seconds.
 Use the disassembler to see how it can be improved. The jumps prevent the
 optimizer from keeping the internal stack full, but undoubtedly you know 
 some tricks...
*)
ENDDOC


: again-2
        2 cs-roll postpone again ; immediate compile-only

: again-1
        1 cs-roll postpone again ; immediate compile-only

0 VALUE top     \ iForth has REGISTERs

: }ack          RECURSIVE       \ <s> <t> --- <n>
                                \ 4 ajw,  rpush,  is here... 
begin begin                     \ ... after this prelude
                OVER IF
                         DUP IF  
                                 TO top
                                 DUP 1-  { SWAP top 1- }ack  
                                    again-2     \ this is "}ack EXIT"
                          ENDIF
                          DROP 1- 1 again-1     \ this is "}ack EXIT"
                  ENDIF
                NIP 1+ ; PRIVATE

[THEN]  

'1' '6' WITHIN 0= [IF]  CR .( Beg your pardon?) ABORT
                [THEN]


-- Utility. Enlarge all stacks. Note two tForth peculiarities: Stacks grow up
-- and they need 8 CELLS for underflow!
-- To initialize ALL stacks, make sure both QUIT (resets return stack) _and_
-- ABORT (all other stacks) are executed! You can't put these commands all
-- on one line or in a definition (Sorry, _I_ can't).

        -- The test itself

: ACK(2,4)      CR ." Testing Ackerman(2,4)" 
                TIMER-RESET
                MAXITER 0 DO
                           { 2 4 }ack DROP 
                        LOOP
                CR .ELAPSED ;


INTERNAL SP0 RP0 FORTH - #CELLS #4000 > [IF]

-- Cannot run more than ACK(3,3) with the standard 256 word [return] stack!
-- You'll have to set up a main memory stack (==> no processes !) for this.
-- Advised: #20000 /STACKS
: ACK(3,9)      CR ." Ackerman(3,9) = " 
                TIMER-RESET
                 { 3 9 }ack DEC. 
                CR .ELAPSED ; 

[THEN]

: .help
        CR ." Enter: ACK(2,4) to compute Ackerman(2,4) 10,000 times." 
        CR ." If you are curious, the result is 11. "
        CR ." A 33 MHz 80386 needs 4.17 seconds, using Turbo Pascal 5.0 (64K stack, longints)" 
        CR ." ( 4.17*33/20 = 6.88 seconds)" CR
[ ?DEF ACK(3,9) ] [IF]
        CR ." Enter: ACK(3,9) to compute Ackerman(3,9) once." 
        CR ." For this test, the results are known for the ICC and Parsec's Par-C:"  CR
        CR ."   Par-C => 107 seconds (25 MHz T8)"
        CR ."   ICC   =>  40 seconds (25 MHz T8)"
        CR ."   TP5   =>  72 seconds (33 MHz i386)." \ CR
\       CR ." Note: for ACK(3,9) larger stacks are needed, use (only once!):"
\       CR ."        20000 /STACKS <cr> ABORT"
[THEN]
        CR ;

DEPRIVE

.help

                            \ (* End of Source *) /
