;;-*- Mode: Lisp; Package: CCL -*-
;;;
;;;   Copyright (C) 1994-2001 Digitool, Inc
;;;   This file is part of Opensourced MCL.
;;;
;;;   Opensourced MCL is free software; you can redistribute it and/or
;;;   modify it under the terms of the GNU Lesser General Public
;;;   License as published by the Free Software Foundation; either
;;;   version 2.1 of the License, or (at your option) any later version.
;;;
;;;   Opensourced MCL is distributed in the hope that it will be useful,
;;;   but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;   Lesser General Public License for more details.
;;;
;;;   You should have received a copy of the GNU Lesser General Public
;;;   License along with this library; if not, write to the Free Software
;;;   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
;;;


;(in-package "CCL")

(eval-when (:compile-toplevel :execute)
  (require "PPC-ARCH")
  (require "PPC-LAPMACROS"))

;;; %BIGNUM-REF needs to access bignums as obviously as possible, and it needs
;;; to be able to return 32 bits somewhere no one looks for real objects.
;;;
;;; The easiest thing to do is to store the 32 raw bits in two fixnums
;;; and return multiple values.

(defppclapfunction %bignum-ref ((bignum arg_y) (i arg_z))
  (vref32 imm0 bignum i imm1)
  (digit-h temp0 imm0)
  (digit-l temp1 imm0)
  (vpush temp0)
  (vpush temp1)
  (la temp0 8 vsp)                      ; ?? why not (mr temp0 vsp) before vpushing?
  (set-nargs 2)                         ; that doesn't make any difference.  And, in this case,
                                        ; we can get away without setting nargs (since the caller
                                        ; called us with 2 args, but that's horrible style.)
  (ba .SPvalues))



(defppclapfunction %bignum-ref-hi ((bignum arg_y) (i arg_z))
  (la imm1 arch::misc-data-offset i)
  (lhzx imm0 bignum imm1)
  (box-fixnum arg_z imm0)
  (blr))


(defppclapfunction %bignum-set ((bignum 0) (i arg_x) (high arg_y) (low arg_z))
  (compose-digit imm0 high low)
  (lwz arg_z bignum vsp)
  (vset32 imm0 arg_z i imm1)
  (la vsp 4 vsp)
  (blr))

;;; %ADD-WITH-CARRY -- Internal.
;;;
;;; This should be in assembler, and should not cons intermediate results.  It
;;; returns a 32bit digit (split in half) and a carry resulting from adding 
;;; together the a, b, and an incoming carry.
;;;



; this is silly 
(defppclapfunction %add-the-carry ((b-h arg_x) (b-l arg_y) (carry-in arg_z))
  (let ((a imm0)
        (b imm1)
        (temp imm2)
        (c imm3))    
    (compose-digit b b-h b-l)
    (unbox-fixnum c carry-in)
    (add b c b)
    (digit-h temp0 b)
    (digit-l temp1 b)
    (vpush temp0)
    (vpush temp1)
    (la temp0 8 vsp)
    (set-nargs 2)
    (ba .SPvalues)))




;;; %SUBTRACT-WITH-BORROW -- Internal.
;;;
;;; This should be in assembler, and should not cons intermediate results.  It
;;; returns a 32bit digit and a borrow resulting from subtracting b from a, and
;;; subtracting a possible incoming borrow.
;;;
;;; We really do:  a - b - 1 + borrow, where borrow is either 0 or 1.
;;; 

(defppclapfunction %subtract-with-borrow ((a-h 4) (a-l 0) (b-h arg_x) (b-l
arg_y) (borrow-in arg_z))
  (let ((a imm0)
        (b imm1)
        (temp imm2)
        (c imm3))
    (lwz temp0 a-h vsp)
    (lwz temp1 a-l vsp)
    (compose-digit a temp0 temp1)
    (compose-digit b b-h b-l)
    (unbox-fixnum c borrow-in)
    (li temp -1)
    (addc temp c temp)
    (subfe a b a)
    (addze c rzero)
    (box-fixnum c c)
    (digit-h temp0 a)
    (digit-l temp1 a)
    (vpush temp0)
    (vpush temp1)
    (vpush c)
    (la temp0 20 vsp)
    (set-nargs 3)
    (ba .SPvalues)))



(defppclapfunction %subtract-one ((a-h arg_y)(a-l arg_z))
  (let ((a imm0))
    (compose-digit a a-h a-l)
    (subi a a 1)
    (digit-h temp0 a)
    (vpush temp0)
    (digit-l temp0 a)
    (vpush temp0)
    (la temp0 8 vsp)
    (set-nargs 2)
    (ba .spvalues)))

;;; %MULTIPLY -- Internal.
;;;
;;; This multiplies two digit-size (32-bit) numbers, returning a 64-bit result
;;; split into two 32-bit quantities.
;;; Or, as fate would have it, into 4 16-bit quantities
;;;


; given 2 fixnums, returns product as 4 16 bit dohickies
(defppclapfunction %multiply-signed-fixnums ((x arg_y)(y arg_z))
  (let ((x-un imm0)
        (y-un imm1)
        (res-h imm2)
        (res-l imm3))
    (unbox-fixnum x-un x)
    (unbox-fixnum y-un y)
    (mullw res-l x-un y-un)
    (mulhw res-h x-un y-un)  ; vs mulhwu
    (digit-h temp0 res-h)
    (digit-l temp1 res-h)
    (digit-h temp2 res-l)
    (digit-l temp3 res-l)
    (vpush temp0)
    (vpush temp1)
    (vpush temp2)
    (vpush temp3)
    (set-nargs 4)
    (la temp0 16 vsp)
    (ba .SPvalues)))

;;; %MULTIPLY-AND-ADD  --  Internal.
;;;
;;; This multiplies x-digit and y-digit, producing high and low digits
;;; manifesting the result.  Then it adds the low digit, res-digit, and
;;; carry-in-digit.  Any carries (note, you still have to add two digits at a
;;; time possibly producing two carries) from adding these three digits get
;;; added to the high digit from the multiply, producing the next carry digit.
;;; Res-digit is optional since two uses of this primitive multiplies a single
;;; digit bignum by a multiple digit bignum, and in this situation there is no
;;; need for a result buffer accumulating partial results which is where the
;;; res-digit comes from.
;;; [slh] I assume that the returned carry "digit" can only be 0, 1 or 2


(defppclapfunction %multiply-and-add ((x-high 8)
				      (x-low 4)
				      (y-high 0)
				      (y-low arg_x)
				      (carry-in-high arg_y)
				      (carry-in-low arg_z))
  (let ((x imm0)
	(y imm1)
	(carry-in imm2)
	(lo imm3)
	(hi imm4))
    (compose-digit carry-in carry-in-high carry-in-low)
    (vpop temp0)
    (compose-digit y temp0 y-low)
    (vpop temp0)
    (vpop temp1)
    (compose-digit x temp1 temp0)
    (mullw lo x y)
    (mulhwu hi x y)
    (addc lo lo carry-in)
    (addze hi hi)
    (digit-h temp0 hi)
    (digit-l temp1 hi)
    (digit-h temp2 lo)
    (digit-l temp3 lo)
    (vpush temp0)
    (vpush temp1)
    (vpush temp2)
    (vpush temp3)
    (set-nargs 4)
    (la temp0 16 vsp)
    (ba .SPvalues)))

    
; return carry-hi carry-lo (= halves of prod-h) - no don't just store it at len
(defppclapfunction %multiply-and-add-loop ((Bignum 0)(res arg_x)(len arg_y) (x-box arg_z))
  (let ((x imm0)
        (idx imm1)
        (big temp0)
        ;(res temp1)
        (count temp2)
        (prod-h imm2)
        (prod-l imm3)
        (y imm4))
    (unbox-fixnum x x-box)
    (li idx arch::misc-data-offset)
    (li count 0)
    (lwz big bignum vsp)
    (li prod-h 0) ; init de carry
    @loop
    (lwzx y big idx)               ; get digit
    (mullw prod-l x y)             ; times x to prod-l
    (addc prod-l prod-l prod-h)    ; add last prod-h with carry out
    (mulhwu prod-h x y)            ; high times x to prod-h    
    (adde prod-h prod-h rzero)     ; add carry out to prod-h    
    (stwx prod-l res idx)    
    (addi count count '1)
    (cmpw count len)
    (addi idx idx '1)
    (blt @loop)
    (stwx prod-h res idx)
    (la vsp 4 vsp)
    (blr)))


;; multiply i'th digit of x by y and add to result starting at digit i
(defppclapfunction %multiply-and-add-harder-loop-2 ((x-ptr 4) (y-ptr 0)
                                                    (resptr arg_x)(residx arg_y) (count arg_z))  
  (let ((tem imm0)
        (y imm1)
        (prod-h imm2)
        (prod-l imm3)
        (x imm4)
        (xptr temp2)
        (yidx temp1)
        (yptr temp0))
    (lwz xptr x-ptr vsp)
    (addi tem residx arch::misc-data-offset)
    (lwzx x xptr tem)
    (lwz yptr y-ptr vsp)
    (li yidx 0) ; init yidx 0 
    (addc prod-h rzero rzero) ; init carry 0, mumble 0
    @loop
    (addi tem yidx arch::misc-data-offset)   ; get yidx
    (lwzx y yptr tem) 
    (mullw prod-l x y)
    (addc prod-l prod-l prod-h)
    (mulhwu prod-h x y) ; do we know for sure that this doesn't clobber carry?
    (adde prod-h prod-h rzero)
    (addi tem residx arch::misc-data-offset)
    (lwzx y resptr tem)    
    (addc prod-l prod-l y)
    (adde prod-h prod-h rzero)
    (stwx prod-l resptr tem)    
    (subic. count count '1)
    (addi residx residx '1)
    (addi yidx yidx '1)
    (bgt @loop)
    (addi tem residx arch::misc-data-offset)
    (stwx prod-h resptr tem)
    (la vsp 8 vsp)
    (blr)))



(defppclapfunction %logcount ((high arg_y) (low arg_z))
  (let ((arg imm0)
        (shift imm1)
        (temp imm2))
    (compose-digit arg high low)
    (mr. shift arg)
    (li arg_z 0)
    (if ne
      (progn
        @loop
        (la temp -1 shift)
        (and. shift shift temp)
        (la arg_z '1 arg_z)
        (bne @loop)))
    (blr)))

; return res
(defppclapfunction bignum-add-loop-2 ((aptr arg_x)(bptr arg_y) (result arg_z))
  (let ((idx imm0)
        (count imm1)
        (x imm2)
        (y imm3)        
        (len-a temp0)
        (len-b temp1)
        (tem temp2))
    (li idx arch::misc-data-offset)    
    (lwz imm4 arch::misc-header-offset aptr)
    (header-length len-a imm4)
    (lwz imm4 arch::misc-header-offset bptr)
    (header-length len-b imm4)
    ; make a be shorter one
    (cmpw len-a len-b)
    (li count 0)
    ; initialize carry 0
    (addc x rzero rzero)
    (ble @loop)
    ; b shorter - swap em
    (mr tem len-a)
    (mr len-a len-b)
    (mr len-b tem)
    (mr tem aptr)
    (mr aptr bptr)
    (mr bptr tem)    
    @loop
    (lwzx y aptr idx)
    (lwzx x bptr idx)    
    (addi count count '1)
    (cmpw count len-a)
    (adde x x y)
    (stwx x result idx)
    (addi idx idx '1)
    (blt @loop)
    ; now propagate carry thru longer (b) using sign of shorter    
    ;(SUBI imm4 idx '1) ; y has hi order word of a
    ;(lwzx y aptr imm4)
    (cmpw len-a len-b)
    (adde imm4 rzero rzero) ; get carry
    (srawi y y 31)  ; p.o.s clobbers carry 
    (addic imm4 imm4 -1)  ; restore carry
    (beq @l3)  ; unless equal
    @loop2
    (lwzx x bptr idx)
    (adde x x y)
    (stwx x result idx)
    (addi count count '1)
    (cmpw count len-b)
    (addi idx idx '1)
    (blt @loop2)
    ; y has sign of shorter - get sign of longer to x
    @l3
    (subi imm4 idx '1)
    (lwzx x bptr imm4)
    (adde imm4 rzero rzero) ; get carry
    (srawi x x 31)  ; clobbers carry 
    (addic imm4 imm4 -1)
    (adde x x y)
    (stwx x result idx)
    (blr)))

;; same as above but with initial a index and finishes
(defppclapfunction bignum-add-loop-+ ((init-a 0)(aptr arg_x)(bptr arg_y)(length arg_z))
  (let ((idx imm0)        
        (count imm1)
        (x imm2)
        (y imm3)
        (aidx imm4))
    (li idx arch::misc-data-offset)
    (lwz aidx init-a vsp)
    (addi aidx aidx arch::misc-data-offset)
    (li count 0)
    ; initialize carry 0
    (addc x rzero rzero)
    @loop
    (lwzx x aptr aidx)
    (lwzx y bptr idx)
    (adde x x y)
    (stwx x aptr aidx)
    (addi count count '1)
    (cmpw count length)
    (addi idx idx '1)
    (addi aidx aidx '1)
    (blt @loop)
    (lwzx x aptr aidx)  ; add carry into next one
    (adde x x  rzero)
    (stwx x aptr aidx)
    (la vsp 4 vsp)
    (blr)))



(defppclapfunction bignum-negate-loop-really ((big arg_x) (len arg_y) (result arg_z))
  (let ((idx imm0)
        (one imm1)
        (x imm2))
    (li idx arch::misc-data-offset)
    (li one '1)
    ; initialize carry 1
    (li x -1)
    (addic x x 1)
    @loop        
    ;(addi count count '1)    
    ;(cmpw count len)
    (subf. len one len)
    (lwzx x big idx)
    (not x x)
    (adde x x rzero)
    (stwx x result idx)    
    (addi idx idx '1)
    (bgt @loop)
    ; return carry
    (li x 0)
    (adde x x  rzero)
    (box-fixnum arg_z x)
    (blr)))

;; she do tolerate len = jidx
(defppclapfunction bignum-shift-left-loop ((nbits 4)(result 0) (bignum arg_x) (len arg_y) (jidx arg_z))
  (let ((y imm0)
        (idx imm1)
        (bits imm2)
        (rbits imm3)
        (x imm4)
        (iidx temp0)
        (resptr temp1))
    (li iidx 0)
    (lwz bits nbits vsp)
    (lwz resptr result vsp)
    (unbox-fixnum bits bits)
    (subfic rbits bits 32)    
    ;(dbg)
    (lwz imm4 arch::misc-data-offset bignum)
    (slw imm4 imm4 bits)
    (la y (+ arch::misc-data-offset -4) jidx)  
    (stwx imm4 y resptr) 
     
    (cmpw len jidx)
    (beq @done)
    @loop
    (addi idx iidx arch::misc-data-offset)
    (lwzx x bignum idx)
    (srw x x rbits)
    (addi idx idx '1)
    (lwzx y bignum idx)
    (slw y y bits)
    (or x x y)
    (addi idx jidx arch::misc-data-offset)
    (stwx x resptr idx)
    (addi jidx jidx '1)    
    (cmpw jidx len)
    (addi iidx iidx '1)
    (blt @loop)    
    @done
    ; do first - lo order
       
    ; do last - hi order    
    (addi idx iidx arch::misc-data-offset)
    ;(dbg t)
    (lwzx y bignum idx)
    (sraw y y rbits)
    (addi idx len arch::misc-data-offset)
    (stwx y resptr idx)
    (la vsp 8 vsp)
    (blr)))



(defppclapfunction bignum-shift-right-loop-1 ((nbits 4)(result 0) (bignum arg_x) (len arg_y) (iidx arg_z))
  (let ((y imm0)
        (idx imm1)
        (bits imm2)
        (rbits imm3)
        (x imm4)
        (jidx temp0)
        (resptr temp1))
    (li jidx 0)
    (lwz bits nbits vsp)
    (lwz resptr result vsp)
    (unbox-fixnum bits bits)
    (cmpw jidx len)
    (subfic rbits bits 32)    
    (bge @done)
    @loop
    (addi idx iidx arch::misc-data-offset)
    (lwzx x bignum idx)
    (srw x x bits)
    (addi idx idx '1)
    (lwzx y bignum idx)
    (slw y y rbits)
    (or x x y)
    (addi idx jidx arch::misc-data-offset)
    (stwx x resptr idx)
    (addi jidx jidx '1)    
    (cmpw jidx len)
    (addi iidx iidx '1)
    (blt @loop)
    @done
    (addi idx iidx arch::misc-data-offset)
    (lwzx x bignum idx)
    (sraw x x bits)
    (addi idx jidx arch::misc-data-offset)
    (stwx x resptr idx)
    (la vsp 8 vsp)
    (blr)))

(defppclapfunction bignum-subtract-loop ((a 16)(len-a 12)(sa 8)
                                         (b 4) (len-b 0) (sb arg_x)
                                         (result arg_y) (length arg_z))
  (let ((idx imm0)
        (count temp1)
        (x imm2)
        (y imm3)
        (aptr temp0)
        (tlen-a temp2)
        (bptr temp3)
        (sign-a imm1)
        (sign-b imm4)
        (tlen-b arg_x))
    (li idx arch::misc-data-offset)
    (li count 0)
    (lwz sign-a sa vsp)
    (unbox-fixnum sign-a sign-a)
    (lwz aptr a vsp)
    (lwz bptr b vsp)
    (lwz tlen-a len-a vsp)    
    (unbox-fixnum sign-b sb)  ; get arg_x to imm reg
    (lwz tlen-b len-b vsp)  ; tlen-b is arg_x
    ;(unbox-fixnum sign-b sb)
    ; initialize carry 1
    (li x -1)
    (addic x x 1)
    ;(addc x rzero rzero)  ; or zero?
    ; this could be faster - count down len-a and len-b and use more cr's
    @loop    
    (cmpw count tlen-a)
    (mr x sign-a)
    (bge @a1)
    (lwzx x aptr idx)
    @a1
    (cmpw count tlen-b)
    (mr y sign-b)
    ;(unbox-fixnum y sign-b) ; p.o.s clobbers carry
    (bge @b1)    
    (lwzx y bptr idx)
    @b1
    (subfe x y x)
    (stwx x result idx)
    (addi count count '1)
    (cmpw count length)
    (addi idx idx '1)
    (blt @loop)
    ; return carry
    (li x 0)
    (addze x  rzero)
    (box-fixnum arg_z x)
    (la vsp 20 vsp)
    (blr)))

(defppclapfunction bignum-compare-loop ((a arg_x) (b arg_y) (len arg_z))
  (let ((x imm0)
        (y imm1)
        (idx imm2))
    (addi idx len (- arch::misc-data-offset 4))
    @loop
    (lwzx x a idx)
    (lwzx y b idx)
    (cmpl :cr1 x y)
    (subic. len len '1)
    (subi idx idx '1)
    (bgt :cr1 @gt)
    (blt :cr1 @lt)    
    (bgt :cr0 @loop)
    (li arg_z 0)   ; was all =
    (blr)
    @gt
    (li arg_z '1)
    (blr)
    @lt
    (li arg_z '-1)
    (blr)))

;; returns number of bits in digit-hi,digit-lo that are sign bits
;; 32 - digits-sign-bits is integer-length

(defppclapfunction %digits-sign-bits ((hi arg_y) (lo arg_z))
  (rlwinm. imm1 hi (- 16 arch::fixnumshift) 0 15)
  (rlwimi imm1 lo (- 32 arch::fixnumshift) 16 31)
  (not imm1 imm1)
  (blt @wasneg)
  (not imm1 imm1)
  @wasneg
  (cntlzw imm1 imm1)
  (box-fixnum arg_z imm1)
  (blr))

(defppclapfunction bignum-logtest-loop ((count arg_x) (s1 arg_y) (s2 arg_z))  
  (addi imm1 rzero arch::misc-data-offset)
  @loop
  (lwzx imm2 s1 imm1)
  (lwzx imm3 s2 imm1)
  (and. imm2 imm3 imm2)  
  (addi imm1 imm1 4)
  (bne @true)
  (subic. count count 4)
  (bgt  @loop)
  (mr arg_z rnil)
  (blr)
  @true
  (addi arg_z rnil arch::t-offset)
  (blr))

(defppclapfunction bignum-not-loop ((count arg_x) (s1 arg_Y) (dest arg_z))
  ;(lwz imm0 count vsp)
  (addi imm1 rzero arch::misc-data-offset)
  @loop
  (lwzx imm2 s1 imm1)
  (not imm2 imm2)
  (subic. count count 4)
  (stwx imm2 dest imm1)
  (addi imm1 imm1 4)
  (bgt @loop)
  @out  
  (blr))

(defppclapfunction bignum-and-loop ((count 0) (s1 arg_x) (s2 arg_y) (dest arg_z))
  (lwz imm0 count vsp)
  (addi imm1 rzero arch::misc-data-offset)
  @loop
  (lwzx imm2 s1 imm1)
  (lwzx imm3 s2 imm1)
  (and imm2 imm3 imm2)
  (subic. imm0 imm0 4)
  (stwx imm2 dest imm1)
  (addi imm1 imm1 4)
  (bgt @loop)
  @out
  (la vsp 4 vsp)
  (blr))

(defppclapfunction bignum-andc2-loop ((count 0) (s1 arg_x) (s2 arg_y) (dest arg_z))
  (lwz imm0 count vsp)
  (addi imm1 rzero arch::misc-data-offset)
  @loop
  (lwzx imm2 s1 imm1)
  (lwzx imm3 s2 imm1)
  (andc imm2 imm2 imm3)
  (subic. imm0 imm0 4)
  (stwx imm2 dest imm1)
  (addi imm1 imm1 4)
  (bgt @loop)
  @out
  (la vsp 4 vsp)
  (blr))

(defppclapfunction bignum-andc1-loop ((count 0) (s1 arg_x) (s2 arg_y) (dest arg_z))
  (lwz imm0 count vsp)
  (addi imm1 rzero arch::misc-data-offset)
  @loop
  (lwzx imm2 s1 imm1)
  (lwzx imm3 s2 imm1)
  (andc imm2 imm3 imm2)
  (subic. imm0 imm0 4)
  (stwx imm2 dest imm1)
  (addi imm1 imm1 4)
  (bgt @loop)
  @out
  (la vsp 4 vsp)
  (blr))

(defppclapfunction digit-lognot-move ((index arg_x) (source arg_y) (dest arg_z))
  (let ((scaled-index imm1))
    (vref32 imm0 source index scaled-index) ; imm1 has c(index) + data-offset
    (not imm0 imm0)
    (stwx imm0 dest scaled-index)
    (blr)))

; if dest not nil store unboxed result in dest(0), else return boxed result
(defppclapfunction fix-digit-logandc2 ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
  (let ((w1 imm0)
        (w2 imm1))
    (unbox-fixnum  w1 fix)
    (lwz w2 arch::misc-data-offset big)
    (cmpw dest rnil)
    (not w2 w2)
    (and w1 w1 w2)
    (bne @store)
    (box-fixnum arg_z w1)
    (blr)
    @store
    (stw w1 arch::misc-data-offset dest)
    (blr)))



(defppclapfunction fix-digit-logand ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
  (let ((w1 imm0)
        (w2 imm1))
    (unbox-fixnum  w1 fix)
    (lwz w2 arch::misc-data-offset big)
    (cmpw dest rnil)
    (and w1 w1 w2)
    (bne @store)
    (box-fixnum arg_z w1)
    (blr)
    @store
    (stw w1 arch::misc-data-offset dest)
    (blr)))



(defppclapfunction fix-digit-logandc1 ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
  (let ((w1 imm0)
        (w2 imm1))
    (unbox-fixnum  w1 fix)
    (lwz w2 arch::misc-data-offset big)
    (cmpw dest rnil)
    (not w1 w1)
    (and w1 w1 w2)
    (bne @store)
    (box-fixnum arg_z w1)
    (blr)
    @store
    (stw w1 arch::misc-data-offset dest)
    (blr)))

(defppclapfunction bignum-ior-loop ((count 0) (s1 arg_x) (s2 arg_y) (dest arg_z))
  (lwz imm0 count vsp)
  ;(cmpw imm0 rzero)
  (addi imm1 rzero arch::misc-data-offset)
  ;(beq @out)
  @loop
  (lwzx imm2 s1 imm1)
  (lwzx imm3 s2 imm1)
  (or imm2 imm3 imm2)
  (subic. imm0 imm0 4)
  (stwx imm2 dest imm1)
  (addi imm1 imm1 4)
  (bgt @loop)
  @out
  (la vsp 4 vsp)
  (blr))

(defppclapfunction bignum-xor-loop ((count 0) (s1 arg_x) (s2 arg_y) (dest arg_z))
  (lwz imm0 count vsp)
  (addi imm1 rzero arch::misc-data-offset)
  @loop
  (lwzx imm2 s1 imm1)
  (lwzx imm3 s2 imm1)
  (xor imm2 imm3 imm2)
  (subic. imm0 imm0 4)
  (stwx imm2 dest imm1)
  (addi imm1 imm1 4)
  (bgt @loop)
  @out
  (la vsp 4 vsp)
  (blr))

#+nomore
(defppclapfunction try-guess-loop-1 ((guess-h 8)(guess-l 4)(len-y 0)
                                     (xidx arg_x) (xptr arg_y) (yptr arg_z))
  (let ((guess imm0)
        (carry imm1)
        (y imm2)
        (x imm2)
        (prod-l imm3)
        (prod-h imm4)
        (tem imm4)
        (yidx temp0)
        (end-y temp1)
        (carry-bit temp2))
    (lwz x guess-h vsp)
    (lwz tem guess-l vsp)
    (compose-digit guess x tem)
    (lwz end-y len-y vsp)
    (li yidx 0)
    (li carry 0) 
    (li carry-bit '1)
    @loop
    ; multiply guess by ydigit, add carry to lo, hi is new carry
    ; then get an xdigit subtract prod-lo from it and store result in x (remember carry)
    (addi tem yidx arch::misc-data-offset)   ; get yidx
    (lwzx y yptr tem)
    (mullw prod-l guess y)
    (mulhwu prod-h guess y)    
    (addc prod-l prod-l carry) 
    (adde carry prod-h rzero)
    ; get back saved carry
    (li tem '-1)
    (addc tem carry-bit tem)
    (addi tem xidx arch::misc-data-offset)
    (lwzx x xptr tem)    
    (subfe x prod-l x)        
    (stwx x xptr tem)
    ; save carry
    (adde prod-l rzero rzero)
    (box-fixnum carry-bit prod-l)
    (addi yidx yidx '1)
    (cmpw yidx end-y)
    (addi xidx xidx '1)
    (blt @loop)
    ; finally subtract carry from last x digit
    @done
    (li prod-l '-1)  ; get back saved carry again - box clobbered it?
    (addc prod-l carry-bit prod-l)
    (addi tem xidx arch::misc-data-offset) ; maybe still there - nope
    (lwzx x xptr tem)
    (subfe x carry x)
    (stwx x xptr tem)
    (la vsp 12 vsp)
    (blr)))

;; x0 is at index, x1 at index-1, x2 at index-2
;; y1 is at index, y2 at index-1
;; this doesnt help much
(defppclapfunction truncate-guess-loop ((guess-h 8)(guess-l 4)(x 0)
                                        (xidx arg_x)(yptr arg_y) (yidx arg_z))
  (let ((guess imm0)
        (y1 imm1)
        (y2 imm1)
        (gy1-lo imm2) ; look out below
        (gy1-hi imm2)
        (gy2-lo imm2)
        (gy2-hi imm2)
        (xptr temp0)
        (m imm3)
        (tem imm4)
        (y1-idx 28)
        (y2-idx 24)
        (x0-idx 20)
        (x1-idx 16)
        (x2-idx 12))
    (stwu tsp -32 tsp)
    (stw tsp 4 tsp)
    (lwz y1 guess-h vsp)
    (lwz tem guess-l vsp)
    (compose-digit guess y1 tem)
    (addi tem yidx arch::misc-data-offset)
    (lwzx y1 yptr tem)
    (stw y1 y1-idx tsp)
    (subi tem tem 4)
    (lwzx y2 yptr tem)
    (stw y2 y2-idx tsp)
    (lwz xptr x vsp)
    (addi tem xidx arch::misc-data-offset)
    (lwzx y1 xptr tem) ; its x0
    (stw y1 x0-idx tsp)
    (subi tem tem 4)
    (lwzx y1 xptr tem)
    (stw y1 x1-idx tsp)
    (subi tem tem 4)
    (lwzx y1 xptr tem)
    (stw y1 x2-idx tsp)
    @loop
    (lwz y1 y1-idx tsp)     ; get y1
    (mullw gy1-lo guess y1)
    (lwz m x1-idx tsp)      ; get x1
    (subc m m gy1-lo)      ; x1 - gy1-lo => m
    (mulhwu gy1-hi guess y1)
    (lwz tem x0-idx tsp)    ; get x0
    (subfe. tem gy1-hi tem)      ; - val not used just cr
    (lwz y2 y2-idx tsp)     ; get y2
    (mulhwu gy2-hi guess y2)   ; does it pay to do this now even tho may
not need?
    (bne @done)
    (cmpl :cr0 gy2-hi m)       ; if > or = and foo then more - L means
logical means unsigned
    (blt @done)           ; if < done
    (bne @more)           ; if = test lo
    (mullw gy2-lo guess y2)
    (lwz tem x2-idx tsp) ; get x2
    (cmpl :cr0 gy2-lo tem)
    (ble @done)
    @more
    (subi guess guess 1)
    (b @loop)
    @done
    (digit-h temp0 guess)
    (vpush temp0)
    (digit-l temp0 guess)
    (vpush temp0)
    (la temp0 20 vsp)
    (lwz tsp 0 tsp)
    (set-nargs 2)
    (ba .spvalues)))

(defppclapfunction normalize-bignum-loop ((sign arg_x)(res arg_y)(len arg_z))
  (let ((idx imm0)
        (usign imm1)
        (val imm2))      
    (unbox-fixnum usign sign)
    (cmpwi len 0)
    (addi idx len (- arch::misc-data-offset 4))  
    (beqlr) ; huh - can this ever happen?
    @loop
    (lwzx val res idx)
    (cmpw  val usign)    
    (subi idx idx '1)
    (bne @neq)    
    (subic. len len '1)
    (bgt @loop)
    ; fall through - its all sign - return 1
    (li arg_z '1)
    (blr)
    @neq
    (rlwinm usign usign 0 0 0) ; hi bit
    (rlwinm val val 0 0 0)
    (cmpw usign val)  ; is hi bit = sign, if so then done   
    (beqlr)
    (addi len len '1) ; if not, need 1 more
    (blr)))

(defppclapfunction %normalize-bignum-2 ((fixp arg_y)(res arg_z))
  (let ((idx imm0)
        (usign imm1)
        (val imm2)
        (len arg_x)
        (oldlen temp0))
    (lwz imm4 (- arch::fulltag-misc) res)
    (header-length len imm4)
    (cmpwi len 0)
    (mr oldlen len)
    (addi idx len (- arch::misc-data-offset 4))  
    (beqlr) ; huh - can this ever happen?
    (lwzx val res idx) ; high order word
    (srawi usign val 31) ; get sign
    @loop
    (lwzx val res idx)
    (cmpw  val usign)    
    (subi idx idx '1)
    (bne @neq)    
    (subic. len len '1)
    (bgt @loop)
    ; fall through - its all sign - return 1
    (li len '1)
    (rlwinm usign usign 0 0 0) ; hi bit
    (b @more)
    @neq
    (rlwinm usign usign 0 0 0) ; hi bit
    (rlwinm val val 0 0 0)
    (cmpw usign val)  ; is hi bit = sign, if so then done   
    (beq @more)
    (addi len len '1) ; if not, need 1 more
    (b @big)
    @more
    (cmpw :cr1 fixp rnil)
    (cmpwi len '1)
    (beq :cr1 @big)  ; dont return fixnum
    (bgt @big)
    ;; stuff for maybe fixnum
    ;(dbg t)
    (lwz val arch::misc-data-offset res)
    (rlwinm imm4 val 0 0 2) ; hi 3 bits same? - we assume fixnumshift is 2
    (srawi usign usign 2)
    (cmpw usign imm4)
    (bne @big)    
    (box-fixnum arg_z val)
    (blr)
    @big
    (cmpw oldlen len)
    (beqlr) ; same length - done
    (li imm4 arch::subtag-bignum) ; set new length
    (rlwimi imm4 len (- arch::num-subtag-bits arch::fixnumshift) 0 (- 31 arch::num-subtag-bits))
    (stw imm4 arch::misc-header-offset res)
    ; 0 to tail if negative
    (cmpwi usign 0)
    (beqlr) 
     ; zero from len inclusive to oldlen exclusive
    ;(dbg t)
    (addi idx len arch::misc-data-offset)
    @loop2
    (stwx rzero idx res)
    (addi len len '1)
    (cmpw len oldlen)
    (addi idx idx '1)
    (blt @loop2)
    (blr)))

(defppclapfunction %count-digit-leading-zeros ((high arg_y) (low arg_z))
  (compose-digit imm0 high low)
  (cntlzw imm0 imm0)
  (box-fixnum arg_z imm0)
  (blr))

(defppclapfunction %count-digit-trailing-zeros ((high arg_y) (low arg_z))
  (compose-digit imm0 high low)
  (neg imm1 imm0)
  (and imm0 imm0 imm1)
  (cntlzw imm0 imm0)
  (subfic imm0 imm0 31)
  (box-fixnum arg_z imm0)
  (blr))


(defppclapfunction %bignum-count-trailing-zero-bits ((bignum arg_z))
  (let ((ndigits arg_x)
	(nbits arg_y)
	(digit imm0)
	(ptr imm1))
    (li ptr arch::misc-data-offset)
    (li ndigits '-32)
    @next
    (lwzx digit bignum ptr)
    (cmpwi digit 0)
    (la ptr 4 ptr)
    (addi ndigits ndigits '32)
    (beq @next)
    (neg ptr digit)
    (and digit digit ptr)
    (cntlzw digit digit)
    (subfic digit digit 31)
    (box-fixnum nbits digit)
    (add arg_z nbits ndigits)
    (blr)))

    
    
; End of ppc-bignum.lisp
