; mix.asm, some mixers and clippers for sb.

[BITS 32]
[GLOBAL _mixmonoloop]
[GLOBAL _mixstereoloop]
[GLOBAL _clip16bitloop]
[GLOBAL _clip8bitloop]
[EXTERN _pit]
[EXTERN _posil]
[EXTERN _posih]
[EXTERN _ma]
[EXTERN _deita]
[EXTERN _taul]
[EXTERN _rtaul]
[EXTERN _ltaul]
[EXTERN _vali]
[EXTERN _faddl]
[EXTERN _faddh]
[EXTERN _outputsamples]
[EXTERN _selektoori]

[SECTION .text]

 ; *********** mono mixer ************


;-----------------------------------------------------------------------
; _mixmonoloop -- add one 8-bit voice into the 32-bit mono mix buffer.
;
; Reads no stack arguments; everything comes from extern variables:
;   _pit            number of output samples to produce
;   _posih:_posil   32.32 fixed-point read position inside the sample
;                   (written back on return)
;   _faddh:_faddl   32.32 fixed-point step added per output sample
;   _deita          address of the 8-bit sample data
;   _taul           address of the dword volume table, indexed by the
;                   sample byte
;   _vali, _ma      mix buffer base and the dword index to start at
;
; Registers: all general registers are preserved (pushad/popad);
;            flags are not.
; A count of zero returns immediately and leaves the position as is.
;
; Inside the loop:
;   eax = write pointer into the mix buffer
;   edx:ebx = source address (integer part) : fraction
;   ecx = table address / 4, with the sample byte patched into cl
;   edi = _faddl, ebp = samples left, esi = scratch
;-----------------------------------------------------------------------
_mixmonoloop:
   pushad                     ;preserve every general register

   mov ebp,[_pit]             ;ebp = samples left to mix
   test ebp,ebp
   jz .done                   ;a zero count would wrap in dec/jnz below
                              ;and run 2^32 iterations

   mov edx,[_posih]           ;edx:ebx = 32.32 fixed-point position
   mov ebx,[_posil]

   mov eax,[_ma]              ;eax = _vali + _ma*4 (dword entries)
   shl eax,2
   add eax,[_vali]

   add edx,[_deita]           ;edx now addresses the current sample byte

   ;ecx holds the table address divided by 4 so that the sample byte
   ;can simply be dropped into cl and the entry fetched with [ecx*4].
   ;Because cl replaces bits 2..9 of the address and the shift drops
   ;bits 0..1, the low 10 bits of _taul are ignored: the table has to
   ;start on a 1024-byte boundary for the lookup to hit it.
   mov ecx,[_taul]
   shr ecx,2

   mov edi,[_faddl]           ;low half of the step lives in a register

.next:
   mov cl,byte [edx]          ;sample byte selects the table entry
   add ebx,edi                ;advance the fraction; CF feeds the adc
   mov esi,[ecx*4]            ;volume-scaled sample (mov keeps CF intact)
   adc edx,[_faddh]           ;advance the integer part plus carry
   add [eax],esi              ;accumulate into the mix buffer
   add eax,4                  ;next output slot
   dec ebp
   jnz .next

   sub edx,[_deita]           ;back from an address to a plain position
   mov [_posih],edx           ;hand the updated position to the caller
   mov [_posil],ebx

.done:
   popad
   ret

 ; *********** stereo mixer ************

 ;times ($$-$) & 3 nop   ;pad with NOPs to 4-byte boundary..

;-----------------------------------------------------------------------
; _mixstereoloop -- add one 8-bit voice into the 32-bit stereo mix
; buffer (left dword followed by right dword for every output frame).
;
; Reads no stack arguments; everything comes from extern variables:
;   _pit            number of output frames to produce
;   _posih:_posil   32.32 fixed-point read position inside the sample
;                   (written back on return)
;   _faddh:_faddl   32.32 fixed-point step added per output frame
;   _deita          address of the 8-bit sample data
;   _ltaul, _rtaul  addresses of the left / right dword volume tables,
;                   indexed by the sample byte
;   _vali, _ma      mix buffer base and the dword index to start at
;
; Registers: all general registers are preserved (pushad/popad);
;            flags are not.
; A count of zero returns immediately and leaves the position as is.
;
; Inside the loop:
;   eax = write pointer into the mix buffer
;   ebp:ebx = source address (integer part) : fraction
;   ecx / edx = left / right table address / 4, sample byte in cl / dl
;   edi = frames left, esi = scratch
;
; Note: the sample is fetched through [ebp], which defaults to the ss
; segment; this relies on ss addressing the same memory as ds.
;-----------------------------------------------------------------------
_mixstereoloop:
   pushad                     ;preserve every general register

   mov edi,[_pit]             ;edi = frames left to mix
   test edi,edi
   jz .done                   ;a zero count would wrap in dec/jnz below
                              ;and run 2^32 iterations

   mov ebp,[_posih]           ;ebp:ebx = 32.32 fixed-point position
   mov ebx,[_posil]

   mov eax,[_ma]              ;eax = _vali + _ma*4 (dword entries)
   shl eax,2
   add eax,[_vali]

   add ebp,[_deita]           ;ebp now addresses the current sample byte

   ;Both table addresses are kept divided by 4 so the sample byte can
   ;be dropped into cl / dl and the entry fetched with [reg*4].  The
   ;low 10 bits of _ltaul and _rtaul are ignored as a result, so each
   ;table has to start on a 1024-byte boundary.
   mov ecx,[_ltaul]
   mov edx,[_rtaul]
   shr ecx,2
   shr edx,2

.next:
   mov cl,byte [ebp]          ;fetch the sample for this frame only; no
                              ;byte is read beyond the last frame mixed
   add ebx,[_faddl]           ;advance the fraction; CF feeds the adc
   adc ebp,[_faddh]           ;advance the integer part plus carry
   mov dl,cl                  ;same sample byte indexes the right table
   mov esi,[ecx*4]            ;left channel, volume-scaled
   add [eax],esi
   mov esi,[edx*4]            ;right channel, volume-scaled
   add [eax+4],esi
   add eax,8                  ;next output frame
   dec edi
   jnz .next

   sub ebp,[_deita]           ;back from an address to a plain position
   mov [_posih],ebp           ;hand the updated position to the caller
   mov [_posil],ebx

.done:
   popad
   ret

; *********** 16-bit clipper ************

;-----------------------------------------------------------------------
; _clip16bitloop -- saturate the 32-bit mix buffer to signed 16 bits
; and copy it to the output buffer addressed through a selector.
;
; Reads no stack arguments; everything comes from extern variables:
;   _pit            number of dwords to convert
;   _vali           address of the signed 32-bit source buffer
;   _selektoori     selector of the destination (the DMA buffer in
;                   memory below 1 MB)
;   _posil          byte offset inside that selector to start writing
;
; Each source dword is clamped to -32768..32767 and stored as one
; 16-bit word.  Exactly 2*_pit bytes are written and 4*_pit bytes are
; read; a count of zero does nothing.
;
; Registers: all general registers and fs are preserved; flags are not.
;-----------------------------------------------------------------------
_clip16bitloop:
   pushad                     ;preserve every general register
   push fs                    ;fs is borrowed for the destination

   mov ax,[_selektoori]
   mov fs,ax                  ;fs -> destination buffer

   mov edx,[_pit]             ;edx = samples left
   mov esi,[_vali]            ;esi = source pointer
   mov edi,[_posil]           ;edi = destination offset inside fs

   test edx,edx
   jz .done                   ;a zero count would wrap in dec/jnz below

.next:
   mov eax,[esi]              ;signed 32-bit mixed sample
   add esi,4
   cmp eax,-32768
   jl .low                    ;below the 16-bit range
   cmp eax,32767
   jg .high                   ;above the 16-bit range
.store:
   mov [fs:edi],ax            ;store 16 bits only: a dword store here
                              ;would spill 2 bytes past the final slot
   add edi,2
   dec edx
   jnz .next

.done:
   pop fs
   popad
   ret

.low:
   mov eax,-32768             ;saturate at the most negative value
   jmp .store

.high:
   mov eax,32767              ;saturate at the most positive value
   jmp .store

; *********** 8-bit clipper ************
;times ($$-$) & 3 nop     ;pad with NOPs to 4-byte boundary..

;-----------------------------------------------------------------------
; _clip8bitloop -- convert the 32-bit mix buffer to unsigned 8-bit
; samples and copy them to the output buffer addressed through a
; selector.
;
; Reads no stack arguments; everything comes from extern variables:
;   _pit            number of dwords to convert
;   _vali           address of the signed 32-bit source buffer
;   _selektoori     selector of the destination buffer
;   _posil          byte offset inside that selector to start writing
;
; Each source dword is shifted right arithmetically by 8 (dropping its
; low 8 bits), clamped to -128..127 and biased by +128, giving one
; byte in 0..255.  Exactly _pit bytes are written; a count of zero
; does nothing.
;
; Registers: all general registers and fs are preserved; flags are not.
;-----------------------------------------------------------------------
_clip8bitloop:
   pushad                     ;preserve every general register
   push fs                    ;fs is borrowed for the destination

   mov ax,[_selektoori]
   mov fs,ax                  ;fs -> destination buffer

   mov edx,[_pit]             ;edx = samples left
   mov esi,[_vali]            ;esi = source pointer
   mov edi,[_posil]           ;edi = destination offset inside fs

   test edx,edx
   jz .done                   ;a zero count would wrap in dec/jnz below

.next:
   mov eax,[esi]              ;signed 32-bit mixed sample
   add esi,4
   sar eax,8                  ;keep the upper bits, drop the low 8
   cmp eax,127
   jg .high                   ;above the 8-bit range
   add eax,128                ;bias signed -> unsigned; the sum is
   jl .low                    ;negative exactly when eax was < -128
                              ;(no overflow is possible after the sar)
.store:
   mov [fs:edi],al
   inc edi
   dec edx
   jnz .next

.done:
   pop fs
   popad
   ret

.low:
   xor eax,eax                ;saturate at 0
   jmp .store

.high:
   mov eax,255                ;saturate at 255
   jmp .store

[SECTION .data]
[SECTION .bss]

; end of file
