;"basic" XSD sound mixer with plain DAC support
; for PC Speaker


;------------------------------------------------------------------------------
; SOUND FILE INPUT:
; Decoder for 8-bit, uncompressed ("unpacked") VOC files.
; RIFF WAV may be the future, but VOC files are supported by
; nearly all applications and are easier to decode.

set_comp macro
           ; Consume the "pack method" byte of a VOC sound-data block.
           ; Only method 0 (plain 8-bit samples) is accepted; anything else
           ; leaves through voc_end with al = 2.
           ; in:  esi -> pack byte      out: esi advanced by 1, al = 2
           lodsb
           test al,al        ; ZF set when the data is not packed
           mov al,2          ; preload the "compressed data" error code (MOV keeps the flags)
           jnz voc_end       ; packed data: give up, al = 2
         endm
           
set_sample macro
             ; Read the VOC "time constant" byte and derive the sample rate
             ; and the sample period from it.
             ; in:  esi -> time-constant byte
             ; out: esi advanced by 1
             ;      voc_rate = 1000000 / (256 - byte)      (samples per second)
             ;      voc_nano = 1000000000 / voc_rate       (nanoseconds per sample)
             ;      eax = voc_nano, ebx = voc_rate, edx = remainder of the last DIV
             xor eax,eax
             lodsb                  ; eax = time constant (0..255)
             mov ebx,256
             sub ebx,eax            ; ebx = 256 - time constant, always 1..256
             mov eax,1000000
             xor edx,edx            ; edx:eax = 1,000,000
             div ebx
             mov voc_rate,eax       ; samples per second
             mov ebx,eax
             mov eax,1000000000     ; one billion nanoseconds
             xor edx,edx
             div ebx
             mov voc_nano,eax       ; sample period in nanoseconds
           endm
           
blitquiet macro
            local bq_up_emit, bq_down, bq_down_carry, bq_down_emit, bq_down_next, bq_end
            ; Write the playback-rate equivalent of ecx silent VOC samples.
            ; in:  ecx = number of silent samples at voc_rate
            ;      edi = output pointer
            ;      voc_nano / play_nano = source / playback sample periods
            ; out: edi advanced past the zero bytes written
            ; clobbers: eax, ebx, ecx, edx, ebp, flags
            ;
            ; The rate conversion is a Bresenham-style error accumulator:
            ; the error starts at -(longer period), twice the shorter period
            ; is added per step on the major axis and twice the longer period
            ; is subtracted whenever the error becomes non-negative.
            ;
            ; Fixes over the previous version:
            ;  * the jumps named labels that did not exist (vtresh/ptresh);
            ;  * when up-sampling, the step that crossed the threshold wrote
            ;    nothing, so equal rates produced no output at all;
            ;  * when down-sampling, skipped source samples were not counted;
            ;  * labels are LOCAL so the macro can be expanded more than once.
            ; NOTE(review): play_nano is assumed to be non-zero.
            mov ebp,voc_nano
            mov edx,play_nano
            xor eax,eax            ; al = 0 is the value stored for silence
            or  ecx,ecx
            jz  bq_end             ; nothing to convert
            cmp ebp,edx
            jb  bq_down
            ; --- voc period >= playback period: every source sample yields
            ;     one or more output samples
            lea ebx,[ebp*2]        ; ebx = 2 * voc_nano
            shl edx,1              ; edx = 2 * play_nano
            neg ebp                ; error = -voc_nano
bq_up_emit: stosb                  ; each source sample is emitted at least once
            add ebp,edx
            jl  bq_up_emit         ; still inside the same source sample
            sub ebp,ebx            ; move on to the next source sample
            dec ecx
            jnz bq_up_emit
            jmp short bq_end
            ; --- voc period < playback period: some source samples are dropped
bq_down:    xchg ebp,edx           ; ebp = play_nano (longer), edx = voc_nano
            lea ebx,[ebp*2]        ; ebx = 2 * play_nano
            shl edx,1              ; edx = 2 * voc_nano
            neg ebp                ; error = -play_nano
            jmp short bq_down_emit ; the first source sample is always emitted
bq_down_carry:
            sub ebp,ebx
bq_down_emit:
            stosb
bq_down_next:
            dec ecx                ; one source sample consumed (emitted or dropped)
            jz  bq_end
            add ebp,edx
            jge bq_down_carry      ; threshold crossed: this sample is emitted
            jmp short bq_down_next ; otherwise it is dropped
bq_end:
            endm

blitsound macro
            local bs_up_load, bs_up_emit, bs_down, bs_down_carry, bs_down_copy, bs_down_skip, bs_down_next, bs_end
            ; Copy ecx source samples from esi to edi, converting them from
            ; voc_rate to the playback rate.
            ; in:  ecx = number of source samples (bytes) in the block
            ;      esi = source pointer, edi = output pointer
            ;      voc_nano / play_nano = source / playback sample periods
            ; out: esi advanced by exactly ecx bytes, edi past the output
            ; clobbers: eax, ebx, ecx, edx, ebp, flags
            ;
            ; Same Bresenham-style error accumulator as blitquiet.
            ;
            ; Fixes over the previous version:
            ;  * the jumps named labels that did not exist (svtresh/sptresh);
            ;  * when up-sampling, the step that crossed the threshold wrote
            ;    nothing, so equal rates produced no output at all;
            ;  * when down-sampling, skipped source bytes were not counted and
            ;    one byte was copied even for ecx = 0, so esi ran past the end
            ;    of the block and the block decoder lost synchronisation;
            ;  * labels are LOCAL so the macro can be expanded more than once.
            ; NOTE(review): play_nano is assumed to be non-zero.
            mov ebp,voc_nano
            mov edx,play_nano
            or  ecx,ecx
            jz  bs_end             ; empty block
            cmp ebp,edx
            jb  bs_down
            ; --- voc period >= playback period: every source sample yields
            ;     one or more output samples
            lea ebx,[ebp*2]        ; ebx = 2 * voc_nano
            shl edx,1              ; edx = 2 * play_nano
            neg ebp                ; error = -voc_nano
bs_up_load: lodsb                  ; al = next source sample
bs_up_emit: stosb                  ; emitted at least once
            add ebp,edx
            jl  bs_up_emit         ; repeat the same sample
            sub ebp,ebx            ; move on to the next source sample
            dec ecx
            jnz bs_up_load
            jmp short bs_end
            ; --- voc period < playback period: some source samples are dropped
bs_down:    xchg ebp,edx           ; ebp = play_nano (longer), edx = voc_nano
            lea ebx,[ebp*2]        ; ebx = 2 * play_nano
            shl edx,1              ; edx = 2 * voc_nano
            neg ebp                ; error = -play_nano
            jmp short bs_down_copy ; the first source sample is always copied
bs_down_carry:
            sub ebp,ebx
bs_down_copy:
            movsb                  ; copy this source sample
            jmp short bs_down_next
bs_down_skip:
            inc esi                ; drop this source sample
bs_down_next:
            dec ecx                ; one source sample consumed (copied or dropped)
            jz  bs_end
            add ebp,edx
            jge bs_down_carry      ; threshold crossed: copy the next sample
            jmp short bs_down_skip
bs_end:
            endm
            
          
; Jump table indexed by the VOC block type byte (0..8).
VocPackets  dd offset voc_end       ; 0
            dd offset voc_data      ; 1
            dd offset voc_cont      ; 2
            dd offset quiet         ; 3
            dd offset marker        ; 4
            dd offset ascii         ; 5
            dd offset rep_start     ; 6
            dd offset rep_end       ; 7
            dd offset extend        ; 8

; decoder / decode: fetch the next block header and dispatch on its type.
; in:  esi -> block header (type byte followed by a 3-byte length)
; out: the handlers loop back here; the chain ends with RET and a status in al
;      (0 from a type-0 block, the raw type byte for types >= 9).
; "decoder" clears ebx first; "decode" relies on the upper 24 bits of ebx
; already being zero.
decoder: xor ebx,ebx
decode:  lodsd                      ; al = type, bits 8..31 = block length
         cmp al,9
         mov bl,al                  ; table index (MOV keeps the flags of the CMP)
         jae voc_end                ; unknown block type: stop
         jmp dword ptr [VocPackets+ebx*4]

; A type-7 block reached through the table returns al = 1. Inside a repeat
; this return lands in reploop; otherwise it reaches the original caller.
rep_end: mov al,1
voc_end: ret

rep_start:
        ; Block type 6: decode the blocks that follow, over and over, until
        ; the 16-bit counter read from the block reaches zero.
        ; Each pass restarts from the saved source pointer and ends when the
        ; nested decode returns.
        lodsw                   ; ax = repeat counter
        push esi                ; remember where the repeated section starts
reploop:
        push eax                ; keep the counter across the nested call
        mov esi,[esp+4]         ; rewind to the start of the section
        call decode
        pop eax                 ; (the status returned in al is discarded)
        dec ax
        jnz reploop
        add esp,4               ; drop the saved section pointer
        jmp decode              ; continue with the block after the section
        
voc_data:
        ; Block type 1: time-constant byte, pack byte, then the samples.
        shr eax,8               ; eax = 24-bit block length
        lea ecx,[eax-2]         ; sample count = length minus the two header bytes
        set_sample              ; reads the time constant, sets voc_rate/voc_nano
        set_comp                ; reads the pack byte, leaves via voc_end if packed
sblit:
        blitsound               ; convert ecx samples from esi to edi
        jmp decoder

voc_cont:
        ; Block type 2: more samples at the rate set by the previous block.
        mov ecx,eax
        shr ecx,8               ; ecx = 24-bit block length = sample count
        jmp sblit
        
quiet:
        ; Block type 3: a 16-bit word (sample count minus one) followed by a
        ; time-constant byte.
        movzx eax,word ptr [esi]
        add esi,2
        lea ecx,[eax+1]         ; ecx = number of silent samples
        set_sample              ; reads the time constant, sets voc_rate/voc_nano
        blitquiet               ; write the silence at the playback rate
        jmp decoder
        
extend:
        ; Block type 8: not handled, stop decoding with error code 3.
        mov al,3
        ret

marker:
        ; Block type 4: step over the two bytes that follow the header.
        lea esi,[esi+2]
        jmp decode

ascii:
        ; Block type 5: step over the whole block using its 24-bit length.
        shr eax,8
        lea esi,[esi+eax]
        jmp decode
        
vocstamp db 'Creative Voice File',1Ah
VOCSTAMP_DWORDS=5 ; the 20 signature bytes are compared as 5 dwords

Voc2PCM:
        ; Validate a VOC image and decode it into a raw sample block.
        ; in:
        ; esi = start of VOC data to translate
        ; edi = destination for RAW data
        ; out:
        ; al = error code (0 == no errors, 4 == not a VOC file, any other
        ;      value is whatever the block decoder returned)
        ; ebx = RAW datablock start (first dword is equal to data size)
        ; edi = length of sound data, rounded up to a multiple of 4
        ; (ebx and edi are not meaningful when al == 4)
        ;
        ; The signature test is bounded to the 20 signature bytes. The
        ; previous loop kept comparing until the first mismatch, reading past
        ; the end of vocstamp and rejecting a valid file whenever the dword
        ; after the signature happened to match too.
        push edi                ; save data start
        add edi,4               ; leave room for the length dword
        movzx ebx,word ptr [esi+14h] ; word at header offset 14h = offset of the first block
        mov edx,esi
        add esi,ebx             ; esi -> first data block
        mov ebx,offset vocstamp
        mov ecx,VOCSTAMP_DWORDS
votest: mov eax,[edx]
        cmp eax,[ebx]
        jne novoc               ; signature mismatch
        add edx,4
        add ebx,4
        dec ecx
        jne votest
        call decoder            ; al = decoder status, edi = end of output
        pop ebx                 ; restore data start
        sub edi,ebx             ; bytes used, including the length dword
try_dword:
        test edi,03h
        jz is_dworded
        mov byte ptr [ebx+edi],0 ; pad with zero bytes up to a dword boundary
        inc edi
        jmp short try_dword
is_dworded: ; sound data is dword rounded
        sub edi,4               ; cut header length
        mov [ebx],edi           ; store length of data excluding header
        ; al still holds the status returned by decoder
        ret
novoc:
        add esp,4               ; drop the saved destination pointer
        mov al,4                ; not a VOC file
        ret


;----------------------------------------------------------------------------
; SOFTCHANNEL MIXER
;

        align dword
        
; Default "no sound" block: SILENCE_LEN dwords of zero, handed out by
; autosound when a channel has nothing queued (the "sound of silence").
SILENCE_LEN=4096
snd_silence dd SILENCE_LEN dup(0)


reset_softchannels:
        ; Put every soft channel back to its idle state: no queued sound
        ; (wt_sdata = 0) and autosound as the sample feeder.
        ; clobbers: ecx, edi, flags
        mov ecx,SCHANNELS               ; number of descriptors to reset
        mov edi,offset _XSD_SCHANNELS   ; first descriptor
softinit:
        mov dword ptr [edi+wt_sdata],0
        mov dword ptr [edi+wt_sfeed],offset autosound
        add edi,softchannel_size        ; next descriptor
        dec ecx
        jnz softinit
        ret

autosound: ; autoselect next sound to play
           ; input:
           ;         esi== current sdac entry (preserved)
           ; output:
           ;         eax== length of next sample to play
           ;         [esi+wt_len], [esi+wt_ptr] describe that sample
           ; can modify edx
           ;
           ; Fixes over the previous version:
           ;  * the silence path ended with "mov esi,SILENCE_LEN": it wiped
           ;    out the channel pointer and never returned the length in eax;
           ;  * the length is written through wt_len, the field Mixer reads,
           ;    instead of a bare [esi].
        mov edx,[esi+wt_sdata]
        or edx,edx
        jz silence_feed         ; nothing queued: play the silence block
        ; sound feed: wt_sdata points at a {length, sample pointer} pair
        mov eax,[edx]           ; length
        mov edx,[edx+4]         ; pointer to samples
        mov [esi+wt_len],eax    ; set new sample to play
        mov [esi+wt_ptr],edx
        ret
silence_feed:
        mov eax,SILENCE_LEN
        mov dword ptr [esi+wt_ptr],offset snd_silence
        mov [esi+wt_len],eax
        ret

;----------------------------------------------------------------------------
       ; 4 CHANNEL SOUND MIXER
blk_len dd 0 ; length of the part of the block still to be mixed
mix_len dd 0 ; length of the current "mix run"

Mixer: ; edi= code32 relative offset of block to send next
       ; esi= base of 4channel descriptors we mix
       ; out: esi = base of the next group of 4 descriptors
       ;
       ; The block (_XSD_FEEDSIZE long) is mixed in runs. A run stops where
       ; the first of the four voices ends; a voice whose length is zero is
       ; refilled through its wt_sfeed routine before the run is sized.
       ;
       ; Fixes over the previous version:
       ;  * the feeder was called through [edi+wt_sfeed]; edi is the output
       ;    pointer, the descriptor is in esi;
       ;  * the running minimum was never updated ("mov eax,ebp" had its
       ;    operands the wrong way round);
       ;  * mix_len was stored from ecx, which was never set;
       ;  * the fourth channel was never addressed: 2*softchannel_size was
       ;    used twice for the pointer load, the pointer store and the
       ;    length update.
       ; NOTE(review): a feeder must never leave a zero length (mix_cut and
       ; mix_8 do not handle a zero count).

        ; set initial sizes
        mov eax,_XSD_FEEDSIZE
        mov blk_len,eax
        ; calculate length of current "mixage run"
        ; ( stop filling block at the first ending voice)
goomix: push esi
        mov ebp,blk_len         ; ebp = running minimum, starts at what is left
        mov ebx,4               ; four channels to look at
fndlen: mov eax,[esi+wt_len]    ; check the "mix run" size
        or eax,eax
        jnz feedfilled
        call dword ptr [esi+wt_sfeed] ; voice ended: fetch its next sample (eax = length)
feedfilled:
        cmp ebp,eax
        jb nochange
dochange:
        mov ebp,eax             ; this voice ends sooner: shorten the run
nochange:
        add esi,softchannel_size
        dec ebx
        jne fndlen

        mov esi,[esp] ; reload esi with previous value
                      ; but leave it on stack
        ; ebp = length of continuous run to mix IN DWORDS
        ;       ( 8bit:  mono = 4 samples,  stereo= 2 samples
        ;         16bit: mono = 2 samples,  stereo= 1 sample)
        ; presumably SOFT_MIXER takes the count in ebp and the four sources
        ; in esi/edx/ebx/ecx like mix_cut and mix_8 do -- TODO confirm
        mov mix_len,ebp
        mov edx,[esi+(wt_ptr+softchannel_size)]
        mov ebx,[esi+(wt_ptr+2*softchannel_size)]
        mov ecx,[esi+(wt_ptr+3*softchannel_size)]
        mov esi,[esi+wt_ptr]
        call SOFT_MIXER  ; still to refine this
        mov ebp,esi
        pop esi
        ; update softchannel pointers
        mov [esi+(wt_ptr+softchannel_size)],edx
        mov [esi+(wt_ptr+2*softchannel_size)],ebx
        mov [esi+(wt_ptr+3*softchannel_size)],ecx
        mov [esi+wt_ptr],ebp ; first channel's pointer came back in esi
        mov eax,mix_len
        ; update softchannel counters
        sub [esi+(wt_len+softchannel_size)],eax
        sub [esi+(wt_len+2*softchannel_size)],eax
        sub [esi+(wt_len+3*softchannel_size)],eax
        sub [esi+wt_len],eax
        sub blk_len,eax
        jnbe goomix             ; part of the block is still empty: next run
        add esi,(4*softchannel_size) ; move to next channel block
        ; mixage completed for this 4-channel block, and ready for next
        ret

;;;;;;;;;;;;;;;;;;;;;;;;;;; SOUND DATA MIXERS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ; MIX_CUT
mix_cut:
        ; Mixer for "prealigned" samples (8bit-->6bit or 16bit-->14bit),
        ; without volume controls: four sources are summed one dword at a
        ; time, no overflow handling is done.
        ; in:  esi, edx, ebx, ecx = the four source pointers
        ;      edi = output pointer
        ;      ebp = number of dwords to mix (must not be zero)
        ; out: all five pointers advanced by ebp*4 bytes, ebp = 0
        ; clobbers: eax, flags
        lea edi,[edi+ebp*4]     ; park every pointer at the end of its run...
        lea ecx,[ecx+ebp*4]
        lea ebx,[ebx+ebp*4]
        lea edx,[edx+ebp*4]
        lea esi,[esi+ebp*4]
        neg ebp                 ; ...and count a negative index up to zero
mix_cut_loop:
        mov eax,[edx+ebp*4]     ; (386P 3.00 is meant to bring a routine that
        add eax,[esi+ebp*4]     ;  mixes UP TO 32 channels in a single loop,
        add eax,[ecx+ebp*4]     ;  using an auto-compiler derived from the
        add eax,[ebx+ebp*4]     ;  "autolink" experiments)
        mov [edi+ebp*4],eax
        inc ebp
        jnz mix_cut_loop
        ret

; MIX8
; Mixer for "uncut" 8bit sound samples: four sources are summed byte by
; byte and every byte that overflows is clamped to 0FFh.
; The author profiled it and interleaved the instructions for 486/Pentium;
; how fast it runs depends on the "mixer load" and on the samples played.
;
; in:  esi, edx, ebx, ecx = the four source pointers
;      edi = output pointer
;      ebp = number of dwords to mix (must not be zero)
; out: all five pointers advanced by ebp*4 bytes, ebp = 0
; clobbers: eax, flags
;
; Each dword is handled as two words, al/ah holding the two running sums.
; As soon as one byte overflows, control leaves the main line for a
; "topN_k" stub that only finishes the OTHER byte of the word.
;
; Fixes over the previous version:
;  * top1_3, top2_3, top3_3 and top4_3 had no colon;
;  * the high word of the first channel was loaded from the output buffer
;    (edi) instead of the source (esi);
;  * the top2_x / top4_x stubs added again the sample the main line had
;    already added before the jump;
;  * top3 stored after "inc ebp" with the old displacement and top4 stored
;    eax at [edi], so both wrote the result to the wrong address;
;  * "jmp short go_two" is left to the assembler to size (the target looks
;    further than a short jump can reach).

        ; first word: al overflowed, finish ah
top1_1:
        add ah,[edx+ebp*4+1]
        jc topp
top1_2:
        add ah,[ebx+ebp*4+1]
        jc topp
top1_3:
        add ah,[ecx+ebp*4+1]
        jc topp
        mov al,0FFh
        jmp go_two

        ; first word: ah overflowed, finish al with the channels not added yet
top2_1:
        add al,[ebx+ebp*4]
        jc topp
top2_2:
        add al,[ecx+ebp*4]
        jc topp
top2_3:
        mov ah,0FFh
        jmp go_two

        ; first word: both bytes overflowed
topp:   mov word ptr [edi+ebp*4],0FFFFh ; store "overflowed" low word
        jmp short mix8_hi

mix_8:  ; mixer entry point is here  <------------
        lea esi,[esi+ebp*4]
        lea edx,[edx+ebp*4]
        lea ebx,[ebx+ebp*4]
        lea ecx,[ecx+ebp*4]
        lea edi,[edi+ebp*4]
        neg ebp                 ; negative index counting up to zero
mixx_8:
        mov ax,[esi+ebp*4]  ; load two samples

        add al,[edx+ebp*4]
        jc top1_1
        add ah,[edx+ebp*4+1]
        jc top2_1

        add al,[ebx+ebp*4]
        jc top1_2
        add ah,[ebx+ebp*4+1]
        jc top2_2

        add al,[ecx+ebp*4]
        jc top1_3
        add ah,[ecx+ebp*4+1]
        jc top2_3
        ; COMPLETED PROCESSING OF FIRST WORD
go_two: mov [edi+ebp*4],ax    ; store low word
mix8_hi:
        mov ax,[esi+ebp*4+2]  ; load high word of the first channel
        ; START PROCESSING NEXT WORD
        add al,[edx+ebp*4+2]
        jc top3_1
        add ah,[edx+ebp*4+3]
        jc top4_1

        add al,[ebx+ebp*4+2]
        jc top3_2
        add ah,[ebx+ebp*4+3]
        jc top4_2

        add al,[ecx+ebp*4+2]
        jc top3_3
        add ah,[ecx+ebp*4+3]
        jc top4_3
        mov [edi+ebp*4+2],ax
        inc ebp
        jne mixx_8
        ret

        ; second word: both bytes overflowed
topk:
        mov word ptr [edi+ebp*4+2],0FFFFh
        inc ebp
        jne mixx_8
        ret

        ; second word: al overflowed, finish ah
top3_1:
        add ah,[edx+ebp*4+3]
        jc topk
top3_2:
        add ah,[ebx+ebp*4+3]
        jc topk
top3_3:
        add ah,[ecx+ebp*4+3]
        jc topk
        mov al,0FFh
        mov [edi+ebp*4+2],ax
        inc ebp
        jne mixx_8
        ret

        ; second word: ah overflowed, finish al with the channels not added yet
top4_1:
        add al,[ebx+ebp*4+2]
        jc topk
top4_2:
        add al,[ecx+ebp*4+2]
        jc topk
top4_3:
        mov ah,0FFh
        mov [edi+ebp*4+2],ax
        inc ebp
        jne mixx_8
        ret

;;;;;;;;;;;;;;;;;;;;;;;;;; PANNING CONTROL   ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; volume panning table:
; every table entry uses 16 bytes subdivided in 4 dwords
;       shift_volume   first byte is right_volume_shift, second is the left one
;       right_volume   ptr right volume lookup table
;       left_volume    ptr left volume lookup table
;       mask_volume    mask for "collective shift" when performing volume ctrl
;
; stable, rtable, ltable and mtable name the four dwords of entry 0; the
; remaining entries follow inside the mtable reservation
; (3 + (256*4-3) = 1024 dwords = 256 entries of 16 bytes).
; pcontrol and vcontrol index an entry as [ebx*8+field] with ebx = 2*index.

align dword
stable dd 0
rtable dd 0
ltable dd 0
mtable dd ((256*4)-3) dup(0)

; "self modify" dummy values: placeholders assembled into the instructions
; that pcontrol and vcontrol overwrite at run time
DMODIFY=12345678h
SMODIFY=17

; panning control & mono-->stereo expansion

pcontrol: ;edi=buffer ptr (only UPPER half has been filled)
          ;ebp=buffer size in DWORDS
          ;esi=sounchannel ptr
          ; SELF-MODIFYING CODE ALERT!!!!!!!!!!
          ;
          ; Expands mono samples to left/right pairs: every input word (two
          ; mono samples) becomes one output dword per sample pair, each
          ; sample being looked up in the left and in the right table of the
          ; panning entry selected by the byte at [esi+wt_pan], then shifted.
          ; out: esi preserved, edi = end of the buffer, ebp = 0
          ; clobbers: eax, ebx, ecx, edx, flags
          push esi
          movzx ebx,byte ptr [esi+wt_pan]
          add ebx,ebx           ; ebx*8 below = pan * 16 = byte offset of the entry
          mov ecx,[ebx*8+stable] ; cl / ch = the two shift counts of the entry
          mov esi,[ebx*8+rtable] ; esi = right lookup table
          mov eax,[ebx*8+ltable] ; eax = left lookup table
          lea edi,[edi+ebp*4] ; move to end of buffer
          neg ebp ; negate count  (we 'll UP-count)
          ; ok! Self Modify!
          ; Each patch writes 2 bytes into the target instruction: this
          ; assumes the immediate / displacement starts at offset 2 of its
          ; encoding.
          mov ds:(2+offset shif1),ch  ; ch -> shift count applied to al
          mov ds:(2+offset shuf1),cl  ; cl -> shift count applied to ah
          mov ds:(2+offset shif2),ch  ; ch -> shift count applied to cl
          mov ds:(2+offset shuf2),cl  ; cl -> shift count applied to ch
          mov ds:(2+offset mou1),eax  ; left table address -> displacement of mou1
          mov ds:(2+offset mou2),eax  ; left table address -> displacement of mou2
          xor ebx,ebx
          xor edx,edx
pctrl:    xor ecx,ecx
          mov ax,[edi+ebp*2] ; fetch two MONO samples
          mov bl,ah             ; ebx = second sample
          mov dl,al             ; edx = first sample
          mov cl,bl             ; ecx = second sample
          and eax,255           ; eax = first sample
mou1:     mov al,[eax+DMODIFY]  ; al = left table [first sample]
          mov ah,[esi+edx*1]    ; ah = right table [first sample]
mou2:     mov cl,[ecx+DMODIFY]  ; cl = left table [second sample]
shif1:    shr al,SMODIFY
          mov ch,[esi+ebx*1]    ; ch = right table [second sample]
shuf1:    shr ah,SMODIFY
shif2:    shr cl,SMODIFY
          mov [edi+ebp*4],ax    ; store the pair built from the first sample
shuf2:    shr ch,SMODIFY
          mov [edi+ebp*4+2],cx  ; store the pair built from the second sample
          inc ebp
          jne pctrl
          pop esi
          ret

pramp:  ; PANNING RAMP
        ; Runs pcontrol over the whole buffer in steps of wt_panstep dwords,
        ; adding wt_paninc to wt_pan before every step.
        ; in:  esi = soundchannel ptr; buffer_start / buffer_size set
        ; clobbers: eax, ebx, ecx, edx, ebp, edi, flags
        ;
        ; Fix over the previous version: when a step ended exactly at the
        ; end of the buffer the routine returned BEFORE processing it, so
        ; the tail of the buffer was never panned. The last step (whole or
        ; partial) is now always processed.
        ; NOTE(review): wt_panstep is assumed to be non-zero.
        mov ebx,buffer_size     ; ebx = dwords still to process
        mov edi,buffer_start
pan_step:
        mov ecx,[esi+wt_panstep]
pan_it:
        mov dx,[esi+wt_pan]
        add dx,[esi+wt_paninc]
        mov [esi+wt_pan],dx
        cmp dx,[esi+wt_panend]
        je  pan_stop
        jnb pan_overshoot
paned:
        sub ebx,ecx
        ja  pan_full            ; buffer not finished after this step
        ; this step reaches (ebx = 0) or passes (ebx < 0) the end of the buffer
        lea ebp,[ecx+ebx*1]     ; ebp = dwords that were left
        xor ebx,ebx
        xor ecx,ecx
        or  ebp,ebp
        jz  plimit              ; nothing left: the buffer is done
        jmp short pvuala
pan_full:
        mov ebp,ecx
pvuala:
        push ecx
        push ebx
        call pcontrol ; perform one step (edi comes back advanced)
        pop ebx
        pop ecx
        jmp short pan_it
plimit: ret ; buffer filled, continue next time

pan_stop: ; ramp complete, try to "feed in" another ramp
        call [esi+wt_feed]
        jmp short paned

pan_overshoot: ; went past the target: clamp to it and stop moving
        mov dx,[esi+wt_panend]
        mov [esi+wt_pan],dx
        mov word ptr [esi+wt_paninc],0
        jmp short paned



;;;;;;;;;;;;;;;;;;;;;;;;;; VOLUME  CONTROL   ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; volume control (no ramps)

vcontrol: ;edi=buffer ptr
          ;ebp=buffer size in DWORDS
          ;esi=sounchannel ptr
          ; SELF-MODIFYING CODE ALERT!!!!!!!!!!
          ;
          ; Volume control (no ramps): every byte of the buffer is replaced
          ; by its entry in the volume lookup table selected by the byte at
          ; [esi+wt_vol]; the four results are then shifted together and
          ; masked with the entry's mask_volume.
          ; out: esi preserved, edi = end of the buffer, ebp = 0
          ; clobbers: eax, ebx, ecx, edx, flags
          ;
          ; Fixes over the previous version:
          ;  * a second load into cl (of byte 1) overwrote byte 0, so output
          ;    byte 0 was looked up from the wrong sample;
          ;  * the "collective shift" was applied to al only; it now shifts
          ;    eax, which is what the mask is for;
          ;  * the mask was patched at offset 2 of "and eax,imm32", but the
          ;    usual short encoding has its immediate at offset 1. The patch
          ;    address is now taken from the end of the instruction, which
          ;    is right for either encoding.
          push esi
          movzx ebx,byte ptr [esi+wt_vol]
          add ebx,ebx           ; ebx*8 below = volume * 16 = byte offset of the entry
          mov ecx,[ebx*8+stable]
          mov esi,[ebx*8+rtable] ; right panning is equal to "normal" volume
          mov eax,[ebx*8+mtable]
          lea edi,[edi+ebp*4] ; move to end of buffer
          neg ebp ; negate count  (we 'll UP-count)
          ; ok! Self Modify!
          mov ds:(2+offset shifv),cl        ; shift count -> imm8 of "shr eax,imm8"
          mov ds:(offset maaask_end-4),eax  ; mask -> imm32, the last 4 bytes of the AND
          xor ebx,ebx
          xor edx,edx
          xor ecx,ecx
vctrl:    mov cl,[edi+ebp*4]    ; ecx = byte 0
          mov bl,[edi+ebp*4+2]  ; ebx = byte 2
          mov dl,[edi+ebp*4+3]  ; edx = byte 3

          mov al,[esi+ebx*1]    ; al = table [byte 2]
          mov bl,[edi+ebp*4+1]  ; ebx = byte 1
          mov ah,[esi+edx*1]    ; ah = table [byte 3]

          rol eax,16 ; mov upper word ... up

          mov al,[esi+ecx*1]    ; al = table [byte 0]
          mov ah,[esi+ebx*1]    ; ah = table [byte 1]

shifv:    shr eax,SMODIFY       ; shift all four samples at once...
maaask:   and eax,DMODIFY       ; ...and clear the bits that crossed a byte boundary
maaask_end:
          mov [edi+ebp*4],eax
          inc ebp
          jne vctrl
          pop esi
          ret

vramp:  ; VOLUME RAMP
        ; Runs vcontrol over the whole buffer in steps of wt_volstep dwords,
        ; adding wt_volinc to wt_vol before every step.
        ; in:  esi = soundchannel ptr; buffer_start / buffer_size set
        ; clobbers: eax, ebx, ecx, edx, ebp, edi, flags
        ;
        ; Fix over the previous version: when a step ended exactly at the
        ; end of the buffer the routine returned BEFORE processing it, so
        ; the tail of the buffer never got its volume applied. The last step
        ; (whole or partial) is now always processed.
        ; NOTE(review): wt_volstep is assumed to be non-zero.
        mov ebx,buffer_size     ; ebx = dwords still to process
        mov edi,buffer_start
vol_step:
        mov ecx,[esi+wt_volstep]
vol_it:
        mov dx,[esi+wt_vol]
        add dx,[esi+wt_volinc]
        mov [esi+wt_vol],dx
        cmp dx,[esi+wt_volend]
        je  vol_stop
        jnb vol_overshoot
voled:
        sub ebx,ecx
        ja  vol_full            ; buffer not finished after this step
        ; this step reaches (ebx = 0) or passes (ebx < 0) the end of the buffer
        lea ebp,[ecx+ebx*1]     ; ebp = dwords that were left
        xor ebx,ebx
        xor ecx,ecx
        or  ebp,ebp
        jz  vlimit              ; nothing left: the buffer is done
        jmp short vuala
vol_full:
        mov ebp,ecx
vuala:
        push ecx
        push ebx
        call vcontrol ; perform one step (edi comes back advanced)
        pop ebx
        pop ecx
        jmp short vol_it
vlimit: ret ; buffer filled, continue next time

vol_stop: ; ramp complete, try to "feed in" another ramp
        call [esi+wt_feed]
        jmp short voled

vol_overshoot: ; went past the target: clamp to it and stop moving
        mov dx,[esi+wt_volend]
        mov [esi+wt_vol],dx
        mov word ptr [esi+wt_volinc],0
        jmp short voled



;;;;;;;;;;;;;;;;;;;;;;;;;; PLAYBACK RATE CTRL;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

rate_ramp:
        ; playback rate ramp feeder
        ; use this to add "doppler effect" to accelerating/decelerating
        ; sound sources
        ; (plain doppler effect can be done without ramps
        ;  it's the "acceleration" that needs rate ramping)
        ; INPUT: esi=channel ptr
        ;        buffer_start,buffer_end,buffer_size set
        ; register usage:
        ; esi=channel ptr/ input ptr, bl=input mantissa
        ; ebp=increment, bh=increment mantissa
        ; edi=output ptr
        ; eax=input ptr /accumulator
        ; ecx=output buffer limit
        ; edx=input  buffer limit
        ;
        ; Fixes over the previous version:
        ;  * after a refill the code did "lea esi,[esi+edx*4]": the read
        ;    pointer jumped to the END of the new data and edx was left
        ;    holding a length instead of the end pointer;
        ;  * buf_feed compared the INPUT pointer (eax) with buffer_end; the
        ;    output pointer (edi) is the one that tells if the buffer is full;
        ;  * rate_upped / rate_stop read wt_rateend / wt_rate through eax
        ;    (the input pointer) instead of esi (the channel);
        ;  * the rate is loaded with "movzx ebp,word ptr", so the upper half
        ;    of ebp is defined when "adc esi,ebp" uses it;
        ;  * after a refill the output limit is checked before another
        ;    sample is written.
        ; NOTE(review): the feeders called here must preserve eax, ebx, ecx,
        ; edx and edi as far as this routine relies on them -- confirm.
        ; NOTE(review): "shrd bp,bx,10" is kept as written, but bx is not
        ; initialised before its first use and bh is never loaded with a
        ; mantissa; the fixed-point split of the rate needs a second look.

        ; first of all, load pointers to input buffer
        mov edx,[esi+wt_len]
        mov eax,[esi+wt_ptr]
        mov edi,buffer_start ; start of output buffer
        lea edx,[eax+edx*4]  ; ptr to end of input data
feed_step:
        ; calculate the width of this "rate feed" step
        ;
        mov ecx,[esi+wt_ratestep]
        add ecx,edi   ; ptr to end of output data
        cmp ecx,buffer_end   ; if this step gets at end of the output buffer
        jnb  buf_feed        ;    then check for limits & stop increment
rate_it:
        movzx ebp,word ptr [esi+wt_rate]
        add   bp,[esi+wt_rateinc]
        mov [esi+wt_rate],bp
        cmp bp,[esi+wt_rateend]
        je rate_stop
        jnb rate_upped
rated:
        shrd bp,bx,10   ;  <== this may fail on some dumb assemblers
                        ;      if they fail to put the correct override
        push esi        ; channel ptr stays on the stack while esi reads samples
        xor bl,bl
        mov esi,eax  ; "move" to real input pointer
resample:
        mov al,[esi] ; the author expected these four instructions
        add bl,bh    ; to pair up on a Pentium
        adc esi,ebp  ; advance by the increment plus the mantissa carry
        mov [edi],al ;
        inc edi
        cmp esi,edx    ; check if input buffer has been completely fed
        jnb input_feed ;
rate_outchk:
        cmp edi,ecx    ; feed_step/output_buffer completed?
        jb  resample   ; NO? then loop again
        ; YES! "feed in" another rate step
        mov eax,esi
        pop esi
        jmp feed_step
input_feed:
        ; current input sample has been completely "played"
        ; it's time to play next sample
        mov esi,[esp]  ; read back esi saved on stack
        call [esi+wt_sfeed]
        ; remember that the "sample feeder" must supply at least
        ; a "silence" sample
        mov edx,[esi+wt_len]
        mov esi,[esi+wt_ptr] ; esi = start of the new input data
        lea edx,[esi+edx*4]  ; edx = ptr to end of the new input data
        jmp rate_outchk

buf_feed:

        cmp edi,buffer_end    ; is the output buffer full ?
        je resample_stop      ; YES! Stop
        ; no: make this step end exactly at the end of the buffer
        mov dword ptr [esi+wt_ratestep],0
        mov ecx,buffer_end
        jmp rate_it ; do the final rate step

rate_upped: ; went past the target rate: use the target and stop moving
        mov bp,[esi+wt_rateend]
        mov word ptr [esi+wt_rateinc],0
        jmp rated
rate_stop:  ; target rate reached: let the rate feeder set up what comes next
        call [esi+wt_rfeed]
        mov bp,[esi+wt_rate]
        jmp rated
resample_stop:
        ret





