; $Id$:
; MCM header in tracklet mode, from MSBit left (bit 31) to the LSBit
; < padrow (4 bits) | column (2 bits) | 1 | EPID2 (8 bit) | EPID1 (8 bit) | EPID0 (8 bit) | 1 >
; EPIDx comes from CPUx, if no tracklet from CPUx, then EPIDx is 0
; followed by 0 to 3 dwords, depending on the number of tracklets, in order from CPU0 to CPU2
; the header can be sent optionally even when no tracklets are there (configuration bit)
;
; tracklet format, from MSBit left (bit 31) to the LSBit (0)
; < pad_position (11 bit) | slope (8 bit) | PID (12 bit) | 1 >
;
; NOTE(review): this summary differs from the header/tracklet description given
; near _acq_send_hdr_trackl (position of the leading '1', HPID vs. EPID naming,
; 0xFF vs. 0 for a missing tracklet, field order and last bit of the tracklet word).
; Confirm which one is current.

; ---------------------------------------------------------------------------
; acq: entry of the tracklet acquisition code, assembled once per CPU
; (cpu0..cpu3 are selected with the preprocessor).
;   cpu0   writes 0x7F to TPFE
;   cpu1   writes MEMCOR_VAL to MEMCOR
;   cpu2   writes TPFE_VAL to TPFE
;   cpu3   decrements the watchdog counter in DMEM and requests ADCMSK from DBANK
;   cpu0..2 then test F8 to see whether they have a tracklet candidate at all
; ---------------------------------------------------------------------------
acq:
; write something large in TPFE (Preprocessor's Linear Fit End)
#ifdef cpu0
; writing 0x7F to TPFE will imitate not finished preprocessing time
; cpu2 will restore two CPU clocks later the correct TPFE and so F0..7 will be latched again
; but probably this will take about 15 CPU clocks???
    mov     0x7F, r3                    ; the largest possible value
    iext    TPFE
    sgio    r3, TPFE                    ; load TPFE
#endif
#ifdef cpu1
    mov     MEMCOR_VAL, r1
    iext    MEMCOR
    sgio    r1, MEMCOR
#endif
#ifdef cpu2
    mov     TPFE_VAL, r3                ; load the normal value
    iext    TPFE
    sgio    r3, TPFE                    ; ... and restore it in TPFE
#endif

; Note! The upper part of Fit registers F8..15 are accessed directly, the lower part F0..F7 are first latched at the end of the drift time.
; If this is done too early, they might be invalid! To correct for this, the latch signal is activated again by writing a much longer end of Fit time in TPFE.
; There is a delay in the preprocessor, so that now the latch signal will be activated again between clock 20 and 28 after wake up.
; In this time even the F8..F15 will be invalid! After this time, all F0..F15 will be valid.
; So: don't use now F0..F7, as they might be not valid still!
; Even F8..F15 might be unstable between instructions 20 and 28 after wake up!

#ifdef cpu3
    mov     WDOG_DM, r15
    lgio    0, ADCMSK_DB                ; read the ADCMSK from DBANK
    lra4    r13
    lra4    r13
    sub     r13, 1, r13
    sra     r13, WDOG_DM                ; if cpu3 can do something better here???
    sem     b0000_0000_0000_0111        ; cpu0..2 should write something to g0..2
#else
; cpu0..2
; copy f8 to r8, as f8 will be soon invalid for 20-30 clocks
    mov     f8, r8

;##############################################################
;#
;# check for tracklets: CHANNEL !=31, same as CHANNEL+1 != 0
;# F0 contains channel, F8 channel+1
;##############################################################
; As F0 may still be not ready, we read F8, which should be F0+1. When no tracklet present, F8 is 0
; here we are at instruction 7 after wake up, F8 is correct!
    sub     r8, 1, adc_ch_msk           ; F8=F0+1, therefore F8-1 is the basis channel of the tracklet! F8 is 0 when F0 is 31 (no tracklet)
    jmp     cc_carry, _acq_no_tr_frf_dtr ; carry means F8 was 0 - no tracklet! jump if no tracklet to process, in this case adc_ch_msk will be loaded with another constant anyway
#endif

; up to here all 4 CPUs run synchronously, now:
; cpu0 doesn't need to check for double tracklets
; cpu1 has to check if the ch# is neighbour to that of cpu0
; cpu2 has to check if the ch# is neighbour to that of cpu1
; cpu3 will not calculate tracklets any more
; at _acq_no_tr_frf_dtr the adc_ch_msk will be loaded with CH_NR_NO_TRCK=23, then the cpu jumps back. 31 is not so good as value, see later
; don't use F0..F7, as they might be not valid still! Expected to be valid at instruction 28 after wake up!
; even F8..F15 might be unstable between instruction 20 and 28 after wake up!
; F8 was copied before to r8, so use it here

; ---------------------------------------------------------------------------
; Double-tracklet rejection (cpu1, cpu2), construction of the combined ADC
; channel mask (cpu3) and preparation of the Q2 integration (cpu0..2).
; cpu3 publishes the mask in adc_ch_msk_3; that write releases cpu0..2.
; ---------------------------------------------------------------------------
#ifdef cpu1
; check for double tracklets, about 8 cpu clocks
    sub     r8, g4, r0                  ; r0 = f8(CPU1) - f0(CPU0)
    jmp     cc_eq, _acq_no_tr_frf_dtr   ; jmp out when the result is 0, f0(CPU1)+1-f0(CPU0)=0 => f0(CPU1)-f0(CPU0)=-1
    cmp     r0, 2                       ; compare with 2, f0(CPU1)+1-f0(CPU0)=2 => f0(CPU1)-f0(CPU0)=1
    jmp     cc_eq, _acq_no_tr_frf_dtr   ; jmp out when 0
;   sub     r8, 1, adc_ch_msk           ; write again to clear the bit in syn mask
#endif
#ifdef cpu2
; worst case about 8 cpu clocks
    sub     r8, g5, r0                  ; r0 = f8(CPU2) - f0(CPU1)
    jmp     cc_eq, _acq_no_tr_frf_dtr   ; jmp out when the result is 0, f0(CPU2)+1-f0(CPU1)=0 => f0(CPU2)-f0(CPU1)=-1
    cmp     r0, 2                       ; compare with 2, f0(CPU2)+1-f0(CPU1)=2 => f0(CPU2)-f0(CPU1)=1
    jmp     cc_eq, _acq_no_tr_frf_dtr   ; jmp out when 0
;   sub     r8, 1, adc_ch_msk           ; write again to clear the bit in syn mask
#endif

#ifdef cpu3
; will calculate an adc mask with all channels contributing to tracklets
    mov     0xF0, r1                    ; 1111 0000, mask for four channels at ch4 that should be marked
#ifeq DYN_L1A, 1
    lgio    1, CTGDOUT                  ; request reading of the global counter - special pulse for full readout
#endif
    syt     r2                          ; or just some nops?
    and     r2, 1, r2
    jmpr    cc_nzero, -2
; start with mask for ch4 and 0, because the shift command can not shift from 0 to 18
; but relative to 4 can shift -4 to +14
; now the adc_ch_msk_0 is updated already
    mov     adc_ch_msk_0, r0            ; channel 0, = 19 for no tracklet, which is -13 as 5 bit signed
    sub     r0, 4, r0                   ; for CH_NR_NO_TRCK=23 (no tracklet) we get 19, which is -13 interpreted as 5 bit signed
; so 1111_0000 will result in 0 after shifted by 13 to the right!
    shl     r0, r1, r2                  ; now two higher and one lower channels are marked around the channel selected in f0 of cpu0
; if no tracklet, the shifted mask will be 0!
; the shift is within -16..+15, + is left, - is right

#ifeq DYN_L1A, 1
; dynamical control of the full readout
    mova    SML2_VALnoA, r5             ; prepare to write the SML2, load it without L1A/R bit
    jmpr    cc_busy, 0                  ; wait for the global bus
    lpio    GBUSR1, r0                  ; get the global counter, requested before - was a special pulse for full readout?
    and     r0, c1, r0                  ; we expect 0 or 1
    sll     14, r0, r0                  ; shift to position 14 -> ignore_L1R
    or      r0, r5, r5                  ; merge with the SML2 value
    sgio    r5, SML2                    ; write the complete SML2 with ignore_L1R set (when the counter=1 was) or cleared (when counter=0 was)
    mov     0, r0                       ; prepare to clear the counter
    jmpr    cc_busy, 0
    sgio    r0, CTGDINI                 ; clear the counter
#endif

    syn                                 ; wait eventually for cpu1,2 to finish with double tracklet detection, their delay is not predictable
    mov     adc_ch_msk_1, r0            ; channel 1
    sub     r0, 4, r0                   ; -4
    shl     r0, r1, r0                  ; 0xF0 << (ch-4)
    or      r0, r2, r2                  ; set the bits from cpu1

    mov     adc_ch_msk_2, r0            ; channel 2
    sub     r0, 4, r0                   ; -4
    shl     r0, r1, r0                  ; 0xF0 << (ch-4)
    or      r0, r2, r2                  ; now in adc_ch_mask are marked all channels, that should be integrated & read
; IMPORTANT!
    jmpr    cc_busy, 0
    lpio    GBUSR0, r1                  ; read from I/O space
    iext    ADCMSK_DB                   ; refresh there
    sgio    r1, ADCMSK_DB
#ifgt TR_ADCMSK_AND, 0
    and     r1, r2, r2                  ; and with the ADC channels mask to avoid reading masked channels
#endif
    cmp     r13, c7                     ; watch dog counter was 0, decremented is -1
    jmpr    cc_neq, +2                  ; skip when not -1
    mov     0, r2                       ; clear the ADC mask if too many events with tracklets one after another
    mov     r2, adc_ch_msk_3            ; write to a GRF register, this will release the other CPUs
    jmpr    cc_busy, 0                  ; wait
    iext    ADCMSK                      ; and write to the register
    sgio    r1, ADCMSK                  ; so it doesn't need to be extra refreshed!
; !!! If some channel was masked, then it will contain just the baseline. It still can "contribute" to a tracklet?
; should we integrate it? We will lose the baseline there!
; if this is not desirable, remove the 2 lines of code above!

#else
_acq_no_tr_cont:
; cpu0..2
; at this point cpu0 is first, 1..2 are almost synchronous but about 10 clocks later, cpu3 has a delay of about 10+10 clocks
; here the CPUs without tracklets have CH_NR_NO_TRCK=23 (instead of 31) in the adc_ch_mask[i] register (i=0..2)
    mov     0xFF, r12
#ifdef cpu0
    mov     r12, charge_i               ; 7..0
#endif
#ifdef cpu1
    sll     8, r12, charge_i            ; 15..8
#endif
#ifdef cpu2
    swp     r12, charge_i               ; 23..16
#endif
    sem     b0000_0000_1000_0000        ; cpu0..2 will wait until cpu3 writes something to g7 (adc_ch_msk_3)

; don't use r12 and r13 until the line with div r12, r3 in the fit part!
    mov     0, r12                      ; init some registers for later
    sll     NACHKOMMAST, 1, r13         ; shift +1 to the left nachkommast-times, c1=1 ??? before we loaded c7 111.111 shifted left
; load 2**nachkommast ??? then it was -2**nachkommast

#ifdef cpu0
; cpu0 will only read the parameters from DMEM and store in GRF registers
    mov     SCALE_Y_DM, r15             ; prepare for reading from DMEM
    nop
    lra     rr_dword, r1                ; read scale_y
    lra+    rr_dword, r1
    mov     r1, scale_y
    lra     rr_dword, r1                ; read offs_y
    lra+    rr_dword, r1
    mov     r1, offs_y
    lra     rr_dword, r1                ; read scale_d
    lra+    rr_dword, r1
    mov     r1, scale_d
#ifdef SCALE_Q_DM
    lra     rr_dword, r1                ; read scale factor for Q, if used
    lra+    rr_dword, r1
    mov     r1, scale_q
#else
    add     r15, 4, r15
#endif
#endif

#ifdef cpu1
; switch on the NIICE (NI input control ports enable) lvds cells
    mov     1, r0
    sgio    r0, NIICE
#endif

#ifdef cpu2
; cpu2 will store back to DMEM the fit parameters, read by cpu0 before
    sem     b0010_0000_0000_0000        ; cpu2 waits until cpu0 writes to g13=scale_d
    mov     SCALE_Y_DM, r15             ; prepare for writing to DMEM (sra+ below)
    syn
    sem     b0000_0000_1000_0000        ; cpu2 will wait until cpu3 writes something to g7 (adc_ch_msk_3)
    mov     scale_y, r1
    sra+    r1
    mov     offs_y, r1
    sra+    r1
    mov     scale_d, r1
    sra+    r1
#ifdef SCALE_Q_DM
    mov     scale_q, r1
    sra+    r1
#endif
#ifdef DEFL_CR_DM
    nop
    nop                                 ; sra+ ???
#endif
#endif

; prepare for the charge integration, to shorten the whole duration.
; might be unused in case of no tracklets
    mov     EBR0, r1                    ; cpu0..2 have 5 channels, cpu3 has 6 channels to read, each channel has 64 (0x40) ADC samples
    mov     Q2_LEFT_MRG_VAL, r0         ; load the address of the 8-bit variable with the left integration margin
    add     r1, r0, r1                  ; modify the start address EBR0 (timebin0) to the beginning of the integration window
    mov     ADC_Q2, r15                 ; the start address in RAM to store the charge depends on the CPU# and is defined so
    mov     0x40, r2                    ; the increment in the event buffer, 64 samples/channel
    sub     r15, 4, r15                 ; r15-=4, as the loop begins with increment, 4 as we have 4 bytes/dword
    mov     EBR_CH_MSK, r4              ; 0..4 or 5 bit set, rest 0
    syn                                 ; cpu0..2 will wait until cpu3 writes something to g7 (adc_ch_msk_3)
#endif

; here come all CPUs simultaneously! No matter if they have a tracklet or not!
; the global registers g0..g2 named charge_i are initialised with 0xFF << 8*CPU_ID
    shlt    0, adc_ch_msk_3             ; test if the mask with all channels containing tracklet is 0
    jmp     cc_zero, _acq_no_tr_mask    ; in this case no tracklet remained at all! this case is simple.

; I n t e g r a t i o n   in   Q2   w i n d o w
;
; this is done by all CPUs, as each CPU has access only to a subset of all ADC Event buffers
; integrate in a window defined as constants Q2_LEFT_MRG_VAL and Q2_WIN_WIDTH_VAL (2..9)
; read from the event buffer, accumulate and store to RAM
; it takes (2*WIDTH+9)*Nch+10 CPU Clocks, for CPU0..2 from about 10 to about 55+10*WIDTH, for CPU3 from about 10 to
; about 64+12*WIDTH
; this means, depending on the distribution of the marked channels, the CPUs will need very different number of clocks
; to finish this part!
; ---------------------------------------------------------------------------
; Set the per-CPU sync mask for the end of the Q2 integration, let cpu3 do the
; integration setup that cpu0..2 already did earlier, then include the
; integration code itself.
; ---------------------------------------------------------------------------
#ifdef cpu0
    sem     b0000_0000_0000_0001        ; cpu0 will wait for the sum of Q2 of its tracklet channels
#endif
#ifdef cpu1
    sem     b0000_0000_0000_0010        ; cpu1 will wait for the sum of Q2 of its tracklet channels
#endif
#ifdef cpu2
    sem     b0000_0000_0000_0100        ; cpu2 will wait for the sum of Q2 of its tracklet channels
#endif
#ifdef cpu3
    sem     b0000_0111_0111_0000        ; cpu0..2 will store here something to say, we are ready with the charge integration
#endif

; inputs: r1 contains EBR0+Q2_LEFT_MRG_VAL
;         r15 contains the start address in RAM to store the sums (with offset +ch)
;         adc_ch_msk_3 is g7 and contains the ADC mask with the channels, used in the tracklets
;
; constants used:
;         EBR_CH_INI is the start channel # for each CPU: 0, 5, 10, 15
;         EBR_CH_MSK contains 1 1111 for CPU0..2 and 11 1111 for CPU3
;         Q2_WIN_WIDTH contains the width of the window
;
; modified: almost all reg, except for r12 and r13
;           no global regs modified
;           no constants modified

; for CPU0..2 this initialisation moved upwards to use better the time waiting for CPU3!
; for CPU3 the code remains here
#ifdef cpu3
    mov     EBR0, r1                    ; cpu0..2 have 5 channels, cpu3 has 6 channels to read, each channel has 64 (0x40) ADC samples
    mov     Q2_LEFT_MRG_VAL, r0         ; load the address of the 8-bit variable with the left integration margin
    add     r1, r0, r1                  ; modify the start address EBR0 (timebin0) to the beginning of the integration window
    mov     ADC_Q2, r15                 ; the start address in RAM to store the charge depends on the CPU# and is defined so
    mov     0x40, r2                    ; the increment in the event buffer, 64 samples/channel
    sub     r15, 4, r15                 ; r15-=4, as the loop begins with increment, 4 as we have 4 bytes/dword
    mov     EBR_CH_MSK, r4              ; 0..4 or 5 bit set, rest 0
#endif

#inc "fit_q2_integrate.asm"

; at this point we have
; in RAM at address ADC_Q2_0 an array with the integrated charges, updated only for the channels contributing to the tracklets
; the others remain unchanged!
; ---------------------------------------------------------------------------
; After the Q2 integration:
;   cpu0..2 signal completion by rewriting adc_ch_msk, then run the fit
;           (fit_fit.asm) unless they carry the no-tracklet marker
;   cpu3    adds up the accumulated charges (fit_add_accs.asm), optionally
;           refreshes NES/NIICE, then loads its adc_ch_msk with 0x1000_0100
; ---------------------------------------------------------------------------
#ifNdef cpu3
; at the end of the integration, each CPU writes the same to its adc_ch_msk register (g4..7) to mark that it has finished.
; CPU3 must wait here until the bits 6..4 in its sync register are cleared. Of course it has finished the integration
; of its own channels.
    mov     adc_ch_msk, adc_ch_msk      ; write the same, just to clear the corresponding bit in the sync mask
; the syn command is of CPU3
; here some CPUs come earlier than others!
#endif

#ifdef cpu3
; CPU3 will wait until CPU0..2 have finished the Q2 integration, then will calculate the total charge Q2
; for each tracklet and will store it to g0..2 for CPU0..2
; Note: sometimes cpu3 has more to do than the other CPUs!
#inc "fit_add_accs.asm"

; refresh the endmarkers and NI-enables (but for HCM and BM must be done separately)
; r3 counted in fit_add_accs how many (of the max 3) sums were NOT calculated, use this information to decide
; to make a small refresh
#ifdef REFR_NI
    cmp     r3, 1
    jmp     cc_ltu, _acq_skip_refr_ni
; r3 was initialised with 3 before adding the accumulated charges and was decremented by each existing tracklet
;
; do a small refresh, only when CPU3 has calculated less than 3 sums (there were less than 3 tracklets)
; otherwise may be it would delay the sending of the tracklets
    mova    NSIG_TR_VAL, r1
    mova    NSIG_RR_VAL, r2
    swp     r2, r2
    or      r1, r2, r1                  ; the full NES register has RR in bits 31..16 and TR in bits 15..0
    jmpr    cc_busy, 0
    sgio    r1, NES
    mov     NIICE_VAL, r1
    jmpr    cc_busy, 0
    sgio    r1, NIICE
_acq_skip_refr_ni:
#endif
    syn                                 ; wait for the tracklets of cpu0..2
    iext    0x100001
    mov     0x100001, r6
    shl     8, r6, adc_ch_msk           ; load 0x1000_0100
#else
; NOW:
; cpu0..2 will check again if they have a tracklet and will eventually make the fit
    mov     adc_ch_msk, r0              ; the ADC start channel
    cmp     r0, CH_NR_NO_TRCK           ; 23, used to mark no tracklet
; it might happen, that some CPU0..2 had to integrate, but has no tracklet, so it can finish now
    jmp     cc_eq, _acq_no_tr_q2f       ; this CPU has no tracklet and has integrated its channels
                                        ; therefore skip the fit procedure!
#inc "fit_fit.asm"
; at the end CPU0..2 clear the bits 8..10 in the sync mask of CPU3
#endif

#ifndef cpu3
_acq_send_hdr_trackl:
    syn                                 ; only CPU0..2 need to be synchronized, cpu3 has just now released them
; here all CPUs must come synchronously, CPU0 needs that CPU1,2 are ready with the fit!
; g0..2 contain the full 20-bit charge word and is 0 in case of no tracklet
; g8..10 contain the 32-bit tracklets or are don't care when the corresponding charge word is 0.
; CPU0 will send the header and part of PID of all up to 3 tracklets
; CPU1 will send the tracklet of CPU0, CPU2 will send the tracklet of CPU1, CPU3 will send the tracklet of CPU2
; CPU0 has to get from each charge_i (i=0..2) the bits 19..12 to prepare the HPIDx in the header
; if no tracklets at all, but headers still wanted, all 3 charges will be 0 and only the header will be sent

; MCM header: from MSBit left (bit 31) to the LSBit
; < 1 | padrow (4 bits) | column (2 bits) | HPID2 (8 bit) | HPID1 (8 bit) | HPID0 (8 bit) | 1 >
; HPIDx come from CPUx, if no tracklet from CPUx, then HPIDx is 0xFF. HPIDx = charge_i >> 12
; followed by 0 to 3 dwords, depending on the number of tracklets, in order from CPU0 to CPU2
; the header can be sent optionally even when no tracklets are there.
; when the constant DONT_SEND_EMPTY_HDR_TR is 1, empty headers are suppressed.
;
; tracklet format, from MSBit left (bit 31) to the LSBit (0)
; < pad_position within the MCM (11 bit) | LPID (12 bit) | slope (8 bit) | 0 >
#endif

; the BM chips use the CPUs for nothing, but the NI there is not programmed to send own data.
; so it doesn't matter if they try to send something
; the HCM can send tracklets. This part is only for normal MCMs!
_acq_send_hdr_tr_ns:
; ---------------------------------------------------------------------------
; Each CPU prepares one 32-bit word in r6 and writes it to the network
; interface at _acq_write2ni:
;   cpu0 builds the MCM header (or takes the end marker when empty headers
;        are suppressed and there is no tracklet)
;   cpu1 sends trackl_0, cpu2 sends trackl_1, cpu3 sends trackl_2
;        (or the end marker when the corresponding charge byte is 0xFF)
; The no-tracklet paths and the refresh dispatch follow further below.
; ---------------------------------------------------------------------------
#ifdef cpu0
; build first ( ( (charge_2[19..12] << 8) | charge_1[19..12]) << 8 ) | charge_0[19..12]
; if 0xFFFFFF, then we don't have any tracklets
; in this case depending on DONT_SEND_EMPTY_HDR_TR load the end marker or continue with the header
; r8 contains the HPID0, charge_1 is HPID1 << 8 and charge_2 is HPID2 << 16
    or      r8, charge_2, r6
    or      r6, charge_1, r6
; at this point all 3 charges (bits 19..12 of each charge word) are put together in bits 23..0
#ifeq DONT_SEND_EMPTY_HDR_TR, 1
; r5 was loaded with 0xFFFFFF
; and if the 24-bit word with HPIDs is 0xFFFFFF, we don't have to send anything except for end markers
    cmp     r6, r5
    jmp     cc_eq, _acq_wr_em2ni        ; if = 0xFFFFFF we don't have any tracklet and write end marker
#endif
    lgio    0, H_PAD_ROW_COL_DB
    shl     1, r6, r6                   ; otherwise put one '0' at the right side (prepared in pad_row_col)
    jmpr    cc_busy, 0
    lpio    GBUSR0, r5
    or      r6, r5, r6                  ; and add the position information, already prepared in boot program
                                        ; (but needs to be refreshed periodically!)
    iext    H_PAD_ROW_COL_DB
    sgio    r5, H_PAD_ROW_COL_DB
; here CPU0 is ready
#endif

#ifdef cpu1
; send the tracklet of CPU0, if any, otherwise just end marker
    mov     charge_0, r6
    cmp     r6, 0xFF                    ; - test the same part =? 0xFF as used in the header
    jmp     cc_eq, _acq_wr_em2ni        ; if = 0xFF we don't have a tracklet and write end marker
    mov     trackl_0, r6                ; load the 32-bit word to be sent to r6
    xor     r6, adc_ch_msk_3, r6        ; invert two bits in the tracklet, adc_ch_msk_3 initialised before by CPU3
#endif

#ifdef cpu2
; send the tracklet of CPU1, if any, otherwise just end marker
    slr     8, charge_1, r6             ; and now they are shifted to bits 7..0
    cmp     r6, 0xFF                    ; test the same part =? 0xFF as used in the header
    jmp     cc_eq, _acq_wr_em2ni        ; if = 0xFF we don't have a tracklet and write end marker
    mov     trackl_1, r6                ; load the 32-bit word to be sent to r6
    xor     r6, adc_ch_msk_3, r6        ; invert two bits in the tracklet, adc_ch_msk_3 initialised before by CPU3
#endif

#ifdef cpu3
; send the tracklet of CPU2, if any, otherwise just end marker
    slr     16, charge_2, r6            ; and now they are shifted to bits 7..0
    cmp     r6, 0xFF                    ; - test the same part =? 0xFF as used in the header
    jmp     cc_eq, _acq_wr_em2ni        ; if = 0xFF we don't have a tracklet and write end marker
    mov     trackl_2, r6                ; load the 32-bit word to be sent to r6
    xor     r6, adc_ch_msk_3, r6        ; invert two bits in the tracklet, adc_ch_msk_3 initialised before by CPU3
#endif

; all CPUs send the prepared 32-bit word.
_acq_write2ni:
    spio    r6, NODP
    sra     r6, TrcklDMEMa              ; store the tracklet in DMEM for debugging and as info for ZS readout
    jmp     cc_uncond, clr_endloop

_acq_wr_em2ni:
    mova    NSIG_TR_VAL, r6
    jmp     cc_uncond, _acq_write2ni

; for CPU0..2, who may calculate tracklets
#ifndef cpu3

; 1. the Fit Register File has no tracklet candidate for this CPU
; or
; 2. after check for double tracklets, this CPU has no tracklet any more
_acq_no_tr_frf_dtr:
    mov     CH_NR_NO_TRCK, adc_ch_msk
    jmp     cc_uncond, _acq_no_tr_cont  ; the CPUs should continue eventually to integrate the charge in the third window for another CPU(s) with tracklet

; finished with integration of Q2, but no own tracklet
_acq_no_tr_q2f:
; from here the CPU has to wait about 100 to 140 clocks
; if CPU2 comes here, it can do some refresh of very important parameters
; CPU1 will come rarely here
; CPU0 will come even rarely here

; inside the fit part found, that the slope is out of range
#ifdef cpu1
    mvpcr   +2, rstack                  ; small refresh
    jmp     cc_uncond, load_direct_bm_m
#else
    nop
#endif
; #ifdef cpu2
;     mvpcr   +2, rstack
; ; refresh 4 parameters from DMEM and 4 common constants from DBANK (as needed even on the HCM)
;     jmp     cc_uncond, load_dm_par
; #endif

; fit started, but the slope is out of range
_acq_out_rng:
    mov     CH_NR_NO_TRCK, adc_ch_msk   ; to say to CPU3 that it doesn't need to calculate the sum of Q2
; refresh the own interrupt vectors
    mvpcr   +2, rstack
    jmp     cc_uncond, load_irq_vec
    syn                                 ; wait for CPU3, which adds the 4 integrated charges
    sem     b0000_0000_1000_0000        ; prepare to wait for cpu3 is ready with Q2 of all CPU0..2
    jmp     cc_uncond, _acq_no_tr_mask_nsyn

; ALL CPUs 0..2 do not have tracklets!!!
; 1 . some specific delay
; 2 . send eventually MCM tracklet header and endmarkers
; this time can be used to make some register recovery?
_acq_no_tr_mask:
; the adc mask containing the tracklet channels is 0
    mov     7, r7
    sll     1, c13, r1                  ; x2, c13 contains the counter for events without tracklets, used for refresh
    add     r1, c13, r1                 ; x2 + x1 = x3
    and     r7, r1, r7                  ; the lower 3 bits, so we have the sequence: 0, 3, 6, 1, 4, 7, 2, 5 => 0...
    cmp     r7, 3                       ; 0..2 used to refresh TPL LUT
    jmp     cc_gtu, _acq_refr_4_7
    jmp     cc_eq, _acq_refr_3

; here is 0..2
; 3 times TPL LUT
#ifdef cpu0
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_tpl         ; may be move to raw data readout, it is too long for here
#endif
#ifdef cpu1
    sem     b1100_0000_0000_0000        ; wait for g14, g15
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_first_m ; first do something else, CPU0 needs some time to prepare the loops
    mov     _acq_no_tr_mask_nsyn, rstack   ; return address is the end of the refresh
    jmp     cc_uncond, load_tpl_sec_cpu    ; prepare the address pointers and sync
#endif
#ifdef cpu2
    mov     _acq_no_tr_mask_nsyn, rstack
    cmp     r7, 1
;   jmp     cc_ltu, load_dm_par         ; 0 not necessary more
; NOTE(review): the next two jumps test the same condition after the same cmp,
; so the second one can never be taken when the first is not. The case
; comments (0 / 2 / 1) suggest the first was meant for r7 == 0 (cc_ltu); as
; written r7 == 0 falls through to load_direct_adc. Left unchanged - confirm
; the intended refresh schedule before editing.
    jmp     cc_gtu, load_direct_bm_m    ; 0 do something else?
    jmp     cc_gtu, load_direct_bm_m    ; 2
    jmp     cc_uncond, load_direct_adc  ; 1
#endif

_acq_refr_4:
; here is 4
#ifdef cpu0
    mvpcr   +2, rstack
    jmp     cc_uncond, load_pre_par
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_irq_vec
#endif
#ifdef cpu1
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_patchm_par
#endif
#ifdef cpu2
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_first_m
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_pre_clust_par
#endif

_acq_refr_4_5:
    cmp     r7, 4
    jmp     cc_eq, _acq_refr_4
; here is 5
#ifdef cpu0
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_all
#endif
#ifdef cpu1
    mvpcr   +2, rstack
    jmp     cc_uncond, load_lptc_fil
#endif
#ifdef cpu2
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_first_m
#endif
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_irq_vec

_acq_refr_7:
; here is 7
#ifdef cpu0
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_gain_filg
#endif
#ifdef cpu1
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_first_m
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_irq_vec
#endif
#ifdef cpu2
; these 2 are not necessary!
    sem     b0000_0010_0000_0000        ; wait until CPU1 finished with refresh
;   mvpcr   +2, rstack
;   jmp     cc_uncond, load_dm_par
    syn
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_bm_m
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_irq_vec
#endif

_acq_refr_4_7:
    cmp     r7, 6
    jmp     cc_ltu, _acq_refr_4_5
    jmp     cc_gtu, _acq_refr_7
; here is 6
#ifdef cpu0
    mvpcr   +2, rstack
    jmp     cc_uncond, load_pre_par
    mov     0, trackl_i                 ; write something, to release CPU1
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_irq_vec
#endif
#ifdef cpu1
    sem     b0000_0001_0000_0000        ; wait until CPU0 finished with refresh
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_first_m
    syn
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_irq_vec
#endif
#ifdef cpu2
    mov     _acq_no_tr_mask_nsyn, rstack
    jmp     cc_uncond, load_direct_all
#endif

_acq_refr_3:
; here is 3
#ifdef cpu0
    mvpcr   +2, rstack
    jmp     cc_uncond, load_gain_fila
#endif
#ifdef cpu1
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_first_m
    mvpcr   +2, rstack
    jmp     cc_uncond, load_unused
;   mvpcr   +2, rstack
;   jmp     cc_uncond, load_irq_vec
#endif
#ifdef cpu2
    sem     b0000_0010_0000_0000        ; wait until CPU1 finished with refresh
;   mvpcr   +2, rstack                  ; these 2 are not necessary!
;   jmp     cc_uncond, load_dm_par
    syn                                 ; wait until CPU1 finished with refresh
    mvpcr   +2, rstack
    jmp     cc_uncond, load_direct_bm_m
    mvpcr   +2, rstack
    jmp     cc_uncond, load_irq_vec
#endif

; refresh the interrupt vectors, only when all CPUs don't have tracklets
_acq_no_tr_mask_nsyn:
#ifdef cpu0
#ifeq DONT_SEND_EMPTY_HDR_TR, 1
; and if this word is 0xFFFFFF, we don't have to send anything except for end markers
    iext    0xFFFFFF                    ; 24-bits with 1's
    mov     0xFFFFFF, r5
#endif
#endif
    sem     b0000_0000_1000_0000        ; prepare to wait for cpu3 is ready with Q2 of all CPU0..2
    mov     0xFF, r8                    ; bits 19..12 shifted to 7..0, ok for charge_0
#ifdef cpu0
    mov     r8, charge_i                ; r8 contains charge_0 only by cpu0
#endif
#ifdef cpu1
    sll     8, r8, charge_i             ; to bits 15..8
#endif
#ifdef cpu2
    swp     r8, charge_i                ; to bits 23..16
#endif
    mov     0, trackl_i                 ; write something, to release CPU3
    jmp     cc_uncond, _acq_send_hdr_trackl ; after the jump CPU0..2 will be synced again and released by CPU3

#else
; CPU3

; ALL CPUs 0..2 do not have tracklets!!!
; 1 . some specific delay
; 2 . send eventually MCM tracklet header and endmarkers
; this time can be used to make some register recovery?
_acq_no_tr_mask:
; the adc mask containing the tracklet channels is 0
; entry points after decoding, that no tracklet should be calculated
    sem     b0111
; refresh the interrupt vectors, only when all CPUs don't have tracklets
    mvpcr   +2, rstack
    jmp     cc_uncond, load_irq_vec
    mov     MAX_TR_EVENT, r1
    sra     r1, WDOG_DM
; increment the number of empty tracklet runs
    mov     c13, r1
    add     r1, 1, r1
    syn
    jmpr    cc_busy, 0
    sgio    r1, C13CPUA                 ; update c13 used to control which refresh routine to start
; the other CPUs are ready here, so the new value is for the next event
    mov     0, adc_ch_msk               ; release the CPU0..2
    jmp     cc_uncond, _acq_send_hdr_tr_ns ; and jump to the exit without sync
#endif
    nop