; program to pack/unpack/crc32 the configuration in the TRAP chip
;
; by V.Angelov
;
; $Id$:
;
; The format for a single conf. reg. is
;
;   (addr << 18) | ((data & 0xFFFF) << 2) | (flag32 << 1) | 1
;
;   Bit  0          is 1
;   Bit  1 (flag32) is 0 for up to 16 bit data and 1 for more
;   Bits 31..18   - address (note: up to 14 bits!)
;   Bits 17..2    - data, the lower up to 16 bits
;   In case of > 16 bits, a second dword is sent:
;       data | 1
;
; The format of a block is
;
;   (address << 16) | (words << 8) | (widthM1 << 3) | (astep << 1) | 0
;
;   Block start_address(31..16) words(15..8) widthM1(7..3) astep(2..1) 0
;
; - start_address is 16-bit; with small exceptions all configuration
;   registers are below 0x3FFF
; - words (8 bit) is the number of configuration words, not the number
;   of 32-bit data words that follow.
;   words=0 means end of the data => our end marker is just 0 !
; - widthM1 (5 bit) is the bitwidth-1, possible values:
;        4 for up to 6 x  5 bits
;        5 for up to 5 x  6 bits
;        6 for up to 4 x  7 bits
;        9 for up to 3 x 10 bits
;       14 for up to 2 x 15 bits
;       30 for       1 x 31 bits
; - astep 1|2 is the increment of the TRAP IO address, normally we need 1 or 2
;
; Any other width, or astep=0,3, or words=0 will stop the unpacking.
; NOTE(review): the unpack/pack code in this file only tests words and the
; width; astep is used as-is and is not validated - confirm the intent.
; So there are many possible endmarkers. They just need to be different from
; the readout tree endmarker and from the MCM header!!!
;
;    31   30..16   15..8   7..3   2..1   0
;     0   0x7FFF   0x00    0x1F    3     0   => 0x7FFF00FE is a good endmarker!
;
; This program is better executed by cpu0..2, not 3, because cpu3 can not
; access DMEM in GIO.
;-----------------------------------------------------------------------
; unpack (entry point: conf_man_un)
;   input:  in c8 the start address in GIO
;   output: all configuration registers in the TRAP
;   unpacking time 44 us
;
; Walks the packed configuration starting at cnst_sa. Every header dword
; is either a "single" (bit 0 = 1) or a "block" (bit 0 = 0); a block with
; words = 0 or with an unsupported width ends the unpacking. On exit the
; code jumps to crc32, which returns to conf_man_un_lp (low power).
;
; Register roles as used below:
;   r0      header dword, afterwards the TRAP address being written
;   r1      remaining number of words in the block / data of a single
;   r2      field width (negated inside the 5..15 bit loop)
;   r3      address step
;   r4      current packed data dword, shifted right field by field
;   r5, r6  scratch for the optional compare (cnf_compare)
;   r7      error counter (cnf_compare)
;   r13     copy of the start address (consumed later by crc32)
;   r14     GIO pointer used by lgio+ (autoincrement)
;   r15     DMEM pointer (debug)
;   g1      number of bits still available in r4
; Constants, as implied by the way they are used here (TODO confirm):
;   c1 = 1, c3 = 3, c6 = all ones except bit 0, c7 = all ones,
;   c5 = CPU number, c12 = event counter.
;
; NOTE(review): the data stream is read one dword ahead (an lgio+ is
; issued right after each header is taken from GBUSR0), and several busy
; waits are marked "not necessary" by the author. This looks timing
; dependent, so the instruction order must not be changed.
;-----------------------------------------------------------------------

#def mask_1F        = r9                ; 5 bit mask
#def mask_FF        = r10               ; 8 bit mask
#def mask_FFFF      = r11               ; 16 bit mask
#def mask_data      = r12               ; data bit mask
#def cnst_sa        = c8                ; the start address in GIO

#def cmp_errors_un  = 0xF0F0            ; number of differences (if def cnf_compare)
#def cpu_clocks_un  = 0xF0F1            ; number of clocks for unpacking (if def debugT)
#def cpu_clocks_crc = 0xF0F2            ; number of clocks for unpacking+crc32 (if def debugT)
#def cpu_clocks_pk  = 0xF0F3            ; number of clocks for packing (if def debugT)

;#def cnf_compare = 1                   ; enable compare & counting errors
;#def debug = 1                         ; enable debugging in DMEM
;#def debugT = 1                        ; enable timer

conf_man_un:
#ifdef debugT
        mov     0x640, r0
        spio    r0, CTPCTRL
        mov     0, r0
        spio    r0, CTPDINI
#endif

; initialization
#ifdef debug
        mov     0, r15                  ; the DMEM pointer, used in debug mode
#endif
        iext    0xFFFF
        mov     0xFFFF, mask_FFFF
        and     mask_FFFF, cnst_sa, r14 ; the GIO access register with autoincrement
        mov     r14, r13                ; copy of the start address
        lgio+   0                       ; request to read data
        mov     0x1F, mask_1F           ; prepare the masks needed later
        mov     0xFF, mask_FF
#ifdef cnf_compare
        mov     0, r7                   ; error counter
#endif

; start loop - next block/single
conf_man_un_next:
#ifdef debug
        sra+    r14                     ; debugging
#endif
        jmpr    cc_busy, 0              ; wait for the pending read
        lpio    GBUSR0, r0              ; header dword (block or single)
        lgio+   0                       ; read the next block/single
#ifdef debug
        sra+    r0                      ; debugging
#endif
        shl     -1, r0, r0              ; check the 0th bit
        jmp     cc_carry, conf_man_un_sng ; jump to single

        ; block header; r0 is already shifted right by one
        shl     -7, r0, r1              ; shift right the number of words
        and     mask_FF, r1, r1         ; number of words in r1
#ifdef debug
        sra+    r1                      ; debugging
#endif
        jmp     cc_zero, conf_man_un_exit ; exit if 0
        shl     -2, r0, r2              ; shift the width-1 to the right
        and     mask_1F, r2, r2         ; width-1 in r2
        add     r2, c1, r2              ; width in r2
#ifdef debug
        sra+    r2                      ; debugging
#endif
        and     r0, c3, r3              ; step in r3
#ifdef debug
        sra+    r3                      ; debugging
#endif
        shl     -15, r0, r0             ; address, not necessary to mask
#ifdef debug
        sra+    r0                      ; debugging
#endif

; order according to the most frequent appearance!!!
        cmp     r2, 15
        jmp     cc_eq, conf_man_un_5_15 ; 15 bit case
        cmp     r2, 10
        jmp     cc_eq, conf_man_un_5_15 ; 10 bit case
        cmp     r2, 5
        jmp     cc_eq, conf_man_un_5_15 ; 5 bit case
        cmp     r2, 31
        jmp     cc_eq, conf_man_un_31   ; 31 bit case
        cmp     r2, 6
        jmp     cc_eq, conf_man_un_5_15 ; 6 bit case
        cmp     r2, 7
        jmp     cc_eq, conf_man_un_5_15 ; 7 bit case
; if no one of the cases above => exit
        jmp     cc_uncond, conf_man_un_exit ; exit

; 5,6,7,10,15 bit cases
conf_man_un_5_15:
; step in r3
; width in r2
; address in r0
; number of words in r1
#ifdef cnf_compare
        shl     r2, c1, mask_data
        sub     mask_data, c1, mask_data ; the mask for the data: (1 << width) - 1
#endif
        neg     r2, r2                  ; later we need -width!
        mov     0, g1                   ; bit counter
conf_man_un_rd:
        add     r2, g1, g1              ; actually subtract the bitwidth
        jmp     cc_nneg, conf_man_un_nf ; no need to read new dword
;       jmpr    cc_busy, 0              ; not necessary!
        lgio+   0                       ; request the next data
        lpio    GBUSR0, r4              ; read data
        shl     -1, r4, r4              ; shift right, the LSB is always 1
        mov     31, g1                  ; init the bit counter
        add     r2, g1, g1              ; and subtract the width
conf_man_un_nf:                         ; extract the field
#ifdef cnf_compare
;       jmpr    cc_busy, 0              ; not necessary!
        lgio    1, r0                   ; read back the current register value
        and     r4, mask_data, r5       ; mask the upper bits
        jmpr    cc_busy, 0
        lpio    GBUSR1, r6              ; read data
        and     r6, mask_data, r6       ; mask the upper bits
        cmp     r5, r6                  ; compare
;       jmpr    cc_eq, +3               ; skip the error inc and the SGIO!
        jmpr    cc_eq, +2               ; skip the error inc
        add     r7, c1, r7              ; count up the errors
#else
;       jmpr    cc_busy, 0              ; not necessary!
#endif
        sgio    r4, r0                  ; store to GIO the unmasked data
#ifdef debug
        sra+    r0                      ; debugging
        nop
        sra+    r5                      ; NOTE(review): r5 is only written when
                                        ; cnf_compare is defined
#endif
        add     r0, r3, r0              ; increment the write address
        shl     r2, r4, r4              ; shift right the data (r2 = -width)
        sub     r1, c1, r1              ; decrement the number of words
        jmp     cc_nzero, conf_man_un_rd ; extract next field
        jmp     cc_uncond, conf_man_un_next ; next block/single

; 31 bit case
conf_man_un_31:
; step in r3
; address in r0
; number of words in r1
        shl     -1, c7, mask_data       ; c7 >> 1, the 31 bit mask
conf_man_un_31n:
;       jmpr    cc_busy, 0
        lgio+   0                       ; request the next data
        lpio    GBUSR0, r4              ; read data
        shl     -1, r4, r4              ; shift right, the LSB is always 1
#ifdef cnf_compare
        jmpr    cc_busy, 0
        lgio    1, r0                   ; read back the current register value
        jmpr    cc_busy, 0
        lpio    GBUSR1, r5              ; read data
        and     r5, mask_data, r5       ; mask the upper bit
        cmp     r5, r4                  ; compare
;       jmpr    cc_eq, +3               ; skip the error inc and the SGIO!
        jmpr    cc_eq, +2               ; skip the error inc
        add     r7, c1, r7              ; count up the errors
#else
        jmpr    cc_busy, 0
#endif
        sgio    r4, r0                  ; store to GIO
#ifdef debug
        sra+    r0                      ; debugging
        nop
        sra+    r4
#endif
        add     r0, r3, r0              ; increment the write address
        sub     r1, c1, r1              ; decrement the number of words
        jmp     cc_nzero, conf_man_un_31n ; next 32 bit word
        jmp     cc_uncond, conf_man_un_next ; next block/single

; single case
conf_man_un_sng:
; here we have in r0 the header shifted right by one
        shl     -1, r0, r1              ; carry = flag32
        and     r1, mask_FFFF, r1       ; data in r1, up to 16 bits
        jmp     cc_ncarry, conf_man_un_s16 ; 1 means more than 16 bits
; read the upper bits from the next dword
;       jmpr    cc_busy, 0
        lgio+   0                       ; request the next data
        lpio    GBUSR0, r4              ; read data
        and     r4, c6, r4              ; clear the bit 0
        or      r4, r1, r1              ; combine all together to 32 bits
conf_man_un_s16:
        shl     -11, r0, r0             ; [30..17] contain the address
        shl     -6, r0, r0              ; address in r0 (total shift 17)
#ifdef cnf_compare
        jmpr    cc_busy, 0
        lgio    1, r0                   ; read back the current register value
        jmpr    cc_busy, 0
        lpio    GBUSR1, r5              ; read data
;       and     r5, mask_FFFF, r5       ; data in r1
        cmp     r5, r1                  ; compare
;       jmpr    cc_eq, +3               ; skip the error inc and the SGIO!
        jmpr    cc_eq, +2               ; skip the error inc
        add     r7, c1, r7              ; count up the errors
#else
;       jmpr    cc_busy, 0
#endif
        sgio    r1, r0
#ifdef debug
        sra+    r0                      ; debugging
        nop
        sra+    r1
#endif
        jmp     cc_uncond, conf_man_un_next

conf_man_un_exit:
; NOTE(review): the error counter is stored only when both debugT and
; cnf_compare are defined, while the #def comment of cmp_errors_un mentions
; cnf_compare only - confirm that the nesting is intended.
#ifdef debugT
#ifdef cnf_compare
        jmpr    cc_busy, 0
        iext    cmp_errors_un
        sgio    r7, cmp_errors_un       ; c11 of CPU0 in GIO
#endif
        lpio    CTPDOUT, r6             ; the number of CPU clocks
        jmpr    cc_busy, 0
        iext    cpu_clocks_un
        sgio    r6, cpu_clocks_un
#endif
        sub     r14, c1, r14            ; step back over the one-ahead read
        jmp     cc_uncond, crc32        ; calculate crc32

; crc32 jumps back here when it is done
conf_man_un_lp:
#ifdef debugT
        lpio    CTPDOUT, r6             ; the number of CPU clocks after CRC32
        jmpr    cc_busy, 0
        iext    cpu_clocks_crc
        sgio    r6, cpu_clocks_crc
#endif

; low power
        shl     4, c12, r0              ; event counter << 2
                                        ; NOTE(review): the shift operand is 4 but the
                                        ; comment says << 2 - confirm which is meant
        or      r0, c3, r0              ; (event counter << 2) | 3
        iext    LP_REP
        mov     LP_REP, r1
        add     r1, c5, r1              ; + CPU#
        jmpr    cc_busy, 0
        sgio    r0, r1
        mov     cmd_lp, r0
        jmpr    cc_busy, 0
        sgio    r0, SMCMD               ; go to low power, will be changed later
        jmpr    cc_busy, 0
        jmpr    cc_uncond, 0            ; stay here
        nop

; execution time about 400 us
; input: r14 points to the CRC32 word
;        r13 points to the packed conf.
;        block (typically 0xf000)
;-----------------------------------------------------------------------
; crc32 - checksum over the packed configuration block
;
; Registers as used below:
;   r13  in:  first GIO address of the data
;   r14  in:  GIO address of the CRC32 word; copied to r2 and then reused
;             as the autoincrementing read pointer
;   r0   current data dword, consumed LSB first
;   r1   CRC register, starts as c7 and is xor-ed with c7 at the end
;   r2   end address for the word loop
;   r3   generator polynomial 0x04C11DB7
;   r5   bit counter (32 per dword)
; The result is stored with sgio+ and control goes to conf_man_un_lp.
; NOTE(review): the flags of the final cmp are not used by any visible
; instruction, so a CRC mismatch is not acted upon here.
;-----------------------------------------------------------------------
crc32:
        mov     r14, r2                 ; r14 last value points to the CRC32 word
        mov     r13, r14                ; the GIO access register with autoincrement
        lgio+   0                       ; request the first word from DBANK
        mov     c7, r1                  ; the CRC32 register is r1, now initialised with FFF..FF
        iext    0x04C11DB7
        mov     0x04C11DB7, r3          ; the generator Poly
        jmpr    cc_busy, 0

crc32_next_word:
        mov     32, r5                  ; init the bit counter
        lpio    GBUSR0, r0              ; hold the data, requested before
        lgio+   0                       ; request the next word
crc32_next_bit:
        shl     -1, r0, r0              ; shift right r0, carry=LSB
        adc     r1, r1, r1              ; shift left r1 and add the carry
        jmpr    cc_ncarry, +2           ; skip the xor if no carry-out
        xor     r1, r3, r1              ; xor with the generator Poly
        sub     r5, c1, r5              ; dec the bit counter
        jmp     cc_nzero, crc32_next_bit ; end of the bit loop

        cmp     r14, r2                 ; compare with the end address
        jmp     cc_leu, crc32_next_word ; end of the loop

        xor     r1, c7, r1              ; xor with FF..FF at the end
        lpio    GBUSR0, r0              ; the last requested dword
        cmp     r1, r0                  ; compare the appended and the calculated crc32
        sgio+   r1                      ; store the calculated crc32
        jmp     cc_uncond, conf_man_un_lp
        nop

; pack
;   input:  in c8 the start address in GIO
;   output (typically, dep.
;   on c8): in 0xf000..0xf0d2
;   packing time 89 us
;-----------------------------------------------------------------------
; conf_man_pk - read the current register values and write them back
; into the packed structure that starts at the GIO address in c8.
;
; The headers (block / single) already present in that memory act as the
; template: each header is read with lgio+, the registers it describes are
; read from the TRAP, and the packed data dwords are written with sgio+
; right after the header. A block with words = 0 or an unsupported width
; ends the packing; the code then goes to low power.
;
; Register roles as used below:
;   r0  header dword, afterwards the TRAP address being read
;   r1  remaining number of words in the block / address of a single
;   r2  field width                    r3  address step / flag32
;   r4  value just read                r5  dword being assembled
;   r7  scratch for the two-step shift
;   r8  bit position of the next field inside r5
;   r14 GIO pointer used by lgio+ / sgio+ (autoincrement)
;   mask_1F, mask_FF, mask_FFFF, mask_data, cnst_sa are #def aliases
;   declared elsewhere in this file.
; Constants, as implied by the way they are used here (TODO confirm):
;   c1 = 1, c3 = 3, c5 = CPU number, c12 = event counter.
;-----------------------------------------------------------------------
conf_man_pk:
#ifdef debugT
        mov     0x640, r0
        spio    r0, CTPCTRL
        mov     0, r0
        spio    r0, CTPDINI
#endif

; initialization
#ifdef debug
        mov     0, r15                  ; the DMEM pointer, used in debug mode
#endif
        iext    0xFFFF
        mov     0xFFFF, mask_FFFF
        and     mask_FFFF, cnst_sa, r14 ; the GIO access register with autoincrement
        mov     0x1F, mask_1F           ; prepare the masks needed later
        mov     0xFF, mask_FF

; start loop - next block/single
conf_man_pk_next:
        jmpr    cc_busy, 0
        lgio+   0                       ; read the next block/single
        jmpr    cc_busy, 0
        lpio    GBUSR0, r0
        shl     -1, r0, r0              ; check the 0th bit
        jmp     cc_carry, conf_man_pk_sng ; jump to single

        ; block header; r0 is already shifted right by one
        shl     -7, r0, r1              ; shift right the number of words
        and     mask_FF, r1, r1         ; number of words in r1
        jmp     cc_zero, conf_man_pk_exit ; exit if 0
        shl     -2, r0, r2              ; shift the width-1 to the right
        and     mask_1F, r2, r2         ; width-1 in r2
        add     r2, c1, r2              ; width in r2
        and     r0, c3, r3              ; step in r3
        shl     -15, r0, r0             ; address, not necessary to mask

; order according to the most frequent appearance!!!
        cmp     r2, 15
        jmp     cc_eq, conf_man_pk_5_15 ; 15 bit case
        cmp     r2, 10
        jmp     cc_eq, conf_man_pk_5_15 ; 10 bit case
        cmp     r2, 5
        jmp     cc_eq, conf_man_pk_5_15 ; 5 bit case
        cmp     r2, 31
        jmp     cc_eq, conf_man_pk_31   ; 31 bit case
        cmp     r2, 6
        jmp     cc_eq, conf_man_pk_5_15 ; 6 bit case
        cmp     r2, 7
        jmp     cc_eq, conf_man_pk_5_15 ; 7 bit case
; if no one of the cases above => exit
        jmp     cc_uncond, conf_man_pk_exit ; exit

; 5,6,7,10,15 bit cases
conf_man_pk_5_15:
; step in r3
; width in r2
; address in r0
; number of words in r1
        shl     r2, c1, mask_data
        sub     mask_data, c1, mask_data ; the mask for the data: (1 << width) - 1
conf_man_pk_rp:                         ; start a new packed dword
        mov     0, r8                   ; bit counter
        mov     0, r5                   ; packed data register
conf_man_pk_rd:
        jmpr    cc_busy, 0
        lgio    0, r0                   ; request the next data
        add     r0, r3, r0              ; advance the register (read) address
        shl     -1, r8, r7              ; r7 = bitcnt / 2, first step of a long shift
        jmpr    cc_busy, 0
        lpio    GBUSR0, r4              ; read data
        and     r4, mask_data, r4       ; mask the upper bits
        cmp     r8, 15
        jmp     cc_gtu, conf_man_pk_rdl ; bitcnt > 15: shift in two steps
conf_man_pk_rds:
        shl     r8, r4, r4              ; r4 = r4 << bitcnt (direct, as bitcnt < 16)
conf_man_pk_ns:
        or      r4, r5, r5              ; r5 = r5 | r4
        sub     r1, c1, r1              ; remaining number of registers
        jmp     cc_zero, conf_man_pk_wr ; no more registers in the block, write and exit
        add     r8, r2, r8              ; increment the bit counter
        cmp     r8, 27
        jmp     cc_ltu, conf_man_pk_rd  ; more bits available in the word, read the next reg
; no more bits, write the packed registers
        shl     1, r5, r5               ; r5 = (r5 << 1) | 1
        or      r5, c1, r5
        sgio+   r5                      ; store
        jmp     cc_uncond, conf_man_pk_rp ; read next group of CNF reg

conf_man_pk_rdl:
        shl     r7, r4, r4              ; shift left in two steps: first by bitcnt/2
        sub     r8, r7, r7              ; then by the remainder, bitcnt - bitcnt/2
        shl     r7, r4, r4              ; r4 = r4 << bitcnt
        jmp     cc_uncond, conf_man_pk_ns

; write and exit
conf_man_pk_wr:
        shl     1, r5, r5               ; r5 = (r5 << 1) | 1
        or      r5, c1, r5
        sgio+   r5
        jmp     cc_uncond, conf_man_pk_next ; next block/single

; 31 bit case
conf_man_pk_31:
; step in r3
; address in r0
; number of words in r1
        jmpr    cc_busy, 0
        lgio    0, r0                   ; request the next data
        add     r0, r3, r0              ; advance the register (read) address
        jmpr    cc_busy, 0
        lpio    GBUSR0, r4              ; read data
        shl     1, r4, r4               ; shift left, the LSB of a data dword is always 1
        or      r4, c1, r4
        sgio+   r4
        sub     r1, c1, r1              ; decrement the number of words
        jmp     cc_nzero, conf_man_pk_31 ; next 32 bit word
        jmp     cc_uncond, conf_man_pk_next ; next block/single

; single case
conf_man_pk_sng:
; here we have in r0 the header shifted to the right by one
        shl     -11, r0, r1             ; [30..17] contain the address
        shl     -6, r1, r1              ; address in r1 (total shift 17)
        jmpr    cc_busy, 0
        lgio    0, r1                   ; request the register value
        and     r0, c1, r3              ; copy the flag32 in r3
        sub     r14, c1, r14            ; decrease r14, as was autoincremented, but in single case
                                        ; we need to write on the same address!
        jmpr    cc_busy, 0
        lpio    GBUSR0, r4              ; read data
        shl     14, r1, r5              ; r5 = (addr << 14)
        shl     2, r5, r5               ; r5 = (addr << 16)
        and     r4, mask_FFFF, r2       ; r2 = Lo(data)
        or      r5, r2, r5              ; (addr << 16) | (data & 0xFFFF)
        shl     1, r5, r5
        or      r5, r3, r5              ; (addr << 17) | ((data & 0xFFFF) << 1) | flag32
        shl     1, r5, r5
        or      r5, c1, r5              ; (addr << 18) | ((data & 0xFFFF) << 2) | (flag32 << 1) | 1
        sgio+   r5
        and     r3, c1, r3              ; check if > 16 bits
        jmp     cc_zero, conf_man_pk_next ; exit if not
; > 16 bits single
        or      r4, c1, r5              ; second dword: data | 1
        jmpr    cc_busy, 0
        sgio+   r5
        jmp     cc_uncond, conf_man_pk_next

conf_man_pk_exit:
#ifdef debugT
        lpio    CTPDOUT, r6             ; the number of CPU clocks
        jmpr    cc_busy, 0
        iext    cpu_clocks_pk
        sgio    r6, cpu_clocks_pk
#else
        nop
#endif
;       sub     r14, c1, r14
;       jmp     cc_uncond, crc32        ; calculate crc32

conf_man_pk_lp:
; #ifdef debugT
;       lpio    CTPDOUT, r6             ; the number of CPU clocks after CRC32
;       jmpr    cc_busy, 0
;       iext    cpu_clocks_crc
;       sgio    r6, cpu_clocks_crc
; #endif

; low power
        shl     4, c12, r0              ; event counter << 2
                                        ; NOTE(review): the shift operand is 4 but the
                                        ; comment says << 2 - confirm which is meant
        or      r0, c3, r0              ; (event counter << 2) | 3
        iext    LP_REP
        mov     LP_REP, r1
        add     r1, c5, r1              ; + CPU#
        jmpr    cc_busy, 0
        sgio    r0, r1
        mov     cmd_lp, r0
        jmpr    cc_busy, 0
        sgio    r0, SMCMD               ; go to low power, will be changed later
        jmpr    cc_busy, 0
        jmpr    cc_uncond, 0            ; stay here
        nop