diff options
Diffstat (limited to 'lib/lib8tion/scale8.h')
-rw-r--r-- | lib/lib8tion/scale8.h | 542 |
1 files changed, 0 insertions, 542 deletions
diff --git a/lib/lib8tion/scale8.h b/lib/lib8tion/scale8.h deleted file mode 100644 index 9895fd4d79..0000000000 --- a/lib/lib8tion/scale8.h +++ /dev/null @@ -1,542 +0,0 @@ -#ifndef __INC_LIB8TION_SCALE_H -#define __INC_LIB8TION_SCALE_H - -///@ingroup lib8tion - -///@defgroup Scaling Scaling functions -/// Fast, efficient 8-bit scaling functions specifically -/// designed for high-performance LED programming. -/// -/// Because of the AVR(Arduino) and ARM assembly language -/// implementations provided, using these functions often -/// results in smaller and faster code than the equivalent -/// program using plain "C" arithmetic and logic. -///@{ - -/// scale one byte by a second one, which is treated as -/// the numerator of a fraction whose denominator is 256 -/// In other words, it computes i * (scale / 256) -/// 4 clocks AVR with MUL, 2 clocks ARM -LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale) -{ -#if SCALE8_C == 1 -#if (FASTLED_SCALE8_FIXED == 1) - return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8; -#else - return ((uint16_t)i * (uint16_t)(scale) ) >> 8; -#endif -#elif SCALE8_AVRASM == 1 -#if defined(LIB8_ATTINY) -#if (FASTLED_SCALE8_FIXED == 1) - uint8_t work=i; -#else - uint8_t work=0; -#endif - uint8_t cnt=0x80; - asm volatile( -#if (FASTLED_SCALE8_FIXED == 1) - " inc %[scale] \n\t" - " breq DONE_%= \n\t" - " clr %[work] \n\t" -#endif - "LOOP_%=: \n\t" - /*" sbrc %[scale], 0 \n\t" - " add %[work], %[i] \n\t" - " ror %[work] \n\t" - " lsr %[scale] \n\t" - " clc \n\t"*/ - " sbrc %[scale], 0 \n\t" - " add %[work], %[i] \n\t" - " ror %[work] \n\t" - " lsr %[scale] \n\t" - " lsr %[cnt] \n\t" - "brcc LOOP_%= \n\t" - "DONE_%=: \n\t" - : [work] "+r" (work), [cnt] "+r" (cnt) - : [scale] "r" (scale), [i] "r" (i) - : - ); - return work; -#else - asm volatile( -#if (FASTLED_SCALE8_FIXED==1) - // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 - "mul %0, %1 \n\t" - // Add i to r0, possibly setting the carry flag - "add r0, %0 \n\t" - // load the immediate 0 into i (note, this does _not_ touch any flags) - "ldi %0, 0x00 \n\t" - // walk and chew gum at the same time - "adc %0, r1 \n\t" -#else - /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ - "mul %0, %1 \n\t" - /* Move the high 8-bits of the product (r1) back to i */ - "mov %0, r1 \n\t" - /* Restore r1 to "0"; it's expected to always be that */ -#endif - "clr __zero_reg__ \n\t" - - : "+a" (i) /* writes to i */ - : "a" (scale) /* uses scale */ - : "r0", "r1" /* clobbers r0, r1 */ ); - - /* Return the result */ - return i; -#endif -#else -#error "No implementation for scale8 available." -#endif -} - - -/// The "video" version of scale8 guarantees that the output will -/// be only be zero if one or both of the inputs are zero. If both -/// inputs are non-zero, the output is guaranteed to be non-zero. -/// This makes for better 'video'/LED dimming, at the cost of -/// several additional cycles. -LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale) -{ -#if SCALE8_C == 1 || defined(LIB8_ATTINY) - uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0); - // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; - // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; - return j; -#elif SCALE8_AVRASM == 1 - uint8_t j=0; - asm volatile( - " tst %[i]\n\t" - " breq L_%=\n\t" - " mul %[i], %[scale]\n\t" - " mov %[j], r1\n\t" - " clr __zero_reg__\n\t" - " cpse %[scale], r1\n\t" - " subi %[j], 0xFF\n\t" - "L_%=: \n\t" - : [j] "+a" (j) - : [i] "a" (i), [scale] "a" (scale) - : "r0", "r1"); - - return j; - // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; - // asm volatile( - // " tst %0 \n" - // " breq L_%= \n" - // " mul %0, %1 \n" - // " mov %0, r1 \n" - // " add %0, %2 \n" - // " clr __zero_reg__ \n" - // "L_%=: \n" - - // : "+a" (i) - // : "a" (scale), "a" (nonzeroscale) - // : "r0", "r1"); - - // // Return the result - // return i; -#else -#error "No implementation for scale8_video available." -#endif -} - - -/// This version of scale8 does not clean up the R1 register on AVR -/// If you are doing several 'scale8's in a row, use this, and -/// then explicitly call cleanup_R1. -LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) -{ -#if SCALE8_C == 1 -#if (FASTLED_SCALE8_FIXED == 1) - return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8; -#else - return ((int)i * (int)(scale) ) >> 8; -#endif -#elif SCALE8_AVRASM == 1 - asm volatile( - #if (FASTLED_SCALE8_FIXED==1) - // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 - "mul %0, %1 \n\t" - // Add i to r0, possibly setting the carry flag - "add r0, %0 \n\t" - // load the immediate 0 into i (note, this does _not_ touch any flags) - "ldi %0, 0x00 \n\t" - // walk and chew gum at the same time - "adc %0, r1 \n\t" - #else - /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ - "mul %0, %1 \n\t" - /* Move the high 8-bits of the product (r1) back to i */ - "mov %0, r1 \n\t" - #endif - /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */ - /* "clr __zero_reg__ \n\t" */ - - : "+a" (i) /* writes to i */ - : "a" (scale) /* uses scale */ - : "r0", "r1" /* clobbers r0, r1 */ ); - - // Return the result - return i; -#else -#error "No implementation for scale8_LEAVING_R1_DIRTY available." -#endif -} - - -/// This version of scale8_video does not clean up the R1 register on AVR -/// If you are doing several 'scale8_video's in a row, use this, and -/// then explicitly call cleanup_R1. -LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) -{ -#if SCALE8_C == 1 || defined(LIB8_ATTINY) - uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0); - // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; - // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; - return j; -#elif SCALE8_AVRASM == 1 - uint8_t j=0; - asm volatile( - " tst %[i]\n\t" - " breq L_%=\n\t" - " mul %[i], %[scale]\n\t" - " mov %[j], r1\n\t" - " breq L_%=\n\t" - " subi %[j], 0xFF\n\t" - "L_%=: \n\t" - : [j] "+a" (j) - : [i] "a" (i), [scale] "a" (scale) - : "r0", "r1"); - - return j; - // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; - // asm volatile( - // " tst %0 \n" - // " breq L_%= \n" - // " mul %0, %1 \n" - // " mov %0, r1 \n" - // " add %0, %2 \n" - // " clr __zero_reg__ \n" - // "L_%=: \n" - - // : "+a" (i) - // : "a" (scale), "a" (nonzeroscale) - // : "r0", "r1"); - - // // Return the result - // return i; -#else -#error "No implementation for scale8_video_LEAVING_R1_DIRTY available." -#endif -} - -/// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls -LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void) -{ -#if CLEANUP_R1_AVRASM == 1 - // Restore r1 to "0"; it's expected to always be that - asm volatile( "clr __zero_reg__ \n\t" : : : "r1" ); -#endif -} - - -/// scale a 16-bit unsigned value by an 8-bit value, -/// considered as numerator of a fraction whose denominator -/// is 256. In other words, it computes i * (scale / 256) - -LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale ) -{ -#if SCALE16BY8_C == 1 - uint16_t result; -#if FASTLED_SCALE8_FIXED == 1 - result = (i * (1+((uint16_t)scale))) >> 8; -#else - result = (i * scale) / 256; -#endif - return result; -#elif SCALE16BY8_AVRASM == 1 -#if FASTLED_SCALE8_FIXED == 1 - uint16_t result = 0; - asm volatile( - // result.A = HighByte( (i.A x scale) + i.A ) - " mul %A[i], %[scale] \n\t" - " add r0, %A[i] \n\t" - // " adc r1, [zero] \n\t" - // " mov %A[result], r1 \n\t" - " adc %A[result], r1 \n\t" - - // result.A-B += i.B x scale - " mul %B[i], %[scale] \n\t" - " add %A[result], r0 \n\t" - " adc %B[result], r1 \n\t" - - // cleanup r1 - " clr __zero_reg__ \n\t" - - // result.A-B += i.B - " add %A[result], %B[i] \n\t" - " adc %B[result], __zero_reg__ \n\t" - - : [result] "+r" (result) - : [i] "r" (i), [scale] "r" (scale) - : "r0", "r1" - ); - return result; -#else - uint16_t result = 0; - asm volatile( - // result.A = HighByte(i.A x j ) - " mul %A[i], %[scale] \n\t" - " mov %A[result], r1 \n\t" - //" clr %B[result] \n\t" - - // result.A-B += i.B x j - " mul %B[i], %[scale] \n\t" - " add %A[result], r0 \n\t" - " adc %B[result], r1 \n\t" - - // cleanup r1 - " clr __zero_reg__ \n\t" - - : [result] "+r" (result) - : [i] "r" (i), [scale] "r" (scale) - : "r0", "r1" - ); - return result; -#endif -#else - #error "No implementation for scale16by8 available." -#endif -} - -/// scale a 16-bit unsigned value by a 16-bit value, -/// considered as numerator of a fraction whose denominator -/// is 65536. In other words, it computes i * (scale / 65536) - -LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) -{ - #if SCALE16_C == 1 - uint16_t result; -#if FASTLED_SCALE8_FIXED == 1 - result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536; -#else - result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536; -#endif - return result; -#elif SCALE16_AVRASM == 1 -#if FASTLED_SCALE8_FIXED == 1 - // implemented sort of like - // result = ((i * scale) + i ) / 65536 - // - // why not like this, you may ask? - // result = (i * (scale+1)) / 65536 - // the answer is that if scale is 65535, then scale+1 - // will be zero, which is not what we want. - uint32_t result; - asm volatile( - // result.A-B = i.A x scale.A - " mul %A[i], %A[scale] \n\t" - // save results... - // basic idea: - //" mov %A[result], r0 \n\t" - //" mov %B[result], r1 \n\t" - // which can be written as... - " movw %A[result], r0 \n\t" - // Because we're going to add i.A-B to - // result.A-D, we DO need to keep both - // the r0 and r1 portions of the product - // UNlike in the 'unfixed scale8' version. - // So the movw here is needed. - : [result] "=r" (result) - : [i] "r" (i), - [scale] "r" (scale) - : "r0", "r1" - ); - - asm volatile( - // result.C-D = i.B x scale.B - " mul %B[i], %B[scale] \n\t" - //" mov %C[result], r0 \n\t" - //" mov %D[result], r1 \n\t" - " movw %C[result], r0 \n\t" - : [result] "+r" (result) - : [i] "r" (i), - [scale] "r" (scale) - : "r0", "r1" - ); - - const uint8_t zero = 0; - asm volatile( - // result.B-D += i.B x scale.A - " mul %B[i], %A[scale] \n\t" - - " add %B[result], r0 \n\t" - " adc %C[result], r1 \n\t" - " adc %D[result], %[zero] \n\t" - - // result.B-D += i.A x scale.B - " mul %A[i], %B[scale] \n\t" - - " add %B[result], r0 \n\t" - " adc %C[result], r1 \n\t" - " adc %D[result], %[zero] \n\t" - - // cleanup r1 - " clr r1 \n\t" - - : [result] "+r" (result) - : [i] "r" (i), - [scale] "r" (scale), - [zero] "r" (zero) - : "r0", "r1" - ); - - asm volatile( - // result.A-D += i.A-B - " add %A[result], %A[i] \n\t" - " adc %B[result], %B[i] \n\t" - " adc %C[result], %[zero] \n\t" - " adc %D[result], %[zero] \n\t" - : [result] "+r" (result) - : [i] "r" (i), - [zero] "r" (zero) - ); - - result = result >> 16; - return result; -#else - uint32_t result; - asm volatile( - // result.A-B = i.A x scale.A - " mul %A[i], %A[scale] \n\t" - // save results... - // basic idea: - //" mov %A[result], r0 \n\t" - //" mov %B[result], r1 \n\t" - // which can be written as... - " movw %A[result], r0 \n\t" - // We actually don't need to do anything with r0, - // as result.A is never used again here, so we - // could just move the high byte, but movw is - // one clock cycle, just like mov, so might as - // well, in case we want to use this code for - // a generic 16x16 multiply somewhere. - - : [result] "=r" (result) - : [i] "r" (i), - [scale] "r" (scale) - : "r0", "r1" - ); - - asm volatile( - // result.C-D = i.B x scale.B - " mul %B[i], %B[scale] \n\t" - //" mov %C[result], r0 \n\t" - //" mov %D[result], r1 \n\t" - " movw %C[result], r0 \n\t" - : [result] "+r" (result) - : [i] "r" (i), - [scale] "r" (scale) - : "r0", "r1" - ); - - const uint8_t zero = 0; - asm volatile( - // result.B-D += i.B x scale.A - " mul %B[i], %A[scale] \n\t" - - " add %B[result], r0 \n\t" - " adc %C[result], r1 \n\t" - " adc %D[result], %[zero] \n\t" - - // result.B-D += i.A x scale.B - " mul %A[i], %B[scale] \n\t" - - " add %B[result], r0 \n\t" - " adc %C[result], r1 \n\t" - " adc %D[result], %[zero] \n\t" - - // cleanup r1 - " clr r1 \n\t" - - : [result] "+r" (result) - : [i] "r" (i), - [scale] "r" (scale), - [zero] "r" (zero) - : "r0", "r1" - ); - - result = result >> 16; - return result; -#endif -#else - #error "No implementation for scale16 available." -#endif -} -///@} - -///@defgroup Dimming Dimming and brightening functions -/// -/// Dimming and brightening functions -/// -/// The eye does not respond in a linear way to light. -/// High speed PWM'd LEDs at 50% duty cycle appear far -/// brighter then the 'half as bright' you might expect. -/// -/// If you want your midpoint brightness leve (128) to -/// appear half as bright as 'full' brightness (255), you -/// have to apply a 'dimming function'. -///@{ - -/// Adjust a scaling value for dimming -LIB8STATIC uint8_t dim8_raw( uint8_t x) -{ - return scale8( x, x); -} - -/// Adjust a scaling value for dimming for video (value will never go below 1) -LIB8STATIC uint8_t dim8_video( uint8_t x) -{ - return scale8_video( x, x); -} - -/// Linear version of the dimming function that halves for values < 128 -LIB8STATIC uint8_t dim8_lin( uint8_t x ) -{ - if( x & 0x80 ) { - x = scale8( x, x); - } else { - x += 1; - x /= 2; - } - return x; -} - -/// inverse of the dimming function, brighten a value -LIB8STATIC uint8_t brighten8_raw( uint8_t x) -{ - uint8_t ix = 255 - x; - return 255 - scale8( ix, ix); -} - -/// inverse of the dimming function, brighten a value -LIB8STATIC uint8_t brighten8_video( uint8_t x) -{ - uint8_t ix = 255 - x; - return 255 - scale8_video( ix, ix); -} - -/// inverse of the dimming function, brighten a value -LIB8STATIC uint8_t brighten8_lin( uint8_t x ) -{ - uint8_t ix = 255 - x; - if( ix & 0x80 ) { - ix = scale8( ix, ix); - } else { - ix += 1; - ix /= 2; - } - return 255 - ix; -} - -///@} -#endif |