summaryrefslogtreecommitdiffstats
path: root/lib/lib8tion/scale8.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/lib8tion/scale8.h')
-rw-r--r--lib/lib8tion/scale8.h542
1 files changed, 542 insertions, 0 deletions
diff --git a/lib/lib8tion/scale8.h b/lib/lib8tion/scale8.h
new file mode 100644
index 000000000..9895fd4d7
--- /dev/null
+++ b/lib/lib8tion/scale8.h
@@ -0,0 +1,542 @@
+#ifndef __INC_LIB8TION_SCALE_H
+#define __INC_LIB8TION_SCALE_H
+
+///@ingroup lib8tion
+
+///@defgroup Scaling Scaling functions
+/// Fast, efficient 8-bit scaling functions specifically
+/// designed for high-performance LED programming.
+///
+/// Because of the AVR(Arduino) and ARM assembly language
+/// implementations provided, using these functions often
+/// results in smaller and faster code than the equivalent
+/// program using plain "C" arithmetic and logic.
+///@{
+
+/// scale one byte by a second one, which is treated as
+/// the numerator of a fraction whose denominator is 256
+/// In other words, it computes i * (scale / 256)
+/// 4 clocks AVR with MUL, 2 clocks ARM
+LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale)
+{
+#if SCALE8_C == 1
+#if (FASTLED_SCALE8_FIXED == 1)
+ return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8;
+#else
+ return ((uint16_t)i * (uint16_t)(scale) ) >> 8;
+#endif
+#elif SCALE8_AVRASM == 1
+#if defined(LIB8_ATTINY)
+#if (FASTLED_SCALE8_FIXED == 1)
+ uint8_t work=i;
+#else
+ uint8_t work=0;
+#endif
+ uint8_t cnt=0x80;
+ asm volatile(
+#if (FASTLED_SCALE8_FIXED == 1)
+ " inc %[scale] \n\t"
+ " breq DONE_%= \n\t"
+ " clr %[work] \n\t"
+#endif
+ "LOOP_%=: \n\t"
+ /*" sbrc %[scale], 0 \n\t"
+ " add %[work], %[i] \n\t"
+ " ror %[work] \n\t"
+ " lsr %[scale] \n\t"
+ " clc \n\t"*/
+ " sbrc %[scale], 0 \n\t"
+ " add %[work], %[i] \n\t"
+ " ror %[work] \n\t"
+ " lsr %[scale] \n\t"
+ " lsr %[cnt] \n\t"
+ "brcc LOOP_%= \n\t"
+ "DONE_%=: \n\t"
+ : [work] "+r" (work), [cnt] "+r" (cnt)
+ : [scale] "r" (scale), [i] "r" (i)
+ :
+ );
+ return work;
+#else
+ asm volatile(
+#if (FASTLED_SCALE8_FIXED==1)
+ // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
+ "mul %0, %1 \n\t"
+ // Add i to r0, possibly setting the carry flag
+ "add r0, %0 \n\t"
+ // load the immediate 0 into i (note, this does _not_ touch any flags)
+ "ldi %0, 0x00 \n\t"
+ // walk and chew gum at the same time
+ "adc %0, r1 \n\t"
+#else
+ /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+ "mul %0, %1 \n\t"
+ /* Move the high 8-bits of the product (r1) back to i */
+ "mov %0, r1 \n\t"
+ /* Restore r1 to "0"; it's expected to always be that */
+#endif
+ "clr __zero_reg__ \n\t"
+
+ : "+a" (i) /* writes to i */
+ : "a" (scale) /* uses scale */
+ : "r0", "r1" /* clobbers r0, r1 */ );
+
+ /* Return the result */
+ return i;
+#endif
+#else
+#error "No implementation for scale8 available."
+#endif
+}
+
+
+/// The "video" version of scale8 guarantees that the output will
+/// be only be zero if one or both of the inputs are zero. If both
+/// inputs are non-zero, the output is guaranteed to be non-zero.
+/// This makes for better 'video'/LED dimming, at the cost of
+/// several additional cycles.
+LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale)
+{
+#if SCALE8_C == 1 || defined(LIB8_ATTINY)
+ uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
+ // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
+ return j;
+#elif SCALE8_AVRASM == 1
+ uint8_t j=0;
+ asm volatile(
+ " tst %[i]\n\t"
+ " breq L_%=\n\t"
+ " mul %[i], %[scale]\n\t"
+ " mov %[j], r1\n\t"
+ " clr __zero_reg__\n\t"
+ " cpse %[scale], r1\n\t"
+ " subi %[j], 0xFF\n\t"
+ "L_%=: \n\t"
+ : [j] "+a" (j)
+ : [i] "a" (i), [scale] "a" (scale)
+ : "r0", "r1");
+
+ return j;
+ // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ // asm volatile(
+ // " tst %0 \n"
+ // " breq L_%= \n"
+ // " mul %0, %1 \n"
+ // " mov %0, r1 \n"
+ // " add %0, %2 \n"
+ // " clr __zero_reg__ \n"
+ // "L_%=: \n"
+
+ // : "+a" (i)
+ // : "a" (scale), "a" (nonzeroscale)
+ // : "r0", "r1");
+
+ // // Return the result
+ // return i;
+#else
+#error "No implementation for scale8_video available."
+#endif
+}
+
+
+/// This version of scale8 does not clean up the R1 register on AVR
+/// If you are doing several 'scale8's in a row, use this, and
+/// then explicitly call cleanup_R1.
+LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
+{
+#if SCALE8_C == 1
+#if (FASTLED_SCALE8_FIXED == 1)
+ return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8;
+#else
+ return ((int)i * (int)(scale) ) >> 8;
+#endif
+#elif SCALE8_AVRASM == 1
+ asm volatile(
+ #if (FASTLED_SCALE8_FIXED==1)
+ // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
+ "mul %0, %1 \n\t"
+ // Add i to r0, possibly setting the carry flag
+ "add r0, %0 \n\t"
+ // load the immediate 0 into i (note, this does _not_ touch any flags)
+ "ldi %0, 0x00 \n\t"
+ // walk and chew gum at the same time
+ "adc %0, r1 \n\t"
+ #else
+ /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+ "mul %0, %1 \n\t"
+ /* Move the high 8-bits of the product (r1) back to i */
+ "mov %0, r1 \n\t"
+ #endif
+ /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
+ /* "clr __zero_reg__ \n\t" */
+
+ : "+a" (i) /* writes to i */
+ : "a" (scale) /* uses scale */
+ : "r0", "r1" /* clobbers r0, r1 */ );
+
+ // Return the result
+ return i;
+#else
+#error "No implementation for scale8_LEAVING_R1_DIRTY available."
+#endif
+}
+
+
+/// This version of scale8_video does not clean up the R1 register on AVR
+/// If you are doing several 'scale8_video's in a row, use this, and
+/// then explicitly call cleanup_R1.
+LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
+{
+#if SCALE8_C == 1 || defined(LIB8_ATTINY)
+ uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
+ // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
+ return j;
+#elif SCALE8_AVRASM == 1
+ uint8_t j=0;
+ asm volatile(
+ " tst %[i]\n\t"
+ " breq L_%=\n\t"
+ " mul %[i], %[scale]\n\t"
+ " mov %[j], r1\n\t"
+ " breq L_%=\n\t"
+ " subi %[j], 0xFF\n\t"
+ "L_%=: \n\t"
+ : [j] "+a" (j)
+ : [i] "a" (i), [scale] "a" (scale)
+ : "r0", "r1");
+
+ return j;
+ // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ // asm volatile(
+ // " tst %0 \n"
+ // " breq L_%= \n"
+ // " mul %0, %1 \n"
+ // " mov %0, r1 \n"
+ // " add %0, %2 \n"
+ // " clr __zero_reg__ \n"
+ // "L_%=: \n"
+
+ // : "+a" (i)
+ // : "a" (scale), "a" (nonzeroscale)
+ // : "r0", "r1");
+
+ // // Return the result
+ // return i;
+#else
+#error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
+#endif
+}
+
+/// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls
+LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void)
+{
+#if CLEANUP_R1_AVRASM == 1
+ // Restore r1 to "0"; it's expected to always be that
+ asm volatile( "clr __zero_reg__ \n\t" : : : "r1" );
+#endif
+}
+
+
+/// scale a 16-bit unsigned value by an 8-bit value,
+/// considered as numerator of a fraction whose denominator
+/// is 256. In other words, it computes i * (scale / 256)
+
+LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
+{
+#if SCALE16BY8_C == 1
+ uint16_t result;
+#if FASTLED_SCALE8_FIXED == 1
+ result = (i * (1+((uint16_t)scale))) >> 8;
+#else
+ result = (i * scale) / 256;
+#endif
+ return result;
+#elif SCALE16BY8_AVRASM == 1
+#if FASTLED_SCALE8_FIXED == 1
+ uint16_t result = 0;
+ asm volatile(
+ // result.A = HighByte( (i.A x scale) + i.A )
+ " mul %A[i], %[scale] \n\t"
+ " add r0, %A[i] \n\t"
+ // " adc r1, [zero] \n\t"
+ // " mov %A[result], r1 \n\t"
+ " adc %A[result], r1 \n\t"
+
+ // result.A-B += i.B x scale
+ " mul %B[i], %[scale] \n\t"
+ " add %A[result], r0 \n\t"
+ " adc %B[result], r1 \n\t"
+
+ // cleanup r1
+ " clr __zero_reg__ \n\t"
+
+ // result.A-B += i.B
+ " add %A[result], %B[i] \n\t"
+ " adc %B[result], __zero_reg__ \n\t"
+
+ : [result] "+r" (result)
+ : [i] "r" (i), [scale] "r" (scale)
+ : "r0", "r1"
+ );
+ return result;
+#else
+ uint16_t result = 0;
+ asm volatile(
+ // result.A = HighByte(i.A x j )
+ " mul %A[i], %[scale] \n\t"
+ " mov %A[result], r1 \n\t"
+ //" clr %B[result] \n\t"
+
+ // result.A-B += i.B x j
+ " mul %B[i], %[scale] \n\t"
+ " add %A[result], r0 \n\t"
+ " adc %B[result], r1 \n\t"
+
+ // cleanup r1
+ " clr __zero_reg__ \n\t"
+
+ : [result] "+r" (result)
+ : [i] "r" (i), [scale] "r" (scale)
+ : "r0", "r1"
+ );
+ return result;
+#endif
+#else
+ #error "No implementation for scale16by8 available."
+#endif
+}
+
+/// scale a 16-bit unsigned value by a 16-bit value,
+/// considered as numerator of a fraction whose denominator
+/// is 65536. In other words, it computes i * (scale / 65536)
+
+LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
+{
+ #if SCALE16_C == 1
+ uint16_t result;
+#if FASTLED_SCALE8_FIXED == 1
+ result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536;
+#else
+ result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
+#endif
+ return result;
+#elif SCALE16_AVRASM == 1
+#if FASTLED_SCALE8_FIXED == 1
+ // implemented sort of like
+ // result = ((i * scale) + i ) / 65536
+ //
+ // why not like this, you may ask?
+ // result = (i * (scale+1)) / 65536
+ // the answer is that if scale is 65535, then scale+1
+ // will be zero, which is not what we want.
+ uint32_t result;
+ asm volatile(
+ // result.A-B = i.A x scale.A
+ " mul %A[i], %A[scale] \n\t"
+ // save results...
+ // basic idea:
+ //" mov %A[result], r0 \n\t"
+ //" mov %B[result], r1 \n\t"
+ // which can be written as...
+ " movw %A[result], r0 \n\t"
+ // Because we're going to add i.A-B to
+ // result.A-D, we DO need to keep both
+ // the r0 and r1 portions of the product
+ // UNlike in the 'unfixed scale8' version.
+ // So the movw here is needed.
+ : [result] "=r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale)
+ : "r0", "r1"
+ );
+
+ asm volatile(
+ // result.C-D = i.B x scale.B
+ " mul %B[i], %B[scale] \n\t"
+ //" mov %C[result], r0 \n\t"
+ //" mov %D[result], r1 \n\t"
+ " movw %C[result], r0 \n\t"
+ : [result] "+r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale)
+ : "r0", "r1"
+ );
+
+ const uint8_t zero = 0;
+ asm volatile(
+ // result.B-D += i.B x scale.A
+ " mul %B[i], %A[scale] \n\t"
+
+ " add %B[result], r0 \n\t"
+ " adc %C[result], r1 \n\t"
+ " adc %D[result], %[zero] \n\t"
+
+ // result.B-D += i.A x scale.B
+ " mul %A[i], %B[scale] \n\t"
+
+ " add %B[result], r0 \n\t"
+ " adc %C[result], r1 \n\t"
+ " adc %D[result], %[zero] \n\t"
+
+ // cleanup r1
+ " clr r1 \n\t"
+
+ : [result] "+r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale),
+ [zero] "r" (zero)
+ : "r0", "r1"
+ );
+
+ asm volatile(
+ // result.A-D += i.A-B
+ " add %A[result], %A[i] \n\t"
+ " adc %B[result], %B[i] \n\t"
+ " adc %C[result], %[zero] \n\t"
+ " adc %D[result], %[zero] \n\t"
+ : [result] "+r" (result)
+ : [i] "r" (i),
+ [zero] "r" (zero)
+ );
+
+ result = result >> 16;
+ return result;
+#else
+ uint32_t result;
+ asm volatile(
+ // result.A-B = i.A x scale.A
+ " mul %A[i], %A[scale] \n\t"
+ // save results...
+ // basic idea:
+ //" mov %A[result], r0 \n\t"
+ //" mov %B[result], r1 \n\t"
+ // which can be written as...
+ " movw %A[result], r0 \n\t"
+ // We actually don't need to do anything with r0,
+ // as result.A is never used again here, so we
+ // could just move the high byte, but movw is
+ // one clock cycle, just like mov, so might as
+ // well, in case we want to use this code for
+ // a generic 16x16 multiply somewhere.
+
+ : [result] "=r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale)
+ : "r0", "r1"
+ );
+
+ asm volatile(
+ // result.C-D = i.B x scale.B
+ " mul %B[i], %B[scale] \n\t"
+ //" mov %C[result], r0 \n\t"
+ //" mov %D[result], r1 \n\t"
+ " movw %C[result], r0 \n\t"
+ : [result] "+r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale)
+ : "r0", "r1"
+ );
+
+ const uint8_t zero = 0;
+ asm volatile(
+ // result.B-D += i.B x scale.A
+ " mul %B[i], %A[scale] \n\t"
+
+ " add %B[result], r0 \n\t"
+ " adc %C[result], r1 \n\t"
+ " adc %D[result], %[zero] \n\t"
+
+ // result.B-D += i.A x scale.B
+ " mul %A[i], %B[scale] \n\t"
+
+ " add %B[result], r0 \n\t"
+ " adc %C[result], r1 \n\t"
+ " adc %D[result], %[zero] \n\t"
+
+ // cleanup r1
+ " clr r1 \n\t"
+
+ : [result] "+r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale),
+ [zero] "r" (zero)
+ : "r0", "r1"
+ );
+
+ result = result >> 16;
+ return result;
+#endif
+#else
+ #error "No implementation for scale16 available."
+#endif
+}
+///@}
+
+///@defgroup Dimming Dimming and brightening functions
+///
+/// Dimming and brightening functions
+///
+/// The eye does not respond in a linear way to light.
+/// High speed PWM'd LEDs at 50% duty cycle appear far
+/// brighter then the 'half as bright' you might expect.
+///
+/// If you want your midpoint brightness leve (128) to
+/// appear half as bright as 'full' brightness (255), you
+/// have to apply a 'dimming function'.
+///@{
+
+/// Adjust a scaling value for dimming
+LIB8STATIC uint8_t dim8_raw( uint8_t x)
+{
+ return scale8( x, x);
+}
+
+/// Adjust a scaling value for dimming for video (value will never go below 1)
+LIB8STATIC uint8_t dim8_video( uint8_t x)
+{
+ return scale8_video( x, x);
+}
+
+/// Linear version of the dimming function that halves for values < 128
+LIB8STATIC uint8_t dim8_lin( uint8_t x )
+{
+ if( x & 0x80 ) {
+ x = scale8( x, x);
+ } else {
+ x += 1;
+ x /= 2;
+ }
+ return x;
+}
+
+/// inverse of the dimming function, brighten a value
+LIB8STATIC uint8_t brighten8_raw( uint8_t x)
+{
+ uint8_t ix = 255 - x;
+ return 255 - scale8( ix, ix);
+}
+
+/// inverse of the dimming function, brighten a value
+LIB8STATIC uint8_t brighten8_video( uint8_t x)
+{
+ uint8_t ix = 255 - x;
+ return 255 - scale8_video( ix, ix);
+}
+
+/// inverse of the dimming function, brighten a value
+LIB8STATIC uint8_t brighten8_lin( uint8_t x )
+{
+ uint8_t ix = 255 - x;
+ if( ix & 0x80 ) {
+ ix = scale8( ix, ix);
+ } else {
+ ix += 1;
+ ix /= 2;
+ }
+ return 255 - ix;
+}
+
+///@}
+#endif