Revision 1126
Saved 53%-62% processor time on orbs sorting
lights.c | ||
---|---|---|
176 | 176 |
// ** Types ** |
177 | 177 |
// *********** |
178 | 178 |
|
179 |
struct pwm_channel_t |
|
179 |
struct pwm_channel_t // 2 bytes
|
|
180 | 180 |
{ |
181 | 181 |
uint8_t time; |
182 | 182 |
uint8_t mask; |
183 | 183 |
}; |
184 | 184 |
|
185 |
struct pwm_t |
|
185 |
struct pwm_t // 13 bytes
|
|
186 | 186 |
{ |
187 | 187 |
uint8_t init_mask; |
188 | 188 |
struct pwm_channel_t channel[num_pwm_channels]; |
... | ... | |
199 | 199 |
// The PWM channels and the buffer pointers. This data structure is triple |
200 | 200 |
// buffered, see above for the reasons. |
201 | 201 |
struct pwm_t pwm_buffer[3]; |
202 |
// TODO using pointers might be faster (or might be slower because pointers are |
|
203 |
// 16 bit long). |
|
204 | 202 |
struct pwm_t *pwm_read_buffer =&pwm_buffer[0]; // The front buffer the ISR reads from. Other thread may not touch this pointer or the buffer it points to. |
205 | 203 |
struct pwm_t *pwm_write_buffer=&pwm_buffer[1]; // The back buffer we can write to. The ISR may not touch this pointer or the buffer it points to. |
206 | 204 |
struct pwm_t *pwm_free_buffer =&pwm_buffer[2]; // The back buffer to flip with. |
... | ... | |
211 | 209 |
// pwm_channels. |
212 | 210 |
uint8_t orb_values[NUM_ORBS][NUM_COLORS]; |
213 | 211 |
|
214 |
// TODO: random note: what happens if the main process is sorting and a different |
|
215 |
// interrupt calls set_orb? |
|
216 | 212 |
|
217 |
|
|
218 | 213 |
// **************** |
219 | 214 |
// ** Timer ISRs ** |
220 | 215 |
// **************** |
... | ... | |
222 | 217 |
// Not volatile - only accessed in the interrupt handler |
223 | 218 |
uint8_t current_pwm_channel=0; |
224 | 219 |
|
225 |
uint8_t lastim=0; |
|
226 |
|
|
227 | 220 |
SIGNAL (SIG_OVERFLOW0) |
228 | 221 |
{ |
229 | 222 |
PORTF|=4; |
... | ... | |
259 | 252 |
PORTF|=4; |
260 | 253 |
// TODO: |
261 | 254 |
// - delayed interrupt |
262 |
// - synchronization OK? |
|
263 | 255 |
|
264 | 256 |
// If the interrupt is executed w/o delay, TCNT0 == time+1 (and TIME=OCR0) |
265 | 257 |
|
266 | 258 |
// TODO improve (check overflow; maybe use return after last, maybe use |
267 | 259 |
// pointers instead of indicies) |
260 |
// (but find out interrupt time before to measure improvement) |
|
268 | 261 |
while (TCNT0==pwm_read_buffer->channel[current_pwm_channel].time+1) |
269 | 262 |
{ |
270 | 263 |
// Turn the current channel off |
... | ... | |
287 | 280 |
// ** Internal orb setting functions ** |
288 | 281 |
// ************************************ |
289 | 282 |
|
290 |
#define SYNC_START uint8_t tmp_sreg; do { tmp_sreg=SREG; cli (); } while (false) |
|
291 |
#define SYNC_RESTART do { tmp_sreg=SREG; cli (); } while (false) |
|
292 |
#define SYNC_END do { SREG=tmp_sreg; } while (false) |
|
283 |
static void sort_orbs_buffer (void) |
|
284 |
{ |
|
285 |
// This function applies an optimized bubble sort. TODO document, and other |
|
286 |
// methods would probably not be faster. |
|
287 |
// Considering the low number of data points, more |
|
288 |
// sophisticated algorithms are unlikely to be faster, |
|
289 |
// especially as this function is fairly optimized. |
|
290 |
|
|
291 |
// Macro to swap two values of any type. Requires a temp variable of the |
|
292 |
// appropriate type. |
|
293 |
#define swap(a,b) { temp=a; a=b; b=temp; } |
|
293 | 294 |
|
295 |
// Macro to do one bubble sorting step (compare & swap) |
|
296 |
#define bubble \ |
|
297 |
if(a->time > b->time) \ |
|
298 |
{ \ |
|
299 |
swap (a->time, b->time); \ |
|
300 |
swap (a->mask, b->mask); \ |
|
301 |
done=false; \ |
|
302 |
} |
|
303 |
|
|
304 |
// Macro to move to the next bubble sort pair |
|
305 |
#define next { a++; b++; } |
|
306 |
|
|
307 |
// Whether no change was made during the last run, which means that all |
|
308 |
// values are in correct order. |
|
309 |
bool done; |
|
310 |
|
|
311 |
// A temporary variable for swapping. |
|
312 |
uint8_t temp; |
|
313 |
|
|
314 |
// Precompute the first PWM channel (tested faster). |
|
315 |
struct pwm_channel_t *first=&(pwm_write_buffer->channel[0]); |
|
316 |
|
|
317 |
// Pointers to the two PWM channels under inspection |
|
318 |
struct pwm_channel_t *a, *b; |
|
319 |
|
|
320 |
// The actual sorting |
|
321 |
|
|
322 |
a=first; b=a+1; done=true; |
|
323 |
bubble next bubble next bubble next bubble next bubble |
|
324 |
if (done) return; |
|
325 |
|
|
326 |
a=first; b=a+1; done=true; |
|
327 |
bubble next bubble next bubble next bubble |
|
328 |
if (done) return; |
|
329 |
|
|
330 |
a=first; b=a+1; done=true; |
|
331 |
bubble next bubble next bubble |
|
332 |
if (done) return; |
|
333 |
|
|
334 |
a=first; b=a+1; done=true; |
|
335 |
bubble next bubble |
|
336 |
if (done) return; |
|
337 |
|
|
338 |
a=first; b=a+1; done=true; |
|
339 |
bubble |
|
340 |
if (done) return; |
|
341 |
|
|
342 |
// Undefine the macros so they do not disturb some other function. |
|
343 |
#undef next |
|
344 |
#undef bubble |
|
345 |
#undef swap |
|
346 |
} |
|
347 |
|
|
348 |
static void fill_orbs_buffer (void) |
|
349 |
{ |
|
350 |
#define copy_value(orb, color) \ |
|
351 |
index=NUM_COLORS*orb+color; \ |
|
352 |
time=orb_values[orb][color]; \ |
|
353 |
mask=orb_mask[orb][color]; \ |
|
354 |
\ |
|
355 |
pwm_write_buffer->channel[index].time=time-1; \ |
|
356 |
pwm_write_buffer->channel[index].mask=mask; \ |
|
357 |
\ |
|
358 |
if (time!=0) \ |
|
359 |
pwm_write_buffer->init_mask &= ~mask; \ |
|
360 |
|
|
361 |
// TODO try using pointers. Might make it even faster. |
|
362 |
uint8_t index, time, mask; |
|
363 |
copy_value(0,0); copy_value(0,1); copy_value(0,2); |
|
364 |
copy_value(1,0); copy_value(1,1); copy_value(1,2); |
|
365 |
|
|
366 |
#undef copy_value |
|
367 |
} |
|
368 |
|
|
294 | 369 |
static void apply_orbs (void) |
295 | 370 |
{ |
371 |
// Time for apply_orbs (enable_orb_pwm block only), with interrupts disabled |
|
372 |
// (difference to naive bs): |
|
373 |
// Correct order Reverse order |
|
374 |
// Naive bubble sort: 147 216 |
|
375 |
// Aborting bubble sort: 70 (-52%) 231 (+7%) |
|
376 |
// Aborting w/ top: 72 (-51%) 188 (-13%) |
|
377 |
|
|
378 |
// Rolled out with aborting: 60 (-59%) 119 (-45%) |
|
379 |
// +pointers: 61 (-59%) 97 (-55%) |
|
380 |
// Also unrolled copy loop: 34 (-77%) 71 (-67%) (turns out 27us were spent on loop overhead) |
|
381 |
|
|
382 |
// Improvement of rolling out + pointers: -53%/-62% |
|
383 |
|
|
296 | 384 |
if (enable_orb_pwm) |
297 | 385 |
{ |
298 | 386 |
// PWM mode |
... | ... | |
303 | 391 |
pwm_write_buffer->init_mask=~0; |
304 | 392 |
|
305 | 393 |
// 1. Write the orb values and corresponding masks to the pwm channels |
306 |
// array unsorted |
|
307 |
|
|
308 |
for (uint8_t orb=0; orb<2; ++orb) |
|
309 |
{ |
|
310 |
for (uint8_t color=0; color<3; ++color) |
|
311 |
{ |
|
312 |
// TODO this should be faster w/o multiplication |
|
313 |
uint8_t index=NUM_COLORS*orb+color; |
|
314 |
uint8_t time=orb_values[orb][color]; |
|
315 |
uint8_t mask=orb_mask[orb][color]; |
|
316 |
|
|
317 |
pwm_write_buffer->channel[index].time=time-1; |
|
318 |
pwm_write_buffer->channel[index].mask=mask; |
|
319 |
|
|
320 |
// If the channel is not off (0), it has to be turned on |
|
321 |
// (low) at the beginning of the PWM cycle. |
|
322 |
if (time!=0) |
|
323 |
pwm_write_buffer->init_mask &= ~mask; |
|
324 |
} |
|
325 |
} |
|
394 |
// array unsorted. |
|
395 |
fill_orbs_buffer (); |
|
326 | 396 |
|
327 |
// 2. Sort the values. Use bubble sort. |
|
328 |
// Considering the low number of data points, more |
|
329 |
// sophisticated algorithms are unlikely to be faster. |
|
330 |
bool done; |
|
331 |
// For 6 channels, we count i=0..4 and compare i with i+1 |
|
332 |
uint8_t top=num_pwm_channels-1; |
|
333 |
do |
|
334 |
{ |
|
335 |
done=true; // We are done unless we do some swapping |
|
336 |
|
|
337 |
for (uint8_t i=0; i<top; ++i) |
|
338 |
{ |
|
339 |
#define channel_a pwm_write_buffer->channel[i] |
|
340 |
#define channel_b pwm_write_buffer->channel[i+1] |
|
397 |
// 2. sort the buffer. |
|
398 |
sort_orbs_buffer (); |
|
341 | 399 |
|
342 |
if (channel_a.time>channel_b.time) |
|
343 |
{ |
|
344 |
uint8_t temp; |
|
345 |
|
|
346 |
|
|
347 |
// Swap the times |
|
348 |
temp = channel_a.time; |
|
349 |
channel_a.time = channel_b.time; |
|
350 |
channel_b.time = temp; |
|
351 |
|
|
352 |
// Swap the masks |
|
353 |
temp = channel_a.mask; |
|
354 |
channel_a.mask = channel_b.mask; |
|
355 |
channel_b.mask = temp; |
|
356 |
|
|
357 |
done=false; // No, we're not done yet. |
|
358 |
} |
|
359 |
|
|
360 |
#undef channel_a |
|
361 |
#undef channel_b |
|
362 |
} |
|
363 |
|
|
364 |
// On the next iteration, we need one less comparison |
|
365 |
top--; |
|
366 |
} while (!done); |
|
367 |
|
|
368 |
// Flip the write buffer with the free buffer |
|
400 |
// Flip the write buffer with the free buffer. |
|
369 | 401 |
SYNC |
370 | 402 |
{ |
371 | 403 |
struct pwm_t *temp = pwm_write_buffer; |
... | ... | |
373 | 405 |
pwm_free_buffer = temp; |
374 | 406 |
} |
375 | 407 |
|
376 |
// On the next overflow, do the page flip.
|
|
408 |
// On the next overflow, flip the read buffer with the free buffer.
|
|
377 | 409 |
pwm_page_flip=true; |
378 | 410 |
|
379 | 411 |
PORTF&=~2; |
... | ... | |
585 | 617 |
// ** Initialization ** |
586 | 618 |
// ******************** |
587 | 619 |
|
620 |
void orb_disable_timer (void) |
|
621 |
{ |
|
622 |
TIMSK&=~( _BV(OCIE0) | _BV(TOIE0)); |
|
623 |
} |
|
624 |
|
|
625 |
|
|
626 |
void orb_enable_timer (void) |
|
627 |
{ |
|
628 |
TIMSK|= _BV(OCIE0) | _BV(TOIE0); |
|
629 |
} |
|
630 |
|
|
588 | 631 |
/** |
589 | 632 |
* Initializes the PWM for Orb control. This must be called before |
590 | 633 |
* the orbs are used for them to function. |
... | ... | |
624 | 667 |
TCCR0=_BV(CS02) | _BV(CS01); // 1024, 30 Hz |
625 | 668 |
|
626 | 669 |
// Enable compare match and overflow interrupts |
627 |
TIMSK=_BV(OCIE0) | _BV(TOIE0);
|
|
670 |
orb_enable_timer ();
|
|
628 | 671 |
|
629 | 672 |
// Debug |
630 | 673 |
DDRF=6; |
... | ... | |
661 | 704 |
// } |
662 | 705 |
} |
663 | 706 |
|
664 |
// Pure sorting time/us (interrupts disabled!) (difference to naive bs): |
|
665 |
// Correct order Reverse order |
|
666 |
// Naive bubble sort: 147 216 |
|
667 |
// Aborting bubble sort: 70 (-52%) 231 (+7%) |
|
668 |
// Aborting w/ top: 72 (-51%) 188 (-13%) |
Also available in: Unified diff