Cycles per instruction in a delay loop on ARM

I'm trying to understand some assembler generated for the stm32f103 chipset by arm-none-eabi-gcc, which seems to run at exactly half the speed I expect. I'm not very familiar with assembler, but since everyone always says to read the asm if you want to understand what your compiler is doing, I'm seeing how far I can get. It's a simple function:

/*
 * Busy-wait delay: counts a volatile counter down from 6000 * num to zero.
 *
 * num   - scaling factor; the loop body executes 6000 * num times
 *         (the multiplication is done in uint32_t, so it wraps on overflow).
 *
 * Both `num` and `index` are volatile, so the compiler must perform every
 * read and write of them in memory; the generated listing below shows the
 * counter being loaded from and stored to the stack on each iteration.
 * The wall-clock duration therefore depends on the generated code and the
 * memory system, not just on the iteration count.
 */
void delay(volatile uint32_t num) { 
    /* volatile store of 0, immediately overwritten by the for-initializer */
    volatile uint32_t index = 0; 
    /* empty body: each iteration is only the volatile decrement and test */
    for(index = (6000 * num); index != 0; index--) {} 
}

The clock speed is 72 MHz, and the above function gives me a delay of 1 ms, but I expect 0.5 ms (since (6000 * 6) / 72000000 = 0.0005).

Assembler:

@-----------------------------------------------------------------------
@ void delay(volatile uint32_t num)
@ Compiler output (arm-none-eabi-gcc, Thumb-2) for the busy-wait loop.
@ In:    r0 = num
@ Out:   none
@ Clobb: r2, r3, flags
@ Stack: 16 bytes; [sp, #4] = num, [sp, #12] = index (both volatile,
@        so every access goes through memory)
@
@ The figure in square brackets on each loop instruction is the cycle
@ count the question's author ASSUMED (9 per iteration in total); it is
@ not a measured or guaranteed value.
@-----------------------------------------------------------------------
delay:
        @ args = 0, pretend = 0, frame = 16
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        sub     sp, sp, #16         @ reserve 16 bytes of stack for the locals
        movs    r3, #0              @ r3 = 0 (also updates the condition flags)
        str     r0, [sp, #4]        @ num = r0   (spill the volatile parameter)
        str     r3, [sp, #12]       @ index = 0
        ldr     r3, [sp, #4]        @ r3 = num   (volatile re-read)
        movw    r2, #6000           @ r2 = 6000
        mul     r3, r2, r3          @ r3 = 6000 * num
        str     r3, [sp, #12]       @ index = 6000 * num
        ldr     r3, [sp, #12]       @ r3 = index (volatile re-read for the first test)
        cbz     r3, .L1             @ if (index == 0) skip the loop entirely
.L4:
        ldr     r3, [sp, #12]   @ [2]  r3 = index
        subs    r3, r3, #1      @ [1]  r3 = r3 - 1, updating the APSR flags
        str     r3, [sp, #12]   @ [2]  index = r3
        ldr     r3, [sp, #12]   @ [2]  r3 = index (volatile re-read for the test)
        cmp     r3, #0          @ [1]  set flags from r3 - 0 (Z set when r3 == 0)
        bne     .L4             @ [1]  loop while index != 0; the Cortex-M3 TRM lists a
                                @      taken branch as 1 + P cycles (pipeline refill),
                                @      so 1 is only a lower bound
.L1:
        add     sp, sp, #16         @ release the 16-byte frame
        @ sp needed
        bx      lr                  @ return to caller
        .size   delay, .-delay
        .align  2
        .global blink
        .thumb
        .thumb_func
        .type   blink, %function

, , , . , .L4 , 6 . , , , , , , , , , 2 .

: , , 5 ccd, . ( , ), . , , , ( 100 ) , . , , .

, , , , .

: , arm tech ref . . 9 , 12, . ?

TIA,

, ElderBug, , , . , , , 20 , C , , , gcc, , . BTW Elder wait_cycles , . .

+4
2

-, "-" C - . , -O0 ( ), , (EDIT: , , , , volatile, ). C . , ​​, , , ( , , ).

, 1 , , 1 . , bne , . , , . , DMA. , , STR LDR, . STR, LDR ( -O0); MCU "store-to-load", .


1 .

( < 1μs, , 1 ), :

/*
 * Busy-wait until at least `us` microseconds' worth of timer ticks have
 * elapsed since entry.
 *
 * us - requested delay in microseconds.
 *
 * GET_TIMER() and TIMER_FREQ are defined elsewhere; presumably GET_TIMER()
 * reads a free-running counter and TIMER_FREQ is its rate in Hz (confirm
 * against their definitions). The elapsed time is computed as a difference
 * from the starting sample rather than by comparing absolute values.
 */
void wait_us( uint32_t us ) {
    /* Sample the timer first so the conversion below counts towards the wait. */
    uint32_t mark = GET_TIMER();
    /* Convert microseconds to timer ticks. */
    uint32_t ticks = us * (TIMER_FREQ/1000000);
    for( ;; ) {
        if( GET_TIMER() - mark >= ticks ) {
            break;
        }
    }
}

mark , , . :

/* Take the reference timestamp before starting the work, so the time spent
 * in some_task() is included in the overall wait. */
uint32_t mark = GET_TIMER();
some_task();
/* NOTE(review): this call passes two arguments, while the wait_us() shown
 * earlier takes only `us`. It presumably refers to a variant that receives
 * the starting timestamp as its first parameter instead of sampling
 * GET_TIMER() itself - confirm before reusing. */
wait_us( mark, 200 );

ARM Cortex-M4 ( ):

/* Cycles consumed by one SUBS + BNE iteration of the loop below. The value
 * depends on the core and memory system; adjust it for the target MCU. */
#define CYCLES_PER_LOOP 3

/*
 * Busy-wait for approximately `n` CPU cycles using a two-instruction
 * countdown loop.
 *
 * n - number of cycles to wait; rounded down to a multiple of
 *     CYCLES_PER_LOOP. Values below CYCLES_PER_LOOP return immediately.
 */
inline void wait_cycles( uint32_t n ) {
    uint32_t l = n/CYCLES_PER_LOOP;
    /* A zero count must not enter the loop: SUBS would wrap it to
     * 0xFFFFFFFF and the loop would then spin roughly 2^32 times. */
    if( l == 0 ) {
        return;
    }
    /* SUBS updates the condition flags, so "cc" is declared as clobbered;
     * `l` is both read and written by the loop ("+r"). */
    asm volatile( "0:" "SUBS %[count], 1;" "BNE 0b;" :[count]"+r"(l) : : "cc" );
}

, . , CYCLES_PER_LOOP, , MCU ( 1 + 2 SUBS+BNE).

+9

-3, ? ram / ( ), .

, (, sram, ) ram (- ), , , m3 , ( ) , , , , , ram- , ram, , , nops , , (, , ), , , , (, 8 ).

, , , , , , , . , , .

, ( ) , - - - , , - , . gpio , /. gpio, , , , .

cpld, , rev .

+3

Source: https://habr.com/ru/post/1608509/


All Articles