This article explains how Atmel Studio converts C/C++ source code into Assembly code, and then into a HEX file. In a previous article, I explained how to compile a simple C program using Atmel Studio and flash it onto an Arduino Nano equipped with an Atmel ATmega328p microcontroller. The "Hello World" program simply blinks LED 13 ON (about 500ms) and OFF (about 2500ms), so the total cycle time is about 3000ms. The project file, including the C source code, is available here: nano_helloworld.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
#include <avr/io.h> #define F_CPU 16000000UL // 16 MHz #include <util/delay.h> uint8_t cicli_ON = 5; uint8_t cicli_OFF = 25; uint8_t aspetta_100ms(uint8_t n_cicli){ for (uint8_t i = 0; i<n_cicli; i++){ _delay_ms(100); } return 0; } int main(void) { DDRB = 0xFF; // all output pins /* Replace with your application code */ while (1) { PORTB = 1 << 5; aspetta_100ms(cicli_ON); PORTB = 0; aspetta_100ms(cicli_OFF); } } |
The source code is very simple, as visible above. Inside the main() function, all pins of microcontroller port B are first configured as outputs by the instruction DDRB=0xFF. Then, inside the while loop, which is repeated indefinitely, the output pin B5 is continuously turned ON and OFF. After turning ON the pin, the microcontroller waits for 500ms, using the function aspetta_100ms(), which repeats a 100ms delay period 5 times. After turning OFF the pin, the same function is called and repeats a 100ms delay period 25 times, for a total of 2500ms.
The Assembly source file (created by Atmel Studio) is inside the project ZIP file (nano_helloworld), and its name is "Nano_HelloWorld.lss". The complete Assembly code created is visible below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
Nano_HelloWorld.elf: file format elf32-avr Sections: Idx Name Size VMA LMA File off Algn 0 .data 00000002 00800100 000000d8 0000014c 2**0 CONTENTS, ALLOC, LOAD, DATA 1 .text 000000d8 00000000 00000000 00000074 2**1 CONTENTS, ALLOC, LOAD, READONLY, CODE 2 .comment 00000030 00000000 00000000 0000014e 2**0 CONTENTS, READONLY 3 .note.gnu.avr.deviceinfo 00000040 00000000 00000000 00000180 2**2 CONTENTS, READONLY 4 .debug_aranges 00000028 00000000 00000000 000001c0 2**0 CONTENTS, READONLY, DEBUGGING 5 .debug_info 0000019f 00000000 00000000 000001e8 2**0 CONTENTS, READONLY, DEBUGGING 6 .debug_abbrev 00000123 00000000 00000000 00000387 2**0 CONTENTS, READONLY, DEBUGGING 7 .debug_line 0000013c 00000000 00000000 000004aa 2**0 CONTENTS, READONLY, DEBUGGING 8 .debug_frame 00000034 00000000 00000000 000005e8 2**2 CONTENTS, READONLY, DEBUGGING 9 .debug_str 00000201 00000000 00000000 0000061c 2**0 CONTENTS, READONLY, DEBUGGING 10 .debug_loc 00000088 00000000 00000000 0000081d 2**0 CONTENTS, READONLY, DEBUGGING 11 .debug_ranges 00000018 00000000 00000000 000008a5 2**0 CONTENTS, READONLY, DEBUGGING Disassembly of section .text: 00000000 <__vectors>: 0: 0c 94 34 00 jmp 0x68 ; 0x68 <__ctors_end> 4: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 8: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> c: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 10: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 14: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 18: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 1c: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 20: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 24: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 28: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 2c: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 30: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 34: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 38: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 3c: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 40: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 44: 0c 
94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 48: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 4c: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 50: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 54: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 58: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 5c: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 60: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 64: 0c 94 49 00 jmp 0x92 ; 0x92 <__bad_interrupt> 00000068 <__ctors_end>: 68: 11 24 eor r1, r1 6a: 1f be out 0x3f, r1 ; 63 6c: cf ef ldi r28, 0xFF ; 255 6e: d8 e0 ldi r29, 0x08 ; 8 70: de bf out 0x3e, r29 ; 62 72: cd bf out 0x3d, r28 ; 61 00000074 <__do_copy_data>: 74: 11 e0 ldi r17, 0x01 ; 1 76: a0 e0 ldi r26, 0x00 ; 0 78: b1 e0 ldi r27, 0x01 ; 1 7a: e8 ed ldi r30, 0xD8 ; 216 7c: f0 e0 ldi r31, 0x00 ; 0 7e: 02 c0 rjmp .+4 ; 0x84 <__do_copy_data+0x10> 80: 05 90 lpm r0, Z+ 82: 0d 92 st X+, r0 84: a2 30 cpi r26, 0x02 ; 2 86: b1 07 cpc r27, r17 88: d9 f7 brne .-10 ; 0x80 <__do_copy_data+0xc> 8a: 0e 94 5c 00 call 0xb8 ; 0xb8 <main> 8e: 0c 94 6a 00 jmp 0xd4 ; 0xd4 <_exit> 00000092 <__bad_interrupt>: 92: 0c 94 00 00 jmp 0 ; 0x0 <__vectors> 00000096 <_Z13aspetta_100msh>: uint8_t cicli_ON = 5; uint8_t cicli_OFF = 25; uint8_t aspetta_100ms(uint8_t n_cicli){ for (uint8_t i = 0; i<n_cicli; i++){ 96: 88 23 and r24, r24 98: 69 f0 breq .+26 ; 0xb4 <_Z13aspetta_100msh+0x1e> 9a: 90 e0 ldi r25, 0x00 ; 0 #else //round up by default __ticks_dc = (uint32_t)(ceil(fabs(__tmp))); #endif __builtin_avr_delay_cycles(__ticks_dc); 9c: 2f ef ldi r18, 0xFF ; 255 9e: 31 ee ldi r19, 0xE1 ; 225 a0: 44 e0 ldi r20, 0x04 ; 4 a2: 21 50 subi r18, 0x01 ; 1 a4: 30 40 sbci r19, 0x00 ; 0 a6: 40 40 sbci r20, 0x00 ; 0 a8: e1 f7 brne .-8 ; 0xa2 <_Z13aspetta_100msh+0xc> aa: 00 c0 rjmp .+0 ; 0xac <_Z13aspetta_100msh+0x16> ac: 00 00 nop ae: 9f 5f subi r25, 0xFF ; 255 b0: 98 13 cpse r25, r24 b2: f4 cf rjmp .-24 ; 0x9c <_Z13aspetta_100msh+0x6> _delay_ms(100); } return 0; } b4: 80 e0 ldi r24, 0x00 ; 0 b6: 08 95 ret 000000b8 
<main>: int main(void) { DDRB = 0xFF; // all output pins b8: 8f ef ldi r24, 0xFF ; 255 ba: 84 b9 out 0x04, r24 ; 4 /* Replace with your application code */ while (1) { PORTB = 1 << 5; bc: c0 e2 ldi r28, 0x20 ; 32 be: c5 b9 out 0x05, r28 ; 5 aspetta_100ms(cicli_ON); c0: 80 91 01 01 lds r24, 0x0101 c4: 0e 94 4b 00 call 0x96 ; 0x96 <_Z13aspetta_100msh> PORTB = 0; c8: 15 b8 out 0x05, r1 ; 5 aspetta_100ms(cicli_OFF); ca: 80 91 00 01 lds r24, 0x0100 ce: 0e 94 4b 00 call 0x96 ; 0x96 <_Z13aspetta_100msh> _delay_ms(100); } return 0; } int main(void) d2: f5 cf rjmp .-22 ; 0xbe <main+0x6> 000000d4 <_exit>: d4: f8 94 cli 000000d6 <__stop_program>: d6: ff cf rjmp .-2 ; 0xd6 <__stop_program> |
The program file reports how much ROM memory space is used by each part of the program:
- "data", which corresponds to initialized global variables, is using 0x02 bytes. This corresponds to the initial values of cicli_ON (initialized to 5) and cicli_OFF (initialized to 25). Each of the 2 global variables uses 1 byte of ROM memory. These initial values are stored in the ROM memory, and then copied into RAM memory at microcontroller startup.
- "text", which corresponds to program instructions, is using 0xd8 bytes, which means a total size of 216 bytes. According to the ATmega328p datasheet, most instructions are 16 bits (2 bytes) wide, while a few instructions, such as "jmp" (jump), "call" and "lds", are 32 bits (4 bytes) wide. For this reason, the number of instructions is about half of the number of bytes inside the "text" section.
As shown in the table above, which is taken from the ATmega328p microcontroller datasheet, each interrupt uses 2 program addresses (4 bytes in total, since 1 program address corresponds to 2 bytes = 16 bits). In fact, inside the Assembly file, it can be noticed that flash memory bytes 0x00 - 0x67 (a total of 104 bytes) contain "jmp" (jump) instructions, which are executed when an interrupt happens. The most important interrupt is the "reset" interrupt, which is located at byte 0x00. When the microcontroller program starts (reset event), execution begins at byte 0x00 and immediately jumps to the instruction located at byte 0x68.
The following instructions have the purpose of initializing the SREG register (status register), which is located at RAM relative address 0x3F (inside IO memory, absolute address is 0x5F) and the stack pointer: RAM IO memory relative addresses 0x3D (SPL) and 0x3E (SPH) are initialized at the value of 0x08FF, which corresponds to the address of the last byte of RAM memory (internal RAM memory has a size of 2048 bytes, starting from absolute address 0x0100 and ending at address 0x08FF).
1 2 3 4 5 6 7 |
00000068 <__ctors_end>: 68: 11 24 eor r1, r1 6a: 1f be out 0x3f, r1 ; 63 6c: cf ef ldi r28, 0xFF ; 255 6e: d8 e0 ldi r29, 0x08 ; 8 70: de bf out 0x3e, r29 ; 62 72: cd bf out 0x3d, r28 ; 61 |
After initializing the SREG register and stack pointer registers SPL and SPH, the program does the following. First of all, the RAM memory addresses 0x0100 and 0x0101 are initialized with the values of cicli_OFF=25 and cicli_ON=5, respectively (this matches the "lds" instructions inside main(), which read cicli_ON from 0x0101 and cicli_OFF from 0x0100). These initialization values are saved into flash memory, at addresses 0xD8 and 0xD9, which means the last 2 bytes of the program image stored in the HEX file (which has a total of 218 bytes: 216 bytes of "text" plus 2 bytes of "data"). After performing this initialization, the main() function is called, using the "call" instruction to transfer execution to the program instruction located at memory address 0xB8.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
00000074 <__do_copy_data>: 74: 11 e0 ldi r17, 0x01 ; 1 76: a0 e0 ldi r26, 0x00 ; 0 78: b1 e0 ldi r27, 0x01 ; 1 7a: e8 ed ldi r30, 0xD8 ; 216 7c: f0 e0 ldi r31, 0x00 ; 0 7e: 02 c0 rjmp .+4 ; 0x84 <__do_copy_data+0x10> 80: 05 90 lpm r0, Z+ 82: 0d 92 st X+, r0 84: a2 30 cpi r26, 0x02 ; 2 86: b1 07 cpc r27, r17 88: d9 f7 brne .-10 ; 0x80 <__do_copy_data+0xc> 8a: 0e 94 5c 00 call 0xb8 ; 0xb8 <main> 8e: 0c 94 6a 00 jmp 0xd4 ; 0xd4 <_exit> |
The main() function is composed of the following instructions. First of all, the DDRB register (port B direction register) is written so that all port B pins are set as outputs. After that, the program instruction loop between bytes 0xBE and 0xD3 continues to be executed, until microcontroller power is removed or the reset button is pressed. Inside each loop cycle, the waiting function "aspetta_100ms()" is called 2 times, with different parameters: the first time (ON), the number of delay cycles loaded is 5 (value stored in RAM address 0x0101); the second time (OFF), the number of delay cycles loaded is 25 (value stored in RAM address 0x0100). This byte parameter is passed to the function "aspetta_100ms()" in the general-purpose register "r24". The function is called with the Assembly instruction "call", and the software execution jumps to ROM memory address 0x96.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
int main(void) { DDRB = 0xFF; // all output pins b8: 8f ef ldi r24, 0xFF ; 255 ba: 84 b9 out 0x04, r24 ; 4 /* Replace with your application code */ while (1) { PORTB = 1 << 5; bc: c0 e2 ldi r28, 0x20 ; 32 be: c5 b9 out 0x05, r28 ; 5 aspetta_100ms(cicli_ON); c0: 80 91 01 01 lds r24, 0x0101 c4: 0e 94 4b 00 call 0x96 ; 0x96 <_Z13aspetta_100msh> PORTB = 0; c8: 15 b8 out 0x05, r1 ; 5 aspetta_100ms(cicli_OFF); ca: 80 91 00 01 lds r24, 0x0100 ce: 0e 94 4b 00 call 0x96 ; 0x96 <_Z13aspetta_100msh> _delay_ms(100); } return 0; } int main(void) d2: f5 cf rjmp .-22 ; 0xbe <main+0x6> |
The Assembly code of the function is shown below. The function instructions start at ROM address 0x96. Register "r25" is used as the loop counter (variable "i"): it starts at 0 and is incremented after each 100ms delay. The 100ms waiting cycles are repeated until register "r25" has the same value as register "r24", which holds the function input parameter. When this condition is met, the function sets register "r24" to 0 (return 0) and the program flow comes back to main().
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
uint8_t aspetta_100ms(uint8_t n_cicli){ for (uint8_t i = 0; i<n_cicli; i++){ 96: 88 23 and r24, r24 98: 69 f0 breq .+26 ; 0xb4 <_Z13aspetta_100msh+0x1e> 9a: 90 e0 ldi r25, 0x00 ; 0 #else //round up by default __ticks_dc = (uint32_t)(ceil(fabs(__tmp))); #endif __builtin_avr_delay_cycles(__ticks_dc); 9c: 2f ef ldi r18, 0xFF ; 255 9e: 31 ee ldi r19, 0xE1 ; 225 a0: 44 e0 ldi r20, 0x04 ; 4 a2: 21 50 subi r18, 0x01 ; 1 a4: 30 40 sbci r19, 0x00 ; 0 a6: 40 40 sbci r20, 0x00 ; 0 a8: e1 f7 brne .-8 ; 0xa2 <_Z13aspetta_100msh+0xc> aa: 00 c0 rjmp .+0 ; 0xac <_Z13aspetta_100msh+0x16> ac: 00 00 nop ae: 9f 5f subi r25, 0xFF ; 255 b0: 98 13 cpse r25, r24 b2: f4 cf rjmp .-24 ; 0x9c <_Z13aspetta_100msh+0x6> _delay_ms(100); } return 0; } b4: 80 e0 ldi r24, 0x00 ; 0 b6: 08 95 ret |