Load/Store Instructions

The Nios II architecture is a load-store architecture. Load and store instructions handle all data movement between registers, memory, and peripherals. Memories and peripherals share a common address space. Some Nios II processor cores use memory caching and/or write buffering to improve memory bandwidth. The architecture provides instructions for both cached and uncached accesses.

Wide Data Transfer Instructions

The following table describes the wide (32-bit) load and store instructions. Data transfers for I/O peripherals should use ldwio and stwio.

Instruction | Description | Cache
ldw, stw | Transfer 32-bit data words from/to memory. | yes
ldwio, stwio | Transfer 32-bit data words from/to peripherals. | no

The effective address is the sum of a register's contents and a signed immediate value contained in the instruction.

Ordinary memory transfers can be cached or buffered to improve program performance. This caching and buffering might cause memory cycles to occur out of order, and caching might suppress some cycles entirely.

Access cycles for ldwio and stwio instructions are guaranteed to occur in instruction order and are never suppressed.

Narrow Data Transfer Instructions

Memory accesses can be cached or buffered to improve performance. To transfer data to I/O peripherals, use the “io” versions of the instructions, described below.

Load | Store | Description | Bit Extension on Load
ldb | stb | signed byte transfer. | sign
ldh | sth | signed halfword transfer. | sign
ldbu | | unsigned byte transfer. | zero
ldhu | | unsigned halfword transfer. | zero

The following operations load/store byte and half-word data from/to peripherals without caching or buffering.

Load | Store | Description | Bit Extension on Load
ldbio | stbio | signed byte transfer. | sign
ldhio | sthio | signed halfword transfer. | sign
ldbuio | | unsigned byte transfer. | zero
ldhuio | | unsigned halfword transfer. | zero

C Source

Download: mem3.zip


01: #include <stdio.h>
02: 
03: char *phrase = "The quick brown fox jumps over the lazy dog"; /* pointer variable; main() prints the pointer's value, i.e. the address of the literal */
04: 
05: signed char peppers[] = {'p','e','p','p','e','r','s',-5,0}; /* explicitly signed bytes: one negative value (-5) ahead of the 0 terminator */
06: 
07: unsigned short bins[96]; /* histogram counters, no initializer; NOTE(review): declared unsigned short, but the prototypes below take short[] */
08: 
09: double v[4] = {1.0, 2.0, 3.0, 4.0}; /* input to cum_sum(), which overwrites v[1..3] with running totals */
10: 
11: int strlen(char *buf); /* local string-length routine; NOTE(review): reuses the library name with a signature that differs from size_t strlen(const char *) */
12: void do_hist(char *str, int len, short bins[96]); /* clears bins[] and counts the characters of str into it */
13: void show_bins(short bins[96]); /* prints the characters whose bin count is non-zero */
14: double cum_sum(double v[], int len); /* turns v[] into its running sum and returns the total */
15: 
16: int main() /* prints the address of each global, runs strlen, do_hist/show_bins and cum_sum on them, and returns 0 */
17: {
18:         int i, nv, ns;
19:         printf("peppers = %x\n",(unsigned int) peppers); /* address of the array, in hex */
20:         nv = strlen(peppers); /* NOTE(review): signed char * passed where the prototype declares char * */
21:         printf("string length %d\n",nv);
22:         ns = 0;
23:         for (i=0; i<nv; i++) {
24:                 if (peppers[i]<0) ns++; /* counts elements that are negative when read as signed char */
25:         }
26:         printf("negative bytes: %d\n",ns);
27:         printf("phrase = %x\n",(unsigned int) phrase); /* value stored in the pointer, not the address of the pointer itself */
28:         nv = strlen(phrase);
29:         printf("bins = %x\n",(unsigned int) bins);
30:         do_hist(phrase,nv,bins); /* NOTE(review): bins is unsigned short[], the parameter is short[] */
31:         show_bins(bins);
32:         printf("v = %x\n",(unsigned int) v);
33:         nv = sizeof(v) / sizeof(v[0]); /* number of elements in v */
34:         printf("v length %d\n",nv);
35:         cum_sum(v,nv); /* return value is discarded; only the in-place update of v is kept */
36:     /* small_library can not print doubles
37:         for (i=0; i<nv; i++) {
38:                 printf("%g\n",v[i]);
39:         }
40:     */
41:         return 0;
42: }
43: 
44: int strlen(char *str) /* returns the number of chars that precede the first zero byte of str */
45: {
46:         int count = 0;
47:         while (*str++) count++; /* str is advanced past every byte tested, including the terminating zero; count is not incremented for it */
48:         return count;
49: }
50: 
51: void show_bins(short bins[96]) /* builds a string of the characters i+32 whose bins[i] is non-zero for i = 1..59, prints it, then prints its length */
52: {
53:         int i, n;
54:         unsigned char ch;
55:         unsigned char str[60];
56:         unsigned char *cp; /* next free position in str */
57:         cp = str;
58:         for (i=1; i<60; i++) { /* bin 0 and bins 60..95 are never examined */
59:                 *cp = 0; /* terminate the string at its current end before looking at bin i */
60:                 ch = i+32; /* character code that bin i stands for */
61:                 n = bins[i];
62:                 if (n==0) continue;
63:                 *cp++ = ch;
64:                 //printf("%c %4d\n",ch,bins[i]);
65:         }
66:         printf("characters present:\n%s\n",str); /* NOTE(review): the terminator is only written at the top of each pass, so if bins[59] is non-zero the last stored character has no zero byte written after it */
67:         n = strlen(str); /* NOTE(review): unsigned char * passed where the prototype declares char * */
68:         printf("number of characters: %d\n",n);
69: }
70: 
71: void do_hist(char *str, int len, short bins[96]) /* zeroes all 96 bins, then tallies at most len characters of str, stopping early at a zero code */
72: {
73:         int i, n;
74:         for (i=0; i<96; i++) bins[i] = 0;
75:         for (i=0; i<len; i++) {
76:                 n = str[i]&0x7F; /* keep only the low 7 bits of the character */
77:                 if (n==0) break;
78:                 if (n>96) n -= 32; /* codes 97..127 are folded onto 65..95 */
79:                 if (n>32) n -= 32; /* codes 33..96 (after folding) become bin indices 1..64 */
80:                 else n = 0; /* codes 1..32 all share bin 0 */
81:                 bins[n]++;
82:         }
83: }
84: 
85: double cum_sum(double v[], int len) /* overwrites v[i] with v[0]+...+v[i] for i = 1..len-1 and returns the last running sum */
86: {
87:         int i;
88:         double sum = v[0]; /* v[0] is read unconditionally, whatever len is, and is itself left unchanged */
89:         for (i=1; i<len; i++) {
90:                 sum += v[i];
91:                 v[i] = sum;
92:         }
93:         return sum; /* equals v[0] when the loop body never runs */
94: }


Results

peppers = 9860
string length 8
negative bytes: 1
phrase = 96fc
bins = 98fc
characters present:
ABCDEFGHIJKLMNOPQRSTUVWXYZ
number of characters: 26
v = 986c
v length 4

name | address | section | remarks
peppers | 0x9860 | .rwdata + 0 | data memory
phrase | 0x96fc | .rodata + 0 | program memory (read-only)
bins | 0x98fc | .bss + 4 | memory set to zero
v | 0x986c | .rwdata + 12 | data memory

The symbol phrase is assigned to location 0x000098e0, in section .rwdata. The contents of that address are initialized to 0x000096fc.

Load/Store Instances

The disassembly listing below contains instances of the various load/store instructions:

ldw   stw   ldb   ldbu   stb   ldh   ldhu   sth  

Disassembly

0000806c <main>:
void show_bins(short bins[96]);
double cum_sum(double v[], int len);

int main()
{
    806c:    defffb04     addi    sp,sp,-20
    8070:    dfc00415     stw    ra,16(sp)
    8074:    df000315     stw    fp,12(sp)
    8078:    d839883a     mov    fp,sp
    int i, nv, ns;
    printf("peppers = %x\n",(unsigned int) peppers);
    807c:    01000074     movhi    r4,1
    8080:    2125ca04     addi    r4,r4,-26840
    8084:    01400074     movhi    r5,1
    8088:    29661804     addi    r5,r5,-26528
    808c:    0008d4c0     call    8d4c <printf>
    nv = strlen(peppers);
    8090:    01000074     movhi    r4,1
    8094:    21261804     addi    r4,r4,-26528
    8098:    00081ac0     call    81ac <strlen>
    809c:    e0800115     stw    r2,4(fp)
    printf("string length %d\n",nv);
    80a0:    01000074     movhi    r4,1
    80a4:    2125ce04     addi    r4,r4,-26824
    80a8:    e1400117     ldw    r5,4(fp)
    80ac:    0008d4c0     call    8d4c <printf>
    ns = 0;
    80b0:    e0000215     stw    zero,8(fp)
    for (i=0; i<nv; i++) {
    80b4:    e0000015     stw    zero,0(fp)
    80b8:    e0c00017     ldw    r3,0(fp)
    80bc:    e0800117     ldw    r2,4(fp)
    80c0:    18800e0e     bge    r3,r2,80fc <main+0x90>
        if (peppers[i]<0) ns++;
    80c4:    00c00074     movhi    r3,1
    80c8:    18e61804     addi    r3,r3,-26528
    80cc:    e0800017     ldw    r2,0(fp)
    80d0:    1885883a     add    r2,r3,r2
    80d4:    10800007     ldb    r2,0(r2)
    80d8:    1004403a     cmpge    r2,r2,zero
    80dc:    1000031e     bne    r2,zero,80ec <main+0x80>
    80e0:    e0800217     ldw    r2,8(fp)
    80e4:    10800044     addi    r2,r2,1
    80e8:    e0800215     stw    r2,8(fp)
    80ec:    e0800017     ldw    r2,0(fp)
    80f0:    10800044     addi    r2,r2,1
    80f4:    e0800015     stw    r2,0(fp)
    80f8:    003fef06     br    80b8 <main+0x4c>
    }
    printf("negative bytes: %d\n",ns);
    80fc:    01000074     movhi    r4,1
    8100:    2125d304     addi    r4,r4,-26804
    8104:    e1400217     ldw    r5,8(fp)
    8108:    0008d4c0     call    8d4c <printf>
    printf("phrase = %x\n",(unsigned int) phrase);
    810c:    01000074     movhi    r4,1
    8110:    2125d804     addi    r4,r4,-26784
    8114:    d1600017     ldw    r5,-32768(gp)
    8118:    0008d4c0     call    8d4c <printf>
    nv = strlen(phrase);
    811c:    d1200017     ldw    r4,-32768(gp)
    8120:    00081ac0     call    81ac <strlen>
    8124:    e0800115     stw    r2,4(fp)
    printf("bins = %x\n",(unsigned int) bins);
    8128:    01000074     movhi    r4,1
    812c:    2125dc04     addi    r4,r4,-26768
    8130:    01400074     movhi    r5,1
    8134:    29663f04     addi    r5,r5,-26372
    8138:    0008d4c0     call    8d4c <printf>
    do_hist(phrase,nv,bins);
    813c:    d1200017     ldw    r4,-32768(gp)
    8140:    e1400117     ldw    r5,4(fp)
    8144:    01800074     movhi    r6,1
    8148:    31a63f04     addi    r6,r6,-26372
    814c:    00082dc0     call    82dc <do_hist>
    show_bins(bins);
    8150:    01000074     movhi    r4,1
    8154:    21263f04     addi    r4,r4,-26372
    8158:    00082040     call    8204 <show_bins>
    printf("v = %x\n",(unsigned int) v);
    815c:    01000074     movhi    r4,1
    8160:    2125df04     addi    r4,r4,-26756
    8164:    01400074     movhi    r5,1
    8168:    29661b04     addi    r5,r5,-26516
    816c:    0008d4c0     call    8d4c <printf>
    nv = sizeof(v) / sizeof(v[0]);
    8170:    00800104     movi    r2,4
    8174:    e0800115     stw    r2,4(fp)
    printf("v length %d\n",nv);
    8178:    01000074     movhi    r4,1
    817c:    2125e104     addi    r4,r4,-26748
    8180:    e1400117     ldw    r5,4(fp)
    8184:    0008d4c0     call    8d4c <printf>
    cum_sum(v,nv);
    8188:    01000074     movhi    r4,1
    818c:    21261b04     addi    r4,r4,-26516
    8190:    e1400117     ldw    r5,4(fp)
    8194:    00083ec0     call    83ec <cum_sum>
    /* small_library can not print doubles
    for (i=0; i<nv; i++) {
        printf("%g\n",v[i]);
    }
    */
    return 0;
    8198:    0005883a     mov    r2,zero
}
    819c:    dfc00417     ldw    ra,16(sp)
    81a0:    df000317     ldw    fp,12(sp)
    81a4:    dec00504     addi    sp,sp,20
    81a8:    f800283a     ret

000081ac <strlen>:

int strlen(char *str)
{
    81ac:    defffd04     addi    sp,sp,-12
    81b0:    df000215     stw    fp,8(sp)
    81b4:    d839883a     mov    fp,sp
    81b8:    e1000015     stw    r4,0(fp)
    int count = 0;
    81bc:    e0000115     stw    zero,4(fp)
    while (*str++) count++;
    81c0:    e0800017     ldw    r2,0(fp)
    81c4:    10c00003     ldbu    r3,0(r2)
    81c8:    10800044     addi    r2,r2,1
    81cc:    e0800015     stw    r2,0(fp)
    81d0:    18803fcc     andi    r2,r3,255
    81d4:    1080201c     xori    r2,r2,128
    81d8:    10bfe004     addi    r2,r2,-128
    81dc:    1005003a     cmpeq    r2,r2,zero
    81e0:    1000041e     bne    r2,zero,81f4 <strlen+0x48>
    81e4:    e0800117     ldw    r2,4(fp)
    81e8:    10800044     addi    r2,r2,1
    81ec:    e0800115     stw    r2,4(fp)
    81f0:    003ff306     br    81c0 <strlen+0x14>
    return count;
    81f4:    e0800117     ldw    r2,4(fp)
}
    81f8:    df000217     ldw    fp,8(sp)
    81fc:    dec00304     addi    sp,sp,12
    8200:    f800283a     ret

00008204 <show_bins>:

void show_bins(short bins[96])
{
    8204:    deffea04     addi    sp,sp,-88
    8208:    dfc01515     stw    ra,84(sp)
    820c:    df001415     stw    fp,80(sp)
    8210:    d839883a     mov    fp,sp
    8214:    e1000015     stw    r4,0(fp)
    int i, n;
    unsigned char ch;
    unsigned char str[60];
    unsigned char *cp;
    cp = str;
    8218:    e0800404     addi    r2,fp,16
    821c:    e0801315     stw    r2,76(fp)
    for (i=1; i<60; i++) {
    8220:    00800044     movi    r2,1
    8224:    e0800115     stw    r2,4(fp)
    8228:    e0800117     ldw    r2,4(fp)
    822c:    10800f08     cmpgei    r2,r2,60
    8230:    10001b1e     bne    r2,zero,82a0 <show_bins+0x9c>
        *cp = 0;
    8234:    e0801317     ldw    r2,76(fp)
    8238:    10000005     stb    zero,0(r2)
        ch = i+32;
    823c:    e0800117     ldw    r2,4(fp)
    8240:    10800804     addi    r2,r2,32
    8244:    e0800305     stb    r2,12(fp)
        n = bins[i];
    8248:    e0c00117     ldw    r3,4(fp)
    824c:    1805883a     mov    r2,r3
    8250:    10c7883a     add    r3,r2,r3
    8254:    e0800017     ldw    r2,0(fp)
    8258:    1885883a     add    r2,r3,r2
    825c:    1080000f     ldh    r2,0(r2)
    8260:    e0800215     stw    r2,8(fp)
        if (n==0) continue;
    8264:    e0800217     ldw    r2,8(fp)
    8268:    1004c03a     cmpne    r2,r2,zero
    826c:    1000011e     bne    r2,zero,8274 <show_bins+0x70>
    8270:    00000706     br    8290 <show_bins+0x8c>
        *cp++ = ch;
    8274:    e1401304     addi    r5,fp,76
    8278:    28800017     ldw    r2,0(r5)
    827c:    1009883a     mov    r4,r2
    8280:    e0c00303     ldbu    r3,12(fp)
    8284:    20c00005     stb    r3,0(r4)
    8288:    10800044     addi    r2,r2,1
    828c:    28800015     stw    r2,0(r5)
    8290:    e0800117     ldw    r2,4(fp)
    8294:    10800044     addi    r2,r2,1
    8298:    e0800115     stw    r2,4(fp)
    829c:    003fe206     br    8228 <show_bins+0x24>
        //printf("%c %4d\n",ch,bins[i]);
    }
    printf("characters present:\n%s\n",str);
    82a0:    e1400404     addi    r5,fp,16
    82a4:    01000074     movhi    r4,1
    82a8:    2125e504     addi    r4,r4,-26732
    82ac:    0008d4c0     call    8d4c <printf>
    n = strlen(str);
    82b0:    e1000404     addi    r4,fp,16
    82b4:    00081ac0     call    81ac <strlen>
    82b8:    e0800215     stw    r2,8(fp)
    printf("number of characters: %d\n",n);
    82bc:    01000074     movhi    r4,1
    82c0:    2125eb04     addi    r4,r4,-26708
    82c4:    e1400217     ldw    r5,8(fp)
    82c8:    0008d4c0     call    8d4c <printf>
}
    82cc:    dfc01517     ldw    ra,84(sp)
    82d0:    df001417     ldw    fp,80(sp)
    82d4:    dec01604     addi    sp,sp,88
    82d8:    f800283a     ret

000082dc <do_hist>:

void do_hist(char *str, int len, short bins[96])
{
    82dc:    defffa04     addi    sp,sp,-24
    82e0:    df000515     stw    fp,20(sp)
    82e4:    d839883a     mov    fp,sp
    82e8:    e1000015     stw    r4,0(fp)
    82ec:    e1400115     stw    r5,4(fp)
    82f0:    e1800215     stw    r6,8(fp)
    int i, n;
    for (i=0; i<96; i++) bins[i] = 0;
    82f4:    e0000315     stw    zero,12(fp)
    82f8:    e0800317     ldw    r2,12(fp)
    82fc:    10801808     cmpgei    r2,r2,96
    8300:    10000a1e     bne    r2,zero,832c <do_hist+0x50>
    8304:    e0c00317     ldw    r3,12(fp)
    8308:    1805883a     mov    r2,r3
    830c:    10c7883a     add    r3,r2,r3
    8310:    e0800217     ldw    r2,8(fp)
    8314:    1885883a     add    r2,r3,r2
    8318:    1000000d     sth    zero,0(r2)
    831c:    e0800317     ldw    r2,12(fp)
    8320:    10800044     addi    r2,r2,1
    8324:    e0800315     stw    r2,12(fp)
    8328:    003ff306     br    82f8 <do_hist+0x1c>
    for (i=0; i<len; i++) {
    832c:    e0000315     stw    zero,12(fp)
    8330:    e0c00317     ldw    r3,12(fp)
    8334:    e0800117     ldw    r2,4(fp)
    8338:    1880290e     bge    r3,r2,83e0 <do_hist+0x104>
        n = str[i]&0x7F;
    833c:    e0c00017     ldw    r3,0(fp)
    8340:    e0800317     ldw    r2,12(fp)
    8344:    1885883a     add    r2,r3,r2
    8348:    10800003     ldbu    r2,0(r2)
    834c:    10801fcc     andi    r2,r2,127
    8350:    e0800415     stw    r2,16(fp)
        if (n==0) break;
    8354:    e0800417     ldw    r2,16(fp)
    8358:    1004c03a     cmpne    r2,r2,zero
    835c:    1000011e     bne    r2,zero,8364 <do_hist+0x88>
    8360:    00001f06     br    83e0 <do_hist+0x104>
        if (n>96) n -= 32;
    8364:    e0800417     ldw    r2,16(fp)
    8368:    10801850     cmplti    r2,r2,97
    836c:    1000031e     bne    r2,zero,837c <do_hist+0xa0>
    8370:    e0800417     ldw    r2,16(fp)
    8374:    10bff804     addi    r2,r2,-32
    8378:    e0800415     stw    r2,16(fp)
        if (n>32) n -= 32;
    837c:    e0800417     ldw    r2,16(fp)
    8380:    10800850     cmplti    r2,r2,33
    8384:    1000041e     bne    r2,zero,8398 <do_hist+0xbc>
    8388:    e0800417     ldw    r2,16(fp)
    838c:    10bff804     addi    r2,r2,-32
    8390:    e0800415     stw    r2,16(fp)
    8394:    00000106     br    839c <do_hist+0xc0>
        else n = 0;
    8398:    e0000415     stw    zero,16(fp)
        bins[n]++;
    839c:    e0c00417     ldw    r3,16(fp)
    83a0:    1805883a     mov    r2,r3
    83a4:    10c7883a     add    r3,r2,r3
    83a8:    e0800217     ldw    r2,8(fp)
    83ac:    1889883a     add    r4,r3,r2
    83b0:    e0c00417     ldw    r3,16(fp)
    83b4:    1805883a     mov    r2,r3
    83b8:    10c7883a     add    r3,r2,r3
    83bc:    e0800217     ldw    r2,8(fp)
    83c0:    1885883a     add    r2,r3,r2
    83c4:    1080000b     ldhu    r2,0(r2)
    83c8:    10800044     addi    r2,r2,1
    83cc:    2080000d     sth    r2,0(r4)
    83d0:    e0800317     ldw    r2,12(fp)
    83d4:    10800044     addi    r2,r2,1
    83d8:    e0800315     stw    r2,12(fp)
    83dc:    003fd406     br    8330 <do_hist+0x54>
    }
}
    83e0:    df000517     ldw    fp,20(sp)
    83e4:    dec00604     addi    sp,sp,24
    83e8:    f800283a     ret

000083ec <cum_sum>:

double cum_sum(double v[], int len)
{
    83ec:    defff904     addi    sp,sp,-28
    83f0:    dfc00615     stw    ra,24(sp)
    83f4:    df000515     stw    fp,20(sp)
    83f8:    d839883a     mov    fp,sp
    83fc:    e1000015     stw    r4,0(fp)
    8400:    e1400115     stw    r5,4(fp)
    int i;
    double sum = v[0];
    8404:    e0c00017     ldw    r3,0(fp)
    8408:    18800017     ldw    r2,0(r3)
    840c:    e0800315     stw    r2,12(fp)
    8410:    18800117     ldw    r2,4(r3)
    8414:    e0800415     stw    r2,16(fp)
    for (i=1; i<len; i++) {
    8418:    00800044     movi    r2,1
    841c:    e0800215     stw    r2,8(fp)
    8420:    e0c00217     ldw    r3,8(fp)
    8424:    e0800117     ldw    r2,4(fp)
    8428:    1880190e     bge    r3,r2,8490 <cum_sum+0xa4>
        sum += v[i];
    842c:    e0800217     ldw    r2,8(fp)
    8430:    10c00224     muli    r3,r2,8
    8434:    e0800017     ldw    r2,0(fp)
    8438:    1885883a     add    r2,r3,r2
    843c:    e1000317     ldw    r4,12(fp)
    8440:    e1400417     ldw    r5,16(fp)
    8444:    11800017     ldw    r6,0(r2)
    8448:    11c00117     ldw    r7,4(r2)
    844c:    00087e80     call    87e8 <__adddf3>
    8450:    1009883a     mov    r4,r2
    8454:    180b883a     mov    r5,r3
    8458:    e1000315     stw    r4,12(fp)
    845c:    e1400415     stw    r5,16(fp)
        v[i] = sum;
    8460:    e0800217     ldw    r2,8(fp)
    8464:    10c00224     muli    r3,r2,8
    8468:    e0800017     ldw    r2,0(fp)
    846c:    1887883a     add    r3,r3,r2
    8470:    e0800317     ldw    r2,12(fp)
    8474:    18800015     stw    r2,0(r3)
    8478:    e0800417     ldw    r2,16(fp)
    847c:    18800115     stw    r2,4(r3)
    8480:    e0800217     ldw    r2,8(fp)
    8484:    10800044     addi    r2,r2,1
    8488:    e0800215     stw    r2,8(fp)
    848c:    003fe406     br    8420 <cum_sum+0x34>
    }
    return sum;
    8490:    e0800317     ldw    r2,12(fp)
    8494:    e0c00417     ldw    r3,16(fp)
    8498:    1009883a     mov    r4,r2
    849c:    180b883a     mov    r5,r3
}
    84a0:    2005883a     mov    r2,r4
    84a4:    2807883a     mov    r3,r5
    84a8:    dfc00617     ldw    ra,24(sp)
    84ac:    df000517     ldw    fp,20(sp)
    84b0:    dec00704     addi    sp,sp,28
    84b4:    f800283a     ret


Maintained by John Loomis, updated Sat Sep 27 16:07:16 2008