1

A C loop is

  while( *from)
  {
    *to++ = *from++;
  }

I want to know which MIPS instructions the statement *to++ = *from++; translates to. By my count, one pass through the loop takes 14 instructions:

$L2:
   lw    $2,12($fp)
   lb    $3,0($2)
   bne    $3,$0,$L4
   j    $L3
$L4:
   lw    $2,8($fp)
   addu    $3,$fp,12
   lw    $4,0($3)
   lbu    $5,0($4)
   sb    $5,0($2)
   addu    $4,$4,1
   sw    $4,0($3)
   addu    $2,$2,1
   sw    $2,8($fp)
   j    $L2

I came to this conclusion from looking at the complete C program:

/* strcpy.c */

#include <stdio.h>
#include <idt_entrypt.h>

/* C stringcopy */

/*
 * Copy the NUL-terminated string at `from` into the buffer at `to`,
 * then terminate the copy with '\0'.
 *
 * No bounds checking is done: the caller must supply a destination
 * large enough for the source string plus its terminator.
 */
static void str_cpy( char *to, const char *from)
{
  while( *from)                 /* loop until the source's terminating NUL */
  {
    *to++ = *from++;            /* copy one byte, then advance both pointers */
  }
  *to = '\0';                   /* the loop never copies the NUL, so write it here */
}

/*
 * Print the source string, time a single str_cpy() call into a static
 * buffer, then print the measured time and the resulting copy.
 */
int main()
{
  static char* hello = "Hello World!";
  static char to[4711] = "blaha blaj blurk bletch";
  int Time;

  printf("Strangen hello ser ut sa har: %s\n", hello);
  flush_cache();          /* empty the cache memory */
  timer_start();          /* reset the time measurement */

  str_cpy( to, hello);

  Time = timer_stop();            /* read off the time */
  printf("Time to copy: %d\n",Time);
  printf("Och kopian sa har: %s\n", to);
}

Compiling it to MIPS assembly produces this:

    .file   1 "strcpy.c"

 # -G value = 8, Cpu = 3000, ISA = 1
 # GNU C version cygnus-2.7.2-970404 (mips-mips-ecoff) compiled by GNU C version cygnus-2.7.2-970404.
 # options passed:  -msoft-float
 # options enabled:  -fpeephole -ffunction-cse -fkeep-static-consts
 # -fpcc-struct-return -fcommon -fverbose-asm -fgnu-linker -msoft-float
 # -meb -mcpu=3000

gcc2_compiled.:
__gnu_compiled_c:
    .text
    .align  2
    .ent    str_cpy
str_cpy:
    .frame  $fp,8,$31       # vars= 0, regs= 1/0, args= 0, extra= 0
    .mask   0x40000000,-8
    .fmask  0x00000000,0
    subu    $sp,$sp,8
    sw  $fp,0($sp)
    move    $fp,$sp
    sw  $4,8($fp)
    sw  $5,12($fp)
$L2:
    lw  $2,12($fp)
    lb  $3,0($2)
    bne $3,$0,$L4
    j   $L3
$L4:
    lw  $2,8($fp)
    addu    $3,$fp,12
    lw  $4,0($3)
    lbu $5,0($4)
    sb  $5,0($2)
    addu    $4,$4,1
    sw  $4,0($3)
    addu    $2,$2,1
    sw  $2,8($fp)
    j   $L2
$L3:
    lw  $2,8($fp)
    sb  $0,0($2)
$L1:
    move    $sp,$fp         # sp not trusted here
    lw  $fp,0($sp)
    addu    $sp,$sp,8
    j   $31
    .end    str_cpy
    .rdata
    .align  2
$LC0:
    .ascii  "Hello World!\000"
    .sdata
    .align  2
hello.4:
    .word   $LC0
    .data
    .align  2
to.5:
    .ascii  "blaha blaj blurk bletch\000"
    .space  4687
    .rdata
    .align  2
$LC1:
    .ascii  "Strangen hello ser ut sa har: %s\n\000"
    .align  2
$LC2:
    .ascii  "Time to copy: %d\n\000"
    .align  2
$LC3:
    .ascii  "Och kopian sa har: %s\n\000"
    .text
    .align  2
    .globl  main
    .ent    main
main:
    .frame  $fp,32,$31      # vars= 8, regs= 2/0, args= 16, extra= 0
    .mask   0xc0000000,-4
    .fmask  0x00000000,0
    subu    $sp,$sp,32
    sw  $31,28($sp)
    sw  $fp,24($sp)
    move    $fp,$sp
    jal __main
    la  $4,$LC1
    lw  $5,hello.4
    jal printf
    jal flush_cache
    jal timer_start
    la  $4,to.5
    lw  $5,hello.4
    jal str_cpy
    jal timer_stop
    sw  $2,16($fp)
    la  $4,$LC2
    lw  $5,16($fp)
    jal printf
    la  $4,$LC3
    la  $5,to.5
    jal printf
$L5:
    move    $sp,$fp         # sp not trusted here
    lw  $31,28($sp)
    lw  $fp,24($sp)
    addu    $sp,$sp,32
    j   $31
    .end    main

So I analyzed the above and found that the number of instructions executed in one iteration of the while loop is 14. Is my reasoning correct?

4

4 Answers

2
$L2:
 lw    $2,12($fp)  ;  12($fp) is 'from' - load it in to $2
 lb    $3,0($2)    ; read a byte
 bne    $3,$0,$L4  ; if it's non-zero, jump into the main loop
 j    $L3          ; otherwise exit (this is the while clause)
$L4:
 lw    $2,8($fp)   ; 8($fp) is 'to' - load it into $2
 addu    $3,$fp,12 ; Load the address of 'from' into $3
 lw    $4,0($3)    ; Load 'from' into $4
 lbu    $5,0($4)   ; Read the byte again (this is the = *from)
 sb    $5,0($2)    ; Store the byte (*to = )
 addu    $4,$4,1   ; increment from (from++)
 sw    $4,0($3)    ; store it back
 addu    $2,$2,1   ; increment to (to++)
 sw    $2,8($fp)   ; store it back
 j    $L2          ; do it all again

So 13 instructions are executed per iteration of the loop, because the j $L3 is skipped whenever the loop continues.

However, as markgz points out, MIPS has branch delay slots, which may require the compiler or assembler to add nops or switch instructions. You should look at the disassembly of the final code, as well as the intermediate assembler output.

It's probable in this case that there will at least be an extra nop after the initial bne instruction, but the assembler may re-order the final jump rather than pad with a nop. So 14 instructions may well be the total if you look at the final output.

There's a lot of redundancy in there - half the instructions are just loading/storing back to local variables, rather than just keeping stuff in registers. This is typical of a debug/unoptimised build.

answered 2012-12-11T13:30:59.897
1

Looks correct.

There seem to be lots of redundant loads & stores - is optimisation turned off completely?

answered 2012-12-11T13:21:48.570
1

Actually, only 13 instructions are executed in each iteration of that while loop (the instruction j $L3 is executed only when the loop ends).

answered 2012-12-11T13:21:51.250
1

You are including the test and conditional jump instructions in your counting, which doesn't seem appropriate to me.

Your code also has one branch more than it needs. Try

  while ((*to++ = *from++));

My compiler (gcc for x86) produces better code for that version, with only one conditional jump. On that architecture (which seems to have nicer addressing modes) it compiles to

    xorl    %eax, %eax
.L8:
    movzbl  (%rsi,%rax), %edx
    movb    %dl, (%rdi,%rax)
    addq    $1, %rax
    testb   %dl, %dl
    jne .L8
    ret

So here the copy itself is only three instructions (load, store, increment) plus one initialization, not counting the test and the conditional jump, since the increment is done only once inside the loop and not twice. Generally you have to be careful when asking questions like this: there is not really a cost to (*to++ = *from++) by itself, but only as embedded in the surrounding code.

answered 2012-12-11T13:36:55.783