I am trying to fix gcc so that after fdivd the destination register is stored on the stack, i.e.:
fdivd %f0, %f2, %f4  => becomes  fdivd %f0, %f2, %f4; std %f4, [%fp + ...]
I generate rtl for divdf3 using the sequence (emit_insn, DONE) in define_expand (see below).
In the assembler output phase, I use define_insn and write out "fdivd\t%%1, %%2, %%0; std %%0, %%3" as the output template string.
The generated code looks fine. But:
My question is:
How can I mark the template so that it is not placed in a delay slot? How can I indicate that the output consists of two instructions, and give the scheduler a hint about this? Is the attribute (set_attr "length" "2") in the define_insn divdf3_store (below) already enough?
- Greetings to Conrad
-------------- changed sparc.md ------------------------- ;;;;;;;;;;;;;;;;;; handle divdf3 ;;;;;;;;;;;;;;;; (define_expand "divdf3" [(parallel [(set (match_operand:DF 0 "register_operand" "=e") (div:DF (match_operand:DF 1 "register_operand" "e") (match_operand:DF 2 "register_operand" "e"))) (clobber (match_scratch:SI 3 ""))])] "TARGET_FPU" "{ output_divdf3_emit (operands[0], operands[1], operands[2], operands[3]); DONE; }") (define_insn "divdf3_store" [(set (match_operand:DF 0 "register_operand" "=e") (div:DF (match_operand:DF 1 "register_operand" "e") (match_operand:DF 2 "register_operand" "e"))) (clobber (match_operand:DF 3 "memory_operand" "" ))] "TARGET_FPU && TARGET_STORE_AFTER_DIVSQRT" { return output_divdf3 (operands[0], operands[1], operands[2], operands[3]); } [(set_attr "type" "fpdivd") (set_attr "fptype" "double") (set_attr "length" "2")]) (define_insn "divdf3_nostore" [(set (match_operand:DF 0 "register_operand" "=e") (div:DF (match_operand:DF 1 "register_operand" "e") (match_operand:DF 2 "register_operand" "e")))] "TARGET_FPU && (!TARGET_STORE_AFTER_DIVSQRT)" "fdivd\t%1, %2, %0" [(set_attr "type" "fpdivd") (set_attr "fptype" "double")]) -------------- changed sparc.c ------------------------- char * output_divdf3 (rtx op0, rtx op1, rtx dest, rtx scratch) { static char string[128]; if (debug_patch_divsqrt) { fprintf(stderr, "debug_patch_divsqrt:\n"); debug_rtx(op0); debug_rtx(op1); debug_rtx(dest); fprintf(stderr, "scratch: 0x%x\n",(int)scratch); } sprintf(string,"fdivd\t%%1, %%2, %%0; std %%0, %%3 !!!"); return string; } void output_divdf3_emit (rtx dest, rtx op0, rtx op1, rtx scratch) { rtx slot0, div, divsave; if (debug_patch_divsqrt) { fprintf(stderr, "output_divdf3_emit:\n"); debug_rtx(op0); debug_rtx(op1); debug_rtx(dest); fprintf(stderr, "scratch: 0x%x\n",(int)scratch); } div = gen_rtx_SET (VOIDmode, dest, gen_rtx_DIV (DFmode, op0, op1)); if (TARGET_STORE_AFTER_DIVSQRT) { slot0 = assign_stack_local (DFmode, 8, 8); divsave = gen_rtx_SET (VOIDmode, 
slot0, dest); emit_insn(divsave); emit_insn (gen_rtx_PARALLEL(VOIDmode, gen_rtvec (2, div, gen_rtx_CLOBBER (SImode, slot0)))); } else { emit_insn(div); } }