...

Text file src/runtime/memmove_loong64.s

Documentation: runtime

     1// Copyright 2022 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "textflag.h"
     6
     7// See memmove Go doc for important implementation constraints.
     8
     9// Register map
    10//
    11// to		R4
    12// from		R5
    13// n(aka count)	R6
    14// to-end	R7
    15// from-end	R8
    16// data		R11-R18
    17// tmp		R9, R10
    18
    19// Algorithm:
    20//
    21// Memory alignment check is only performed for copy size greater
    22// than 64 bytes to minimize overhead.
    23//
    24// When the copy size is <= 64 bytes, the code at label tail selects the
    25// case that matches the copy size and copies the data directly.
    26// Based on the common memory access instructions of loong64, the
    27// currently implemented cases are:
    28// move_0, move_1, move_2, move_3, move_4, move_5through7, move_8,
    29// move_9through16, move_17through32, move_33through64
    30//
    31// When the copy size is > 64 bytes, destination-aligned copying is used;
    32// the copy is done in 3 parts:
    33// 1. Head: do the memory alignment
    34// 2. Body: a 64-byte loop structure
    35// 3. Tail: processing of the remaining part (<= 64 bytes)
    36//
    37// forward:
    38//
    39//    Dst           NewDst                           Dstend
    40//     |               |<----count after correction---->|
    41//     |<-------------count before correction---------->|
    42//     |<--8-(Dst&7)-->|               |<---64 bytes--->|
    43//     +------------------------------------------------+
    44//     |   Head        |      Body     |      Tail      |
    45//     +---------------+---------------+----------------+
    46//    NewDst = Dst - (Dst & 7) + 8
    47//    count = count - 8 + (Dst & 7)
    48//    Src = Src - (Dst & 7) + 8
    49//
    50// backward:
    51//
    52//    Dst                             NewDstend          Dstend
    53//     |<-----count after correction------>|                |
    54//     |<------------count before correction--------------->|
    55//     |<---64 bytes--->|                  |<---Dstend&7--->|
    56//     +----------------------------------------------------+
    57//     |   Tail         |      Body        |      Head      |
    58//     +----------------+------------------+----------------+
    59//    NewDstend = Dstend - (Dstend & 7)
    60//    count = count - (Dstend & 7)
    61//    Srcend = Srcend - (Dstend & 7)
    62
    63// func memmove(to, from unsafe.Pointer, n uintptr)
    64TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24	// copy n (R6) bytes from `from` (R5) to `to` (R4); overlapping regions are handled below
    65	BEQ	R4, R5, move_0	// to == from: nothing to copy
    66	BEQ	R6, move_0	// n == 0: nothing to copy
    67
    68	ADDV	R4, R6, R7 // to-end pointer (to + n)
    69	ADDV	R5, R6, R8 // from-end pointer (from + n)
    70
    71tail:	// also re-entered from body/b_body with the remaining count, which is never 0 here
    72	// copy size <= 64 bytes: pick the case for this size and copy directly, without
    73	// checking alignment. Larger sizes fail every test and reach the overlap check.
    74	// < 2 bytes, i.e. exactly 1 byte
    75	SGTU	$2, R6, R9	// R9 = 1 if count < 2 (unsigned compare), else 0
    76	BNE	R9, move_1	// taken when R9 != 0
    77
    78	// < 3 bytes
    79	SGTU	$3, R6, R9
    80	BNE	R9, move_2
    81
    82	// < 4 bytes
    83	SGTU	$4, R6, R9
    84	BNE	R9, move_3
    85
    86	// < 5 bytes
    87	SGTU	$5, R6, R9
    88	BNE	R9, move_4
    89
    90	// >= 5 bytes and < 8 bytes
    91	SGTU	$8, R6, R9
    92	BNE	R9, move_5through7
    93
    94	// < 9 bytes
    95	SGTU	$9, R6, R9
    96	BNE	R9, move_8
    97
    98	// >= 9 bytes and < 17 bytes
    99	SGTU	$17, R6, R9
   100	BNE	R9, move_9through16
   101
   102	// >= 17 bytes and < 33 bytes
   103	SGTU	$33, R6, R9
   104	BNE	R9, move_17through32
   105
   106	// >= 33 bytes and < 65 bytes
   107	SGTU	$65, R6, R9
   108	BNE	R9, move_33through64
   109
   110	// count > 64 from here on. If (dst > src) && (dst < src + count), the
   111	// regions overlap with dst above src: jump to backward.
   112	// Otherwise, jump (or fall through) to forward.
   113	BGEU	R5, R4, forward	// src >= dst
   114	ADDV	R5, R6, R10	// R10 = src + count
   115	BLTU	R4, R10, backward	// dst < src + count
   116
   117forward:
   118	AND	$7, R4, R9	// dst & 7
   119	BEQ	R9, body	// dst is already 8-byte aligned: no head to copy
   120head:
   121	MOVV	$8, R10
   122	SUBV	R9, R10		// head = 8 - (dst & 7)
   123	MOVB	(R5), R11	// byte-copy loop, runs head (1..7) times
   124	SUBV	$1, R10
   125	ADDV	$1, R5
   126	MOVB	R11, (R4)
   127	ADDV	$1, R4
   128	BNE	R10, -5(PC)	// back to the MOVB load above while head bytes remain
   129	ADDV	R9, R6
   130	ADDV	$-8, R6		// newcount = count + (dst & 7) - 8
   131	// if newcount < 65 bytes, a single move_33through64 finishes the copy
   132	SGTU	$65, R6, R9	// newcount is at least 58 here (count >= 65, head <= 7)
   133	BNE	R9, move_33through64	// copies from the advanced R4/R5 and the unchanged end pointers R7/R8
   134
   135body:
   136	MOVV	(R5), R11	// load one 64-byte chunk into R11-R18 ...
   137	MOVV	8(R5), R12
   138	MOVV	16(R5), R13
   139	MOVV	24(R5), R14
   140	MOVV	32(R5), R15
   141	MOVV	40(R5), R16
   142	MOVV	48(R5), R17
   143	MOVV	56(R5), R18
   144	MOVV	R11, (R4)	// ... then store it; all eight loads precede the first store
   145	MOVV	R12, 8(R4)
   146	MOVV	R13, 16(R4)
   147	MOVV	R14, 24(R4)
   148	MOVV	R15, 32(R4)
   149	MOVV	R16, 40(R4)
   150	MOVV	R17, 48(R4)
   151	MOVV	R18, 56(R4)
   152	ADDV	$-64, R6	// count -= 64
   153	ADDV	$64, R4		// advance dst
   154	ADDV	$64, R5		// advance src
   155	SGTU	$64, R6, R9	// R9 = 1 if count < 64
   156	// if the remaining part >= 64 bytes, jmp to body
   157	BEQ	R9, body
   158	// if the remaining part == 0 bytes, use move_0 to return
   159	BEQ	R6, move_0
   160	// if the remaining part in (0, 63] bytes, jmp to tail
   161	JMP	tail
   162
   163// The backward copy mirrors the forward copy but works downwards from the
   164// end pointers R7/R8; b_head uses two overlapping 8-byte moves, not a byte loop.
   165backward:
   166	AND	$7, R7, R9	 // dstend & 7
   167	BEQ	R9, b_body	// dstend is already 8-byte aligned: no head to copy
   168b_head:
   169	MOVV	-8(R8), R11	// R11 = last 8 source bytes, read before the pointers move
   170	SUBV	R9, R6		// newcount = count - (dstend & 7)
   171	SUBV	R9, R8		// newsrcend = srcend - (dstend & 7)
   172	MOVV	-8(R8), R12	// R12 = 8 source bytes ending at newsrcend
   173 	MOVV	R11, -8(R7)	// store the last 8 bytes; dstend is not yet aligned here
   174	SUBV	R9, R7		// newdstend = dstend - (dstend & 7)
   175 	MOVV	R12, -8(R7)	// store the 8 bytes ending at the aligned newdstend; overlaps the previous store
   176	SUBV	$8, R6		// those 8 bytes are done as well, so b_head consumes
   177	SUBV	$8, R7		// (dstend & 7) + 8 bytes in total
   178	SUBV	$8, R8
   179	SGTU    $65, R6, R9	// remaining count is at least 50 here (count >= 65, at most 15 consumed)
   180	BNE     R9, move_33through64	// < 65 bytes left: one move_33through64 finishes the copy
   181
   182b_body:
   183	MOVV	-8(R8), R11	// load the 64 bytes below srcend into R11-R18 ...
   184	MOVV	-16(R8), R12
   185	MOVV	-24(R8), R13
   186	MOVV	-32(R8), R14
   187	MOVV	-40(R8), R15
   188	MOVV	-48(R8), R16
   189	MOVV	-56(R8), R17
   190	MOVV	-64(R8), R18
   191	MOVV	R11, -8(R7)	// ... then store them below dstend; all loads precede the first store
   192	MOVV	R12, -16(R7)
   193	MOVV	R13, -24(R7)
   194	MOVV	R14, -32(R7)
   195	MOVV	R15, -40(R7)
   196	MOVV	R16, -48(R7)
   197	MOVV	R17, -56(R7)
   198	MOVV	R18, -64(R7)
   199	ADDV	$-64, R6	// count -= 64
   200	ADDV	$-64, R7	// move dstend down
   201	ADDV	$-64, R8	// move srcend down
   202	SGTU	$64, R6, R9	// R9 = 1 if count < 64
   203	BEQ	R9, b_body	// >= 64 bytes left: another chunk
   204	BEQ	R6, move_0	// nothing left: return
   205	JMP	tail		// 1..63 bytes left between R4/R5 and the lowered R7/R8
   206
   207move_0:	// nothing (left) to copy
   208	RET
   209
   210move_1:	// count == 1: one byte
   211	MOVB	(R5), R11
   212	MOVB	R11, (R4)
   213	RET
   214move_2:	// count == 2: one halfword
   215	MOVH	(R5), R11
   216	MOVH	R11, (R4)
   217	RET
   218move_3:	// count == 3: first halfword plus the last byte, addressed from the end pointers
   219	MOVH	(R5), R11
   220	MOVB	-1(R8), R12
   221	MOVH	R11, (R4)
   222	MOVB	R12, -1(R7)
   223	RET
   224move_4:	// count == 4: one word
   225	MOVW	(R5), R11
   226	MOVW	R11, (R4)
   227	RET
   228move_5through7:	// first 4 and last 4 bytes; the two words overlap in the middle
   229	MOVW	(R5), R11
   230	MOVW	-4(R8), R12
   231	MOVW	R11, (R4)
   232	MOVW	R12, -4(R7)
   233	RET
   234move_8:	// count == 8: one doubleword
   235	MOVV	(R5), R11
   236	MOVV	R11, (R4)
   237	RET
   238move_9through16:	// first 8 and last 8 bytes; they overlap when count < 16
   239	MOVV	(R5), R11
   240	MOVV	-8(R8), R12
   241	MOVV	R11, (R4)
   242	MOVV	R12, -8(R7)
   243	RET
   244move_17through32:	// first 16 and last 16 bytes; they overlap when count < 32
   245	MOVV	(R5), R11
   246	MOVV	8(R5), R12
   247	MOVV	-16(R8), R13
   248	MOVV	-8(R8), R14
   249	MOVV	R11, (R4)
   250	MOVV	R12, 8(R4)
   251	MOVV	R13, -16(R7)
   252	MOVV	R14, -8(R7)
   253	RET
   254move_33through64:	// first 32 and last 32 bytes; also the finisher jumped to from head and b_head
   255	MOVV	(R5), R11	// first 32 bytes, addressed from the start pointer
   256	MOVV	8(R5), R12
   257	MOVV	16(R5), R13
   258	MOVV	24(R5), R14
   259	MOVV	-32(R8), R15	// last 32 bytes, addressed from the end pointer
   260	MOVV	-24(R8), R16
   261	MOVV	-16(R8), R17
   262	MOVV	-8(R8), R18
   263	MOVV	R11, (R4)	// all eight loads are done before any store
   264	MOVV	R12, 8(R4)
   265	MOVV	R13, 16(R4)
   266	MOVV	R14, 24(R4)
   267	MOVV	R15, -32(R7)
   268	MOVV	R16, -24(R7)
   269	MOVV	R17, -16(R7)
   270	MOVV	R18, -8(R7)
   271	RET

View as plain text