...

Text file src/math/big/arith_riscv64.s

Documentation: math/big

     1// Copyright 2020 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !math_big_pure_go && riscv64
     6
     7#include "textflag.h"
     8
     9// This file provides fast assembly versions for the elementary
    10// arithmetic operations on vectors implemented in arith.go.
    11
    // addVV computes z[i] = x[i] + y[i] + c for i = 0 .. z_len-1 on 8-byte
    // words, with c starting at 0 and carried from one word to the next, and
    // stores the carry left after the last word in the result slot c.
    // loop4 handles four words per iteration while at least four remain;
    // loop1 handles the remaining words one at a time.
    // NOTE(review): the Go prototype is declared outside this file; the frame
    // offsets used below look like func addVV(z, x, y []Word) (c Word) — confirm.
    //
    // Registers: X5/X6/X7 = cursors into x/y/z, X30 = words left,
    // X28 = 4 (unroll factor of loop4), X29 = running carry c.
    12TEXT ·addVV(SB),NOSPLIT,$0
    13	MOV	x+24(FP), X5	// X5 = pointer to x[0]
    14	MOV	y+48(FP), X6	// X6 = pointer to y[0]
    15	MOV	z+0(FP), X7	// X7 = pointer to z[0]
    16	MOV	z_len+8(FP), X30	// X30 = number of words to process
    17
    18	MOV	$4, X28	// X28 = 4, the loop4 unroll factor
    19	MOV	$0, X29		// c = 0
    20
    21	BEQZ	X30, done	// empty z: return c = 0 without touching memory
    22	BLTU	X30, X28, loop1	// fewer than 4 words: skip the unrolled loop
    23
    24loop4:
    25	MOV	0(X5), X8	// x[0]
    26	MOV	0(X6), X9	// y[0]
    27	MOV	8(X5), X11	// x[1]
    28	MOV	8(X6), X12	// y[1]
    29	MOV	16(X5), X14	// x[2]
    30	MOV	16(X6), X15	// y[2]
    31	MOV	24(X5), X17	// x[3]
    32	MOV	24(X6), X18	// y[3]
    33
    34	ADD	X8, X9, X21	// X21 = x[0] + y[0]
    35	SLTU	X8, X21, X22	// X22 = 1 if that sum wrapped (sum < x[0])
    36	ADD	X21, X29, X10	// z[0] = x[0] + y[0] + c
    37	SLTU	X21, X10, X23	// X23 = 1 if adding c wrapped
    38	ADD	X22, X23, X29	// next c
    39
    40	ADD	X11, X12, X24	// X24 = x[1] + y[1]
    41	SLTU	X11, X24, X25	// X25 = 1 if that sum wrapped
    42	ADD	X24, X29, X13	// z[1] = x[1] + y[1] + c
    43	SLTU	X24, X13, X26	// X26 = 1 if adding c wrapped
    44	ADD	X25, X26, X29	// next c
    45
    46	ADD	X14, X15, X21	// X21 = x[2] + y[2]
    47	SLTU	X14, X21, X22	// X22 = 1 if that sum wrapped
    48	ADD	X21, X29, X16	// z[2] = x[2] + y[2] + c
    49	SLTU	X21, X16, X23	// X23 = 1 if adding c wrapped
    50	ADD	X22, X23, X29	// next c
    51
    52	ADD	X17, X18, X21	// X21 = x[3] + y[3]
    53	SLTU	X17, X21, X22	// X22 = 1 if that sum wrapped
    54	ADD	X21, X29, X19	// z[3] = x[3] + y[3] + c
    55	SLTU	X21, X19, X23	// X23 = 1 if adding c wrapped
    56	ADD	X22, X23, X29	// next c
    57
    58	MOV	X10, 0(X7)	// z[0]
    59	MOV	X13, 8(X7)	// z[1]
    60	MOV	X16, 16(X7)	// z[2]
    61	MOV	X19, 24(X7)	// z[3]
    62
    63	ADD	$32, X5	// advance x, y and z by 4 words (32 bytes)
    64	ADD	$32, X6
    65	ADD	$32, X7
    66	SUB	$4, X30	// 4 fewer words left
    67
    68	BGEU	X30, X28, loop4	// at least 4 words left: another unrolled pass
    69	BEQZ	X30, done	// no remainder to handle
    70
    71loop1:
    72	MOV	0(X5), X10	// x
    73	MOV	0(X6), X11	// y
    74
    75	ADD	X10, X11, X12	// X12 = x + y
    76	SLTU	X10, X12, X14	// X14 = 1 if that sum wrapped (sum < x)
    77	ADD	X12, X29, X13	// z = x + y + c
    78	SLTU	X12, X13, X15	// X15 = 1 if adding c wrapped
    79	ADD	X14, X15, X29	// next c
    80
    81	MOV	X13, 0(X7)	// z
    82
    83	ADD	$8, X5	// advance x, y and z by one word
    84	ADD	$8, X6
    85	ADD	$8, X7
    86	SUB	$1, X30	// one fewer word left
    87
    88	BNEZ	X30, loop1
    89
    90done:
    91	MOV	X29, c+72(FP)	// return c
    92	RET
    93
    // subVV computes z[i] = x[i] - y[i] - b for i = 0 .. z_len-1 on 8-byte
    // words, with the borrow b starting at 0 and carried from one word to the
    // next, and stores the borrow left after the last word in the result slot c.
    // loop4 handles four words per iteration while at least four remain;
    // loop1 handles the remaining words one at a time.
    // NOTE(review): the Go prototype is declared outside this file; the frame
    // offsets used below look like func subVV(z, x, y []Word) (c Word) — confirm.
    //
    // Registers: X5/X6/X7 = cursors into x/y/z, X30 = words left,
    // X28 = 4 (unroll factor of loop4), X29 = running borrow b.
    94TEXT ·subVV(SB),NOSPLIT,$0
    95	MOV	x+24(FP), X5	// X5 = pointer to x[0]
    96	MOV	y+48(FP), X6	// X6 = pointer to y[0]
    97	MOV	z+0(FP), X7	// X7 = pointer to z[0]
    98	MOV	z_len+8(FP), X30	// X30 = number of words to process
    99
   100	MOV	$4, X28	// X28 = 4, the loop4 unroll factor
   101	MOV	$0, X29		// b = 0
   102
   103	BEQZ	X30, done	// empty z: return b = 0 without touching memory
   104	BLTU	X30, X28, loop1	// fewer than 4 words: skip the unrolled loop
   105
   106loop4:
   107	MOV	0(X5), X8	// x[0]
   108	MOV	0(X6), X9	// y[0]
   109	MOV	8(X5), X11	// x[1]
   110	MOV	8(X6), X12	// y[1]
   111	MOV	16(X5), X14	// x[2]
   112	MOV	16(X6), X15	// y[2]
   113	MOV	24(X5), X17	// x[3]
   114	MOV	24(X6), X18	// y[3]
   115
   116	SUB	X9, X8, X21	// X21 = x[0] - y[0]
   117	SLTU	X21, X8, X22	// X22 = 1 if that difference wrapped (x[0] < difference)
   118	SUB	X29, X21, X10	// z[0] = x[0] - y[0] - b
   119	SLTU	X10, X21, X23	// X23 = 1 if subtracting b wrapped
   120	ADD	X22, X23, X29	// next b
   121
   122	SUB	X12, X11, X24	// X24 = x[1] - y[1]
   123	SLTU	X24, X11, X25	// X25 = 1 if that difference wrapped
   124	SUB	X29, X24, X13	// z[1] = x[1] - y[1] - b
   125	SLTU	X13, X24, X26	// X26 = 1 if subtracting b wrapped
   126	ADD	X25, X26, X29	// next b
   127
   128	SUB	X15, X14, X21	// X21 = x[2] - y[2]
   129	SLTU	X21, X14, X22	// X22 = 1 if that difference wrapped
   130	SUB	X29, X21, X16	// z[2] = x[2] - y[2] - b
   131	SLTU	X16, X21, X23	// X23 = 1 if subtracting b wrapped
   132	ADD	X22, X23, X29	// next b
   133
   134	SUB	X18, X17, X21	// X21 = x[3] - y[3]
   135	SLTU	X21, X17, X22	// X22 = 1 if that difference wrapped
   136	SUB	X29, X21, X19	// z[3] = x[3] - y[3] - b
   137	SLTU	X19, X21, X23	// X23 = 1 if subtracting b wrapped
   138	ADD	X22, X23, X29	// next b
   139
   140	MOV	X10, 0(X7)	// z[0]
   141	MOV	X13, 8(X7)	// z[1]
   142	MOV	X16, 16(X7)	// z[2]
   143	MOV	X19, 24(X7)	// z[3]
   144
   145	ADD	$32, X5	// advance x, y and z by 4 words (32 bytes)
   146	ADD	$32, X6
   147	ADD	$32, X7
   148	SUB	$4, X30	// 4 fewer words left
   149
   150	BGEU	X30, X28, loop4	// at least 4 words left: another unrolled pass
   151	BEQZ	X30, done	// no remainder to handle
   152
   153loop1:
   154	MOV	0(X5), X10	// x
   155	MOV	0(X6), X11	// y
   156
   157	SUB	X11, X10, X12	// X12 = x - y
   158	SLTU	X12, X10, X14	// X14 = 1 if that difference wrapped (x < difference)
   159	SUB	X29, X12, X13	// z = x - y - b
   160	SLTU	X13, X12, X15	// X15 = 1 if subtracting b wrapped
   161	ADD	X14, X15, X29	// next b
   162
   163	MOV	X13, 0(X7)	// z
   164
   165	ADD	$8, X5	// advance x, y and z by one word
   166	ADD	$8, X6
   167	ADD	$8, X7
   168	SUB	$1, X30	// one fewer word left
   169
   170	BNEZ	X30, loop1
   171
   172done:
   173	MOV	X29, c+72(FP)	// return b
   174	RET
   175
   // addVW adds the single word y to the word vector x: the carry c starts
   // out as y, each step stores z[i] = x[i] + c and sets c to 1 if that sum
   // wrapped (0 otherwise), and the value of c left after z_len words is
   // stored in the result slot c. With z_len == 0 the result is y itself.
   // loop4 handles four words per iteration while at least four remain;
   // loop1 handles the remaining words one at a time.
   // NOTE(review): the Go prototype is declared outside this file; the frame
   // offsets used below look like func addVW(z, x []Word, y Word) (c Word) — confirm.
   //
   // Registers: X5/X7 = cursors into x/z, X6 = y (a value, not a pointer),
   // X30 = words left, X28 = 4 (unroll factor of loop4), X29 = running carry c.
   176TEXT ·addVW(SB),NOSPLIT,$0
   177	MOV	x+24(FP), X5	// X5 = pointer to x[0]
   178	MOV	y+48(FP), X6	// X6 = y, the word to add
   179	MOV	z+0(FP), X7	// X7 = pointer to z[0]
   180	MOV	z_len+8(FP), X30	// X30 = number of words to process
   181
   182	MOV	$4, X28	// X28 = 4, the loop4 unroll factor
   183	MOV	X6, X29		// c = y
   184
   185	BEQZ	X30, done	// empty z: return c = y without touching memory
   186	BLTU	X30, X28, loop1	// fewer than 4 words: skip the unrolled loop
   187
   188loop4:
   189	MOV	0(X5), X8	// x[0]
   190	MOV	8(X5), X11	// x[1]
   191	MOV	16(X5), X14	// x[2]
   192	MOV	24(X5), X17	// x[3]
   193
   194	ADD	X8, X29, X10	// z[0] = x[0] + c
   195	SLTU	X8, X10, X29	// next c = 1 if the sum wrapped (z[0] < x[0])
   196
   197	ADD	X11, X29, X13	// z[1] = x[1] + c
   198	SLTU	X11, X13, X29	// next c = 1 if the sum wrapped (z[1] < x[1])
   199
   200	ADD	X14, X29, X16	// z[2] = x[2] + c
   201	SLTU	X14, X16, X29	// next c = 1 if the sum wrapped (z[2] < x[2])
   202
   203	ADD	X17, X29, X19	// z[3] = x[3] + c
   204	SLTU	X17, X19, X29	// next c = 1 if the sum wrapped (z[3] < x[3])
   205
   206	MOV	X10, 0(X7)	// z[0]
   207	MOV	X13, 8(X7)	// z[1]
   208	MOV	X16, 16(X7)	// z[2]
   209	MOV	X19, 24(X7)	// z[3]
   210
   211	ADD	$32, X5	// advance x and z by 4 words (32 bytes)
   212	ADD	$32, X7
   213	SUB	$4, X30	// 4 fewer words left
   214
   215	BGEU	X30, X28, loop4	// at least 4 words left: another unrolled pass
   216	BEQZ	X30, done	// no remainder to handle
   217
   218loop1:
   219	MOV	0(X5), X10	// x
   220
   221	ADD	X10, X29, X12	// z = x + c
   222	SLTU	X10, X12, X29	// next c = 1 if the sum wrapped (z < x)
   223
   224	MOV	X12, 0(X7)	// z
   225
   226	ADD	$8, X5	// advance x and z by one word
   227	ADD	$8, X7
   228	SUB	$1, X30	// one fewer word left
   229
   230	BNEZ	X30, loop1
   231
   232done:
   233	MOV	X29, c+56(FP)	// return c
   234	RET
   235
   // subVW subtracts the single word y from the word vector x: the borrow b
   // starts out as y, each step stores z[i] = x[i] - b and sets b to 1 if that
   // difference wrapped (0 otherwise), and the value of b left after z_len
   // words is stored in the result slot c. With z_len == 0 the result is y.
   // loop4 handles four words per iteration while at least four remain;
   // loop1 handles the remaining words one at a time.
   // NOTE(review): the Go prototype is declared outside this file; the frame
   // offsets used below look like func subVW(z, x []Word, y Word) (c Word) — confirm.
   //
   // Registers: X5/X7 = cursors into x/z, X6 = y (a value, not a pointer),
   // X30 = words left, X28 = 4 (unroll factor of loop4), X29 = running borrow b.
   236TEXT ·subVW(SB),NOSPLIT,$0
   237	MOV	x+24(FP), X5	// X5 = pointer to x[0]
   238	MOV	y+48(FP), X6	// X6 = y, the word to subtract
   239	MOV	z+0(FP), X7	// X7 = pointer to z[0]
   240	MOV	z_len+8(FP), X30	// X30 = number of words to process
   241
   242	MOV	$4, X28	// X28 = 4, the loop4 unroll factor
   243	MOV	X6, X29		// b = y
   244
   245	BEQZ	X30, done	// empty z: return b = y without touching memory
   246	BLTU	X30, X28, loop1	// fewer than 4 words: skip the unrolled loop
   247
   248loop4:
   249	MOV	0(X5), X8	// x[0]
   250	MOV	8(X5), X11	// x[1]
   251	MOV	16(X5), X14	// x[2]
   252	MOV	24(X5), X17	// x[3]
   253
   254	SUB	X29, X8, X10	// z[0] = x[0] - b
   255	SLTU	X10, X8, X29	// next b = 1 if the difference wrapped (x[0] < z[0])
   256
   257	SUB	X29, X11, X13	// z[1] = x[1] - b
   258	SLTU	X13, X11, X29	// next b = 1 if the difference wrapped (x[1] < z[1])
   259
   260	SUB	X29, X14, X16	// z[2] = x[2] - b
   261	SLTU	X16, X14, X29	// next b = 1 if the difference wrapped (x[2] < z[2])
   262
   263	SUB	X29, X17, X19	// z[3] = x[3] - b
   264	SLTU	X19, X17, X29	// next b = 1 if the difference wrapped (x[3] < z[3])
   265
   266	MOV	X10, 0(X7)	// z[0]
   267	MOV	X13, 8(X7)	// z[1]
   268	MOV	X16, 16(X7)	// z[2]
   269	MOV	X19, 24(X7)	// z[3]
   270
   271	ADD	$32, X5	// advance x and z by 4 words (32 bytes)
   272	ADD	$32, X7
   273	SUB	$4, X30	// 4 fewer words left
   274
   275	BGEU	X30, X28, loop4	// at least 4 words left: another unrolled pass
   276	BEQZ	X30, done	// no remainder to handle
   277
   278loop1:
   279	MOV	0(X5), X10	// x
   280
   281	SUB	X29, X10, X12	// z = x - b
   282	SLTU	X12, X10, X29	// next b = 1 if the difference wrapped (x < z)
   283
   284	MOV	X12, 0(X7)	// z
   285
   286	ADD	$8, X5	// advance x and z by one word
   287	ADD	$8, X7
   288	SUB	$1, X30	// one fewer word left
   289
   290	BNEZ	X30, loop1
   291
   292done:
   293	MOV	X29, c+56(FP)	// return b
   294	RET
   295
   // shlVU has no riscv64 assembly body in this file: its only instruction
   // jumps to ·shlVU_g, which is defined elsewhere.
   // NOTE(review): ·shlVU_g is presumably the portable Go implementation —
   // confirm against its definition.
   296TEXT ·shlVU(SB),NOSPLIT,$0
   297	JMP ·shlVU_g(SB)
   298
   // shrVU has no riscv64 assembly body in this file: its only instruction
   // jumps to ·shrVU_g, which is defined elsewhere.
   // NOTE(review): ·shrVU_g is presumably the portable Go implementation —
   // confirm against its definition.
   299TEXT ·shrVU(SB),NOSPLIT,$0
   300	JMP ·shrVU_g(SB)
   301
   // mulAddVWW multiplies the word vector x by the single word y and adds the
   // single word r: the carry c starts out as r, each step stores
   // z[i] = low word of (x[i]*y + c) and sets c to the high word of that
   // value, and the value of c left after z_len words is stored in the result
   // slot c. With z_len == 0 the result is r itself.
   // loop4 handles four words per iteration while at least four remain;
   // loop1 handles the remaining words one at a time.
   // NOTE(review): the Go prototype is declared outside this file; the frame
   // offsets used below look like
   // func mulAddVWW(z, x []Word, y, r Word) (c Word) — confirm.
   //
   // Registers: X5/X7 = cursors into x/z, X6 = y (a value, not a pointer),
   // X30 = words left, X28 = 4 (unroll factor of loop4), X29 = running carry c.
   302TEXT ·mulAddVWW(SB),NOSPLIT,$0
   303	MOV	x+24(FP), X5	// X5 = pointer to x[0]
   304	MOV	y+48(FP), X6	// X6 = y, the multiplier
   305	MOV	z+0(FP), X7	// X7 = pointer to z[0]
   306	MOV	z_len+8(FP), X30	// X30 = number of words to process
   307	MOV	r+56(FP), X29	// c = r
   308
   309	MOV	$4, X28	// X28 = 4, the loop4 unroll factor
   310
   311	BEQ	ZERO, X30, done	// empty z: return c = r without touching memory
   312	BLTU	X30, X28, loop1	// fewer than 4 words: skip the unrolled loop
   313
   314loop4:
   315	MOV	0(X5), X8	// x[0]
   316	MOV	8(X5), X11	// x[1]
   317	MOV	16(X5), X14	// x[2]
   318	MOV	24(X5), X17	// x[3]
   319
   320	MULHU	X8, X6, X9	// z_hi[0] = high word of x[0] * y
   321	MUL	X8, X6, X8	// z_lo[0] = low word of x[0] * y (overwrites x[0])
   322	ADD	X8, X29, X10	// z[0] = z_lo[0] + c
   323	SLTU	X8, X10, X23	// X23 = 1 if that sum wrapped (z[0] < z_lo[0])
   324	ADD	X23, X9, X29	// next c = z_hi[0] + carry
   325
   326	MULHU	X11, X6, X12	// z_hi[1] = high word of x[1] * y
   327	MUL	X11, X6, X11	// z_lo[1] = low word of x[1] * y (overwrites x[1])
   328	ADD	X11, X29, X13	// z[1] = z_lo[1] + c
   329	SLTU	X11, X13, X23	// X23 = 1 if that sum wrapped
   330	ADD	X23, X12, X29	// next c = z_hi[1] + carry
   331
   332	MULHU	X14, X6, X15	// z_hi[2] = high word of x[2] * y
   333	MUL	X14, X6, X14	// z_lo[2] = low word of x[2] * y (overwrites x[2])
   334	ADD	X14, X29, X16	// z[2] = z_lo[2] + c
   335	SLTU	X14, X16, X23	// X23 = 1 if that sum wrapped
   336	ADD	X23, X15, X29	// next c = z_hi[2] + carry
   337
   338	MULHU	X17, X6, X18	// z_hi[3] = high word of x[3] * y
   339	MUL	X17, X6, X17	// z_lo[3] = low word of x[3] * y (overwrites x[3])
   340	ADD	X17, X29, X19	// z[3] = z_lo[3] + c
   341	SLTU	X17, X19, X23	// X23 = 1 if that sum wrapped
   342	ADD	X23, X18, X29	// next c = z_hi[3] + carry
   343
   344	MOV	X10, 0(X7)	// z[0]
   345	MOV	X13, 8(X7)	// z[1]
   346	MOV	X16, 16(X7)	// z[2]
   347	MOV	X19, 24(X7)	// z[3]
   348
   349	ADD	$32, X5	// advance x and z by 4 words (32 bytes)
   350	ADD	$32, X7
   351	SUB	$4, X30	// 4 fewer words left
   352
   353	BGEU	X30, X28, loop4	// at least 4 words left: another unrolled pass
   354	BEQZ	X30, done	// no remainder to handle
   355
   356loop1:
   357	MOV	0(X5), X10	// x
   358
   359	MULHU	X10, X6, X12	// z_hi = high word of x * y
   360	MUL	X10, X6, X10	// z_lo = low word of x * y (overwrites x)
   361	ADD	X10, X29, X13	// z_lo + c
   362	SLTU	X10, X13, X15	// X15 = 1 if that sum wrapped
   363	ADD	X12, X15, X29	// next c = z_hi + carry
   364
   365	MOV	X13, 0(X7)	// z
   366
   367	ADD	$8, X5	// advance x and z by one word
   368	ADD	$8, X7
   369	SUB	$1, X30	// one fewer word left
   370
   371	BNEZ	X30, loop1
   372
   373done:
   374	MOV	X29, c+64(FP)	// return c
   375	RET
   376
   // addMulVVW multiplies the word vector x by the single word y and
   // accumulates the product into z in place: the carry c starts at 0, each
   // step stores z[i] = low word of (z[i] + x[i]*y + c) and sets c to the high
   // word of that value, and the value of c left after z_len words is stored
   // in the result slot c.
   // loop4 handles four words per iteration while at least four remain;
   // loop1 handles the remaining words one at a time.
   // NOTE(review): the Go prototype is declared outside this file; the frame
   // offsets used below look like
   // func addMulVVW(z, x []Word, y Word) (c Word) — confirm.
   //
   // Registers: X5/X7 = cursors into x/z, X6 = y (a value, not a pointer),
   // X30 = words left, X28 = 4 (unroll factor of loop4), X29 = running carry c.
   377TEXT ·addMulVVW(SB),NOSPLIT,$0
   378	MOV	x+24(FP), X5	// X5 = pointer to x[0]
   379	MOV	y+48(FP), X6	// X6 = y, the multiplier
   380	MOV	z+0(FP), X7	// X7 = pointer to z[0]
   381	MOV	z_len+8(FP), X30	// X30 = number of words to process
   382
   383	MOV	$4, X28	// X28 = 4, the loop4 unroll factor
   384	MOV	$0, X29		// c = 0
   385
   386	BEQZ	X30, done	// empty z: return c = 0 without touching memory
   387	BLTU	X30, X28, loop1	// fewer than 4 words: skip the unrolled loop
   388
   389loop4:
   390	MOV	0(X5), X8	// x[0]
   391	MOV	0(X7), X10	// z[0]
   392	MOV	8(X5), X11	// x[1]
   393	MOV	8(X7), X13	// z[1]
   394	MOV	16(X5), X14	// x[2]
   395	MOV	16(X7), X16	// z[2]
   396	MOV	24(X5), X17	// x[3]
   397	MOV	24(X7), X19	// z[3]
   398
   399	MULHU	X8, X6, X9	// z_hi[0] = high word of x[0] * y
   400	MUL	X8, X6, X8	// z_lo[0] = low word of x[0] * y (overwrites x[0])
   401	ADD	X8, X10, X21	// X21 = z_lo[0] + z[0]
   402	SLTU	X8, X21, X22	// X22 = 1 if that sum wrapped
   403	ADD	X9, X22, X9	// z_hi[0] += carry from adding z[0]
   404	ADD	X21, X29, X10	// z[0] = x[0] * y + z[0] + c
   405	SLTU	X21, X10, X22	// X22 = 1 if adding c wrapped
   406	ADD	X9, X22, X29	// next c = z_hi[0] + carry
   407
   408	MULHU	X11, X6, X12	// z_hi[1] = high word of x[1] * y
   409	MUL	X11, X6, X11	// z_lo[1] = low word of x[1] * y (overwrites x[1])
   410	ADD	X11, X13, X21	// X21 = z_lo[1] + z[1]
   411	SLTU	X11, X21, X22	// X22 = 1 if that sum wrapped
   412	ADD	X12, X22, X12	// z_hi[1] += carry from adding z[1]
   413	ADD	X21, X29, X13	// z[1] = x[1] * y + z[1] + c
   414	SLTU	X21, X13, X22	// X22 = 1 if adding c wrapped
   415	ADD	X12, X22, X29	// next c = z_hi[1] + carry
   416
   417	MULHU	X14, X6, X15	// z_hi[2] = high word of x[2] * y
   418	MUL	X14, X6, X14	// z_lo[2] = low word of x[2] * y (overwrites x[2])
   419	ADD	X14, X16, X21	// X21 = z_lo[2] + z[2]
   420	SLTU	X14, X21, X22	// X22 = 1 if that sum wrapped
   421	ADD	X15, X22, X15	// z_hi[2] += carry from adding z[2]
   422	ADD	X21, X29, X16	// z[2] = x[2] * y + z[2] + c
   423	SLTU	X21, X16, X22	// X22 = 1 if adding c wrapped
   424	ADD	X15, X22, X29	// next c = z_hi[2] + carry
   425
   426	MULHU	X17, X6, X18	// z_hi[3] = high word of x[3] * y
   427	MUL	X17, X6, X17	// z_lo[3] = low word of x[3] * y (overwrites x[3])
   428	ADD	X17, X19, X21	// X21 = z_lo[3] + z[3]
   429	SLTU	X17, X21, X22	// X22 = 1 if that sum wrapped
   430	ADD	X18, X22, X18	// z_hi[3] += carry from adding z[3]
   431	ADD	X21, X29, X19	// z[3] = x[3] * y + z[3] + c
   432	SLTU	X21, X19, X22	// X22 = 1 if adding c wrapped
   433	ADD	X18, X22, X29	// next c = z_hi[3] + carry
   434
   435	MOV	X10, 0(X7)	// z[0]
   436	MOV	X13, 8(X7)	// z[1]
   437	MOV	X16, 16(X7)	// z[2]
   438	MOV	X19, 24(X7)	// z[3]
   439
   440	ADD	$32, X5	// advance x and z by 4 words (32 bytes)
   441	ADD	$32, X7
   442	SUB	$4, X30	// 4 fewer words left
   443
   444	BGEU	X30, X28, loop4	// at least 4 words left: another unrolled pass
   445	BEQZ	X30, done	// no remainder to handle
   446
   447loop1:
   448	MOV	0(X5), X10	// x
   449	MOV	0(X7), X11	// z
   450
   451	MULHU	X10, X6, X12	// z_hi = high word of x * y
   452	MUL	X10, X6, X10	// z_lo = low word of x * y (overwrites x)
   453	ADD	X10, X11, X13	// X13 = z_lo + z
   454	SLTU	X10, X13, X15	// X15 = 1 if that sum wrapped
   455	ADD	X12, X15, X12	// z_hi += carry from adding z
   456	ADD	X13, X29, X10	// z = x * y + z + c
   457	SLTU	X13, X10, X15	// X15 = 1 if adding c wrapped
   458	ADD	X12, X15, X29	// next c = z_hi + carry
   459
   460	MOV	X10, 0(X7)	// z
   461
   462	ADD	$8, X5	// advance x and z by one word
   463	ADD	$8, X7
   464	SUB	$1, X30	// one fewer word left
   465
   466	BNEZ	X30, loop1
   467
   468done:
   469	MOV	X29, c+56(FP)	// return c
   470	RET

View as plain text