        -:    0:Source:/usr/lib/gcc/x86_64-linux-gnu/14/include/emmintrin.h
        -:    0:Graph:tspi4.gcno
        -:    0:Data:tspi4.gcda
        -:    0:Runs:1
        -:    1:/* Copyright (C) 2003-2024 Free Software Foundation, Inc.
        -:    2:
        -:    3:   This file is part of GCC.
        -:    4:
        -:    5:   GCC is free software; you can redistribute it and/or modify
        -:    6:   it under the terms of the GNU General Public License as published by
        -:    7:   the Free Software Foundation; either version 3, or (at your option)
        -:    8:   any later version.
        -:    9:
        -:   10:   GCC is distributed in the hope that it will be useful,
        -:   11:   but WITHOUT ANY WARRANTY; without even the implied warranty of
        -:   12:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        -:   13:   GNU General Public License for more details.
        -:   14:
        -:   15:   Under Section 7 of GPL version 3, you are granted additional
        -:   16:   permissions described in the GCC Runtime Library Exception, version
        -:   17:   3.1, as published by the Free Software Foundation.
        -:   18:
        -:   19:   You should have received a copy of the GNU General Public License and
        -:   20:   a copy of the GCC Runtime Library Exception along with this program;
        -:   21:   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
        -:   22:   <http://www.gnu.org/licenses/>.  */
        -:   23:
        -:   24:/* Implemented from the specification included in the Intel C++ Compiler
        -:   25:   User Guide and Reference, version 9.0.  */
        -:   26:
        -:   27:#ifndef _EMMINTRIN_H_INCLUDED
        -:   28:#define _EMMINTRIN_H_INCLUDED
        -:   29:
        -:   30:/* We need definitions from the SSE header files*/
        -:   31:#include <xmmintrin.h>
        -:   32:
        -:   33:#ifndef __SSE2__
        -:   34:#pragma GCC push_options
        -:   35:#pragma GCC target("sse2")
        -:   36:#define __DISABLE_SSE2__
        -:   37:#endif /* __SSE2__ */
        -:   38:
        -:   39:/* SSE2 */
        -:   40:typedef double __v2df __attribute__ ((__vector_size__ (16)));
        -:   41:typedef long long __v2di __attribute__ ((__vector_size__ (16)));
        -:   42:typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
        -:   43:typedef int __v4si __attribute__ ((__vector_size__ (16)));
        -:   44:typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
        -:   45:typedef short __v8hi __attribute__ ((__vector_size__ (16)));
        -:   46:typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
        -:   47:typedef char __v16qi __attribute__ ((__vector_size__ (16)));
        -:   48:typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
        -:   49:typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
        -:   50:
        -:   51:/* The Intel API is flexible enough that we must allow aliasing with other
        -:   52:   vector types, and their scalar components.  */
        -:   53:typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
        -:   54:typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
        -:   55:
        -:   56:/* Unaligned version of the same types.  */
        -:   57:typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
        -:   58:typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
        -:   59:
        -:   60:/* Create a selector for use with the SHUFPD instruction.  */
        -:   61:#define _MM_SHUFFLE2(fp1,fp0) \
        -:   62: (((fp1) << 1) | (fp0))
        -:   63:
        -:   64:/* Create a vector with element 0 as F and the rest zero.  */
        -:   65:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:   66:_mm_set_sd (double __F)
        -:   67:{
        -:   68:  return __extension__ (__m128d){ __F, 0.0 };
        -:   69:}
        -:   70:
        -:   71:/* Create a vector with both elements equal to F.  */
        -:   72:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:   73:_mm_set1_pd (double __F)
        -:   74:{
        -:   75:  return __extension__ (__m128d){ __F, __F };
        -:   76:}
        -:   77:
        -:   78:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:   79:_mm_set_pd1 (double __F)
        -:   80:{
        -:   81:  return _mm_set1_pd (__F);
        -:   82:}
        -:   83:
        -:   84:/* Create a vector with the lower value X and upper value W.  */
        -:   85:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:   86:_mm_set_pd (double __W, double __X)
        -:   87:{
        -:   88:  return __extension__ (__m128d){ __X, __W };
        -:   89:}
        -:   90:
        -:   91:/* Create a vector with the lower value W and upper value X.  */
        -:   92:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:   93:_mm_setr_pd (double __W, double __X)
        -:   94:{
        -:   95:  return __extension__ (__m128d){ __W, __X };
        -:   96:}
        -:   97:
        -:   98:/* Create an undefined vector.  */
        -:   99:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  100:_mm_undefined_pd (void)
        -:  101:{
        -:  102:#pragma GCC diagnostic push
        -:  103:#pragma GCC diagnostic ignored "-Winit-self"
        -:  104:  __m128d __Y = __Y;
        -:  105:#pragma GCC diagnostic pop
        -:  106:  return __Y;
        -:  107:}
        -:  108:
        -:  109:/* Create a vector of zeros.  */
        -:  110:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  111:_mm_setzero_pd (void)
        -:  112:{
        -:  113:  return __extension__ (__m128d){ 0.0, 0.0 };
        -:  114:}
        -:  115:
        -:  116:/* Sets the low DPFP value of A from the low value of B.  */
        -:  117:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  118:_mm_move_sd (__m128d __A, __m128d __B)
        -:  119:{
        -:  120:  return __extension__ (__m128d) __builtin_shuffle ((__v2df)__A, (__v2df)__B, (__v2di){2, 1});
        -:  121:}
        -:  122:
        -:  123:/* Load two DPFP values from P.  The address must be 16-byte aligned.  */
        -:  124:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  125:_mm_load_pd (double const *__P)
        -:  126:{
        -:  127:  return *(__m128d *)__P;
        -:  128:}
        -:  129:
        -:  130:/* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
        -:  131:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  132:_mm_loadu_pd (double const *__P)
        -:  133:{
        -:  134:  return *(__m128d_u *)__P;
        -:  135:}
        -:  136:
        -:  137:/* Create a vector with all two elements equal to *P.  */
        -:  138:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  139:_mm_load1_pd (double const *__P)
        -:  140:{
        -:  141:  return _mm_set1_pd (*__P);
        -:  142:}
        -:  143:
        -:  144:/* Create a vector with element 0 as *P and the rest zero.  */
        -:  145:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  146:_mm_load_sd (double const *__P)
        -:  147:{
        -:  148:  return _mm_set_sd (*__P);
        -:  149:}
        -:  150:
        -:  151:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  152:_mm_load_pd1 (double const *__P)
        -:  153:{
        -:  154:  return _mm_load1_pd (__P);
        -:  155:}
        -:  156:
        -:  157:/* Load two DPFP values in reverse order.  The address must be aligned.  */
        -:  158:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  159:_mm_loadr_pd (double const *__P)
        -:  160:{
        -:  161:  __m128d __tmp = _mm_load_pd (__P);
        -:  162:  return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
        -:  163:}
        -:  164:
        -:  165:/* Store two DPFP values.  The address must be 16-byte aligned.  */
        -:  166:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  167:_mm_store_pd (double *__P, __m128d __A)
        -:  168:{
        -:  169:  *(__m128d *)__P = __A;
        -:  170:}
        -:  171:
        -:  172:/* Store two DPFP values.  The address need not be 16-byte aligned.  */
        -:  173:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  174:_mm_storeu_pd (double *__P, __m128d __A)
        -:  175:{
        -:  176:  *(__m128d_u *)__P = __A;
        -:  177:}
        -:  178:
        -:  179:/* Stores the lower DPFP value.  */
        -:  180:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  181:_mm_store_sd (double *__P, __m128d __A)
        -:  182:{
    90182:  183:  *__P = ((__v2df)__A)[0];
        -:  184:}
        -:  185:
        -:  186:extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  187:_mm_cvtsd_f64 (__m128d __A)
        -:  188:{
        -:  189:  return ((__v2df)__A)[0];
        -:  190:}
        -:  191:
        -:  192:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  193:_mm_storel_pd (double *__P, __m128d __A)
        -:  194:{
        -:  195:  _mm_store_sd (__P, __A);
        -:  196:}
        -:  197:
        -:  198:/* Stores the upper DPFP value.  */
        -:  199:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  200:_mm_storeh_pd (double *__P, __m128d __A)
        -:  201:{
        -:  202:  *__P = ((__v2df)__A)[1];
        -:  203:}
        -:  204:
        -:  205:/* Store the lower DPFP value across two words.
        -:  206:   The address must be 16-byte aligned.  */
        -:  207:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  208:_mm_store1_pd (double *__P, __m128d __A)
        -:  209:{
        -:  210:  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
        -:  211:}
        -:  212:
        -:  213:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  214:_mm_store_pd1 (double *__P, __m128d __A)
        -:  215:{
        -:  216:  _mm_store1_pd (__P, __A);
        -:  217:}
        -:  218:
        -:  219:/* Store two DPFP values in reverse order.  The address must be aligned.  */
        -:  220:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  221:_mm_storer_pd (double *__P, __m128d __A)
        -:  222:{
        -:  223:  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
        -:  224:}
        -:  225:
        -:  226:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  227:_mm_cvtsi128_si32 (__m128i __A)
        -:  228:{
        -:  229:  return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
        -:  230:}
        -:  231:
        -:  232:#ifdef __x86_64__
        -:  233:/* Intel intrinsic.  */
        -:  234:extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  235:_mm_cvtsi128_si64 (__m128i __A)
        -:  236:{
        -:  237:  return ((__v2di)__A)[0];
        -:  238:}
        -:  239:
        -:  240:/* Microsoft intrinsic.  */
        -:  241:extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  242:_mm_cvtsi128_si64x (__m128i __A)
        -:  243:{
        -:  244:  return ((__v2di)__A)[0];
        -:  245:}
        -:  246:#endif
        -:  247:
        -:  248:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  249:_mm_add_pd (__m128d __A, __m128d __B)
        -:  250:{
        -:  251:  return (__m128d) ((__v2df)__A + (__v2df)__B);
        -:  252:}
        -:  253:
        -:  254:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  255:_mm_add_sd (__m128d __A, __m128d __B)
        -:  256:{
        -:  257:  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
        -:  258:}
        -:  259:
        -:  260:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  261:_mm_sub_pd (__m128d __A, __m128d __B)
        -:  262:{
        -:  263:  return (__m128d) ((__v2df)__A - (__v2df)__B);
        -:  264:}
        -:  265:
        -:  266:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  267:_mm_sub_sd (__m128d __A, __m128d __B)
        -:  268:{
        -:  269:  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
        -:  270:}
        -:  271:
        -:  272:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  273:_mm_mul_pd (__m128d __A, __m128d __B)
        -:  274:{
        -:  275:  return (__m128d) ((__v2df)__A * (__v2df)__B);
        -:  276:}
        -:  277:
        -:  278:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  279:_mm_mul_sd (__m128d __A, __m128d __B)
        -:  280:{
        -:  281:  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
        -:  282:}
        -:  283:
        -:  284:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  285:_mm_div_pd (__m128d __A, __m128d __B)
        -:  286:{
        -:  287:  return (__m128d) ((__v2df)__A / (__v2df)__B);
        -:  288:}
        -:  289:
        -:  290:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  291:_mm_div_sd (__m128d __A, __m128d __B)
        -:  292:{
        -:  293:  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
        -:  294:}
        -:  295:
        -:  296:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  297:_mm_sqrt_pd (__m128d __A)
        -:  298:{
        -:  299:  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
        -:  300:}
        -:  301:
        -:  302:/* Return pair {sqrt (B[0]), A[1]}.  */
        -:  303:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  304:_mm_sqrt_sd (__m128d __A, __m128d __B)
        -:  305:{
        -:  306:  __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
        -:  307:  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
        -:  308:}
        -:  309:
        -:  310:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  311:_mm_min_pd (__m128d __A, __m128d __B)
        -:  312:{
        -:  313:  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
        -:  314:}
        -:  315:
        -:  316:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  317:_mm_min_sd (__m128d __A, __m128d __B)
        -:  318:{
        -:  319:  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
        -:  320:}
        -:  321:
        -:  322:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  323:_mm_max_pd (__m128d __A, __m128d __B)
        -:  324:{
        -:  325:  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
        -:  326:}
        -:  327:
        -:  328:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  329:_mm_max_sd (__m128d __A, __m128d __B)
        -:  330:{
        -:  331:  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
        -:  332:}
        -:  333:
        -:  334:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  335:_mm_and_pd (__m128d __A, __m128d __B)
        -:  336:{
        -:  337:  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
        -:  338:}
        -:  339:
        -:  340:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  341:_mm_andnot_pd (__m128d __A, __m128d __B)
        -:  342:{
        -:  343:  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
        -:  344:}
        -:  345:
        -:  346:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  347:_mm_or_pd (__m128d __A, __m128d __B)
        -:  348:{
        -:  349:  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
        -:  350:}
        -:  351:
        -:  352:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  353:_mm_xor_pd (__m128d __A, __m128d __B)
        -:  354:{
        -:  355:  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
        -:  356:}
        -:  357:
        -:  358:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  359:_mm_cmpeq_pd (__m128d __A, __m128d __B)
        -:  360:{
        -:  361:  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
        -:  362:}
        -:  363:
        -:  364:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  365:_mm_cmplt_pd (__m128d __A, __m128d __B)
        -:  366:{
        -:  367:  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
        -:  368:}
        -:  369:
        -:  370:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  371:_mm_cmple_pd (__m128d __A, __m128d __B)
        -:  372:{
        -:  373:  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
        -:  374:}
        -:  375:
        -:  376:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  377:_mm_cmpgt_pd (__m128d __A, __m128d __B)
        -:  378:{
        -:  379:  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
        -:  380:}
        -:  381:
        -:  382:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  383:_mm_cmpge_pd (__m128d __A, __m128d __B)
        -:  384:{
        -:  385:  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
        -:  386:}
        -:  387:
        -:  388:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  389:_mm_cmpneq_pd (__m128d __A, __m128d __B)
        -:  390:{
        -:  391:  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
        -:  392:}
        -:  393:
        -:  394:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  395:_mm_cmpnlt_pd (__m128d __A, __m128d __B)
        -:  396:{
        -:  397:  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
        -:  398:}
        -:  399:
        -:  400:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  401:_mm_cmpnle_pd (__m128d __A, __m128d __B)
        -:  402:{
        -:  403:  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
        -:  404:}
        -:  405:
        -:  406:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  407:_mm_cmpngt_pd (__m128d __A, __m128d __B)
        -:  408:{
        -:  409:  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
        -:  410:}
        -:  411:
        -:  412:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  413:_mm_cmpnge_pd (__m128d __A, __m128d __B)
        -:  414:{
        -:  415:  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
        -:  416:}
        -:  417:
        -:  418:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  419:_mm_cmpord_pd (__m128d __A, __m128d __B)
        -:  420:{
        -:  421:  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
        -:  422:}
        -:  423:
        -:  424:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  425:_mm_cmpunord_pd (__m128d __A, __m128d __B)
        -:  426:{
        -:  427:  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
        -:  428:}
        -:  429:
        -:  430:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  431:_mm_cmpeq_sd (__m128d __A, __m128d __B)
        -:  432:{
        -:  433:  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
        -:  434:}
        -:  435:
        -:  436:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  437:_mm_cmplt_sd (__m128d __A, __m128d __B)
        -:  438:{
        -:  439:  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
        -:  440:}
        -:  441:
        -:  442:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  443:_mm_cmple_sd (__m128d __A, __m128d __B)
        -:  444:{
        -:  445:  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
        -:  446:}
        -:  447:
        -:  448:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  449:_mm_cmpgt_sd (__m128d __A, __m128d __B)
        -:  450:{
        -:  451:  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
        -:  452:					 (__v2df)
        -:  453:					 __builtin_ia32_cmpltsd ((__v2df) __B,
        -:  454:								 (__v2df)
        -:  455:								 __A));
        -:  456:}
        -:  457:
        -:  458:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  459:_mm_cmpge_sd (__m128d __A, __m128d __B)
        -:  460:{
        -:  461:  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
        -:  462:					 (__v2df)
        -:  463:					 __builtin_ia32_cmplesd ((__v2df) __B,
        -:  464:								 (__v2df)
        -:  465:								 __A));
        -:  466:}
        -:  467:
        -:  468:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  469:_mm_cmpneq_sd (__m128d __A, __m128d __B)
        -:  470:{
        -:  471:  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
        -:  472:}
        -:  473:
        -:  474:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  475:_mm_cmpnlt_sd (__m128d __A, __m128d __B)
        -:  476:{
        -:  477:  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
        -:  478:}
        -:  479:
        -:  480:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  481:_mm_cmpnle_sd (__m128d __A, __m128d __B)
        -:  482:{
        -:  483:  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
        -:  484:}
        -:  485:
        -:  486:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  487:_mm_cmpngt_sd (__m128d __A, __m128d __B)
        -:  488:{
        -:  489:  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
        -:  490:					 (__v2df)
        -:  491:					 __builtin_ia32_cmpnltsd ((__v2df) __B,
        -:  492:								  (__v2df)
        -:  493:								  __A));
        -:  494:}
        -:  495:
        -:  496:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  497:_mm_cmpnge_sd (__m128d __A, __m128d __B)
        -:  498:{
        -:  499:  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
        -:  500:					 (__v2df)
        -:  501:					 __builtin_ia32_cmpnlesd ((__v2df) __B,
        -:  502:								  (__v2df)
        -:  503:								  __A));
        -:  504:}
        -:  505:
        -:  506:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  507:_mm_cmpord_sd (__m128d __A, __m128d __B)
        -:  508:{
        -:  509:  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
        -:  510:}
        -:  511:
        -:  512:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  513:_mm_cmpunord_sd (__m128d __A, __m128d __B)
        -:  514:{
        -:  515:  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
        -:  516:}
        -:  517:
        -:  518:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  519:_mm_comieq_sd (__m128d __A, __m128d __B)
        -:  520:{
        -:  521:  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
        -:  522:}
        -:  523:
        -:  524:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  525:_mm_comilt_sd (__m128d __A, __m128d __B)
        -:  526:{
        -:  527:  return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
        -:  528:}
        -:  529:
        -:  530:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  531:_mm_comile_sd (__m128d __A, __m128d __B)
        -:  532:{
        -:  533:  return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
        -:  534:}
        -:  535:
        -:  536:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  537:_mm_comigt_sd (__m128d __A, __m128d __B)
        -:  538:{
        -:  539:  return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
        -:  540:}
        -:  541:
        -:  542:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  543:_mm_comige_sd (__m128d __A, __m128d __B)
        -:  544:{
        -:  545:  return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
        -:  546:}
        -:  547:
        -:  548:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  549:_mm_comineq_sd (__m128d __A, __m128d __B)
        -:  550:{
        -:  551:  return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
        -:  552:}
        -:  553:
        -:  554:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  555:_mm_ucomieq_sd (__m128d __A, __m128d __B)
        -:  556:{
        -:  557:  return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
        -:  558:}
        -:  559:
        -:  560:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  561:_mm_ucomilt_sd (__m128d __A, __m128d __B)
        -:  562:{
        -:  563:  return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
        -:  564:}
        -:  565:
        -:  566:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  567:_mm_ucomile_sd (__m128d __A, __m128d __B)
        -:  568:{
        -:  569:  return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
        -:  570:}
        -:  571:
        -:  572:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  573:_mm_ucomigt_sd (__m128d __A, __m128d __B)
        -:  574:{
        -:  575:  return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
        -:  576:}
        -:  577:
        -:  578:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  579:_mm_ucomige_sd (__m128d __A, __m128d __B)
        -:  580:{
        -:  581:  return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
        -:  582:}
        -:  583:
        -:  584:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  585:_mm_ucomineq_sd (__m128d __A, __m128d __B)
        -:  586:{
        -:  587:  return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
        -:  588:}
        -:  589:
        -:  590:/* Create a vector of Qi, where i is the element number.  */
        -:  591:
        -:  592:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  593:_mm_set_epi64x (long long __q1, long long __q0)
        -:  594:{
        -:  595:  return __extension__ (__m128i)(__v2di){ __q0, __q1 };
        -:  596:}
        -:  597:
        -:  598:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  599:_mm_set_epi64 (__m64 __q1,  __m64 __q0)
        -:  600:{
        -:  601:  return _mm_set_epi64x ((long long)__q1, (long long)__q0);
        -:  602:}
        -:  603:
        -:  604:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  605:_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
        -:  606:{
        -:  607:  return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
        -:  608:}
        -:  609:
        -:  610:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  611:_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
        -:  612:	       short __q3, short __q2, short __q1, short __q0)
        -:  613:{
        -:  614:  return __extension__ (__m128i)(__v8hi){
        -:  615:    __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
        -:  616:}
        -:  617:
        -:  618:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  619:_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
        -:  620:	      char __q11, char __q10, char __q09, char __q08,
        -:  621:	      char __q07, char __q06, char __q05, char __q04,
        -:  622:	      char __q03, char __q02, char __q01, char __q00)
        -:  623:{
        -:  624:  return __extension__ (__m128i)(__v16qi){
        -:  625:    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
        -:  626:    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
        -:  627:  };
        -:  628:}
        -:  629:
        -:  630:/* Set all of the elements of the vector to A.  */
        -:  631:
        -:  632:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  633:_mm_set1_epi64x (long long __A)
        -:  634:{
        -:  635:  return _mm_set_epi64x (__A, __A);
        -:  636:}
        -:  637:
        -:  638:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  639:_mm_set1_epi64 (__m64 __A)
        -:  640:{
        -:  641:  return _mm_set_epi64 (__A, __A);
        -:  642:}
        -:  643:
        -:  644:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  645:_mm_set1_epi32 (int __A)
        -:  646:{
        -:  647:  return _mm_set_epi32 (__A, __A, __A, __A);
        -:  648:}
        -:  649:
        -:  650:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  651:_mm_set1_epi16 (short __A)
        -:  652:{
        -:  653:  return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
        -:  654:}
        -:  655:
        -:  656:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  657:_mm_set1_epi8 (char __A)
        -:  658:{
        -:  659:  return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
        -:  660:		       __A, __A, __A, __A, __A, __A, __A, __A);
        -:  661:}
        -:  662:
        -:  663:/* Create a vector of Qi, where i is the element number.
        -:  664:   The parameter order is reversed from the _mm_set_epi* functions.  */
        -:  665:
        -:  666:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  667:_mm_setr_epi64 (__m64 __q0, __m64 __q1)
        -:  668:{
        -:  669:  return _mm_set_epi64 (__q1, __q0);
        -:  670:}
        -:  671:
        -:  672:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  673:_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
        -:  674:{
        -:  675:  return _mm_set_epi32 (__q3, __q2, __q1, __q0);
        -:  676:}
        -:  677:
        -:  678:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  679:_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
        -:  680:	        short __q4, short __q5, short __q6, short __q7)
        -:  681:{
        -:  682:  return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
        -:  683:}
        -:  684:
        -:  685:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  686:_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
        -:  687:	       char __q04, char __q05, char __q06, char __q07,
        -:  688:	       char __q08, char __q09, char __q10, char __q11,
        -:  689:	       char __q12, char __q13, char __q14, char __q15)
        -:  690:{
        -:  691:  return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
        -:  692:		       __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
        -:  693:}
        -:  694:
        -:  695:/* Create a vector with element 0 as *P and the rest zero.  */
        -:  696:
        -:  697:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  698:_mm_load_si128 (__m128i const *__P)
        -:  699:{
        -:  700:  return *__P;
        -:  701:}
        -:  702:
        -:  703:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  704:_mm_loadu_si128 (__m128i_u const *__P)
        -:  705:{
        -:  706:  return *__P;
        -:  707:}
        -:  708:
        -:  709:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  710:_mm_loadl_epi64 (__m128i_u const *__P)
        -:  711:{
        -:  712:  return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
        -:  713:}
        -:  714:
        -:  715:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  716:_mm_loadu_si64 (void const *__P)
        -:  717:{
        -:  718:  return _mm_loadl_epi64 ((__m128i_u *)__P);
        -:  719:}
        -:  720:
        -:  721:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  722:_mm_loadu_si32 (void const *__P)
        -:  723:{
        -:  724:  return _mm_set_epi32 (0, 0, 0, (*(__m32_u *)__P)[0]);
        -:  725:}
        -:  726:
        -:  727:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  728:_mm_loadu_si16 (void const *__P)
        -:  729:{
        -:  730:  return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, (*(__m16_u *)__P)[0]);
        -:  731:}
        -:  732:
        -:  733:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  734:_mm_store_si128 (__m128i *__P, __m128i __B)
        -:  735:{
        -:  736:  *__P = __B;
        -:  737:}
        -:  738:
        -:  739:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  740:_mm_storeu_si128 (__m128i_u *__P, __m128i __B)
        -:  741:{
        -:  742:  *__P = __B;
        -:  743:}
        -:  744:
        -:  745:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  746:_mm_storel_epi64 (__m128i_u *__P, __m128i __B)
        -:  747:{
        -:  748:  *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
        -:  749:}
        -:  750:
        -:  751:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  752:_mm_storeu_si64 (void *__P, __m128i __B)
        -:  753:{
        -:  754:  _mm_storel_epi64 ((__m128i_u *)__P, __B);
        -:  755:}
        -:  756:
        -:  757:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  758:_mm_storeu_si32 (void *__P, __m128i __B)
        -:  759:{
        -:  760:  *(__m32_u *)__P = (__m32) ((__v4si)__B)[0];
        -:  761:}
        -:  762:
        -:  763:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  764:_mm_storeu_si16 (void *__P, __m128i __B)
        -:  765:{
        -:  766:  *(__m16_u *)__P = (__m16) ((__v8hi)__B)[0];
        -:  767:}
        -:  768:
        -:  769:extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  770:_mm_movepi64_pi64 (__m128i __B)
        -:  771:{
        -:  772:  return (__m64) ((__v2di)__B)[0];
        -:  773:}
        -:  774:
        -:  775:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  776:_mm_movpi64_epi64 (__m64 __A)
        -:  777:{
        -:  778:  return _mm_set_epi64 ((__m64)0LL, __A);
        -:  779:}
        -:  780:
        -:  781:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  782:_mm_move_epi64 (__m128i __A)
        -:  783:{
        -:  784:  return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
        -:  785:}
        -:  786:
        -:  787:/* Create an undefined vector.  */
        -:  788:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  789:_mm_undefined_si128 (void)
        -:  790:{
        -:  791:#pragma GCC diagnostic push
        -:  792:#pragma GCC diagnostic ignored "-Winit-self"
        -:  793:  __m128i __Y = __Y;
        -:  794:#pragma GCC diagnostic pop
        -:  795:  return __Y;
        -:  796:}
        -:  797:
        -:  798:/* Create a vector of zeros.  */
        -:  799:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  800:_mm_setzero_si128 (void)
        -:  801:{
        -:  802:  return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
        -:  803:}
        -:  804:
        -:  805:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  806:_mm_cvtepi32_pd (__m128i __A)
        -:  807:{
        -:  808:  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
        -:  809:}
        -:  810:
        -:  811:extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  812:_mm_cvtepi32_ps (__m128i __A)
        -:  813:{
        -:  814:  return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
        -:  815:}
        -:  816:
        -:  817:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  818:_mm_cvtpd_epi32 (__m128d __A)
        -:  819:{
        -:  820:  return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
        -:  821:}
        -:  822:
        -:  823:extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  824:_mm_cvtpd_pi32 (__m128d __A)
        -:  825:{
        -:  826:  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
        -:  827:}
        -:  828:
        -:  829:extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  830:_mm_cvtpd_ps (__m128d __A)
        -:  831:{
        -:  832:  return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
        -:  833:}
        -:  834:
        -:  835:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  836:_mm_cvttpd_epi32 (__m128d __A)
        -:  837:{
        -:  838:  return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
        -:  839:}
        -:  840:
        -:  841:extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  842:_mm_cvttpd_pi32 (__m128d __A)
        -:  843:{
        -:  844:  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
        -:  845:}
        -:  846:
        -:  847:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  848:_mm_cvtpi32_pd (__m64 __A)
        -:  849:{
        -:  850:  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
        -:  851:}
        -:  852:
        -:  853:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  854:_mm_cvtps_epi32 (__m128 __A)
        -:  855:{
        -:  856:  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
        -:  857:}
        -:  858:
        -:  859:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  860:_mm_cvttps_epi32 (__m128 __A)
        -:  861:{
        -:  862:  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
        -:  863:}
        -:  864:
        -:  865:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  866:_mm_cvtps_pd (__m128 __A)
        -:  867:{
        -:  868:  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
        -:  869:}
        -:  870:
        -:  871:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  872:_mm_cvtsd_si32 (__m128d __A)
        -:  873:{
        -:  874:  return __builtin_ia32_cvtsd2si ((__v2df) __A);
        -:  875:}
        -:  876:
        -:  877:#ifdef __x86_64__
        -:  878:/* Intel intrinsic.  */
        -:  879:extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  880:_mm_cvtsd_si64 (__m128d __A)
        -:  881:{
        -:  882:  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
        -:  883:}
        -:  884:
        -:  885:/* Microsoft intrinsic.  */
        -:  886:extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  887:_mm_cvtsd_si64x (__m128d __A)
        -:  888:{
        -:  889:  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
        -:  890:}
        -:  891:#endif
        -:  892:
        -:  893:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  894:_mm_cvttsd_si32 (__m128d __A)
        -:  895:{
        -:  896:  return __builtin_ia32_cvttsd2si ((__v2df) __A);
        -:  897:}
        -:  898:
        -:  899:#ifdef __x86_64__
        -:  900:/* Intel intrinsic.  */
        -:  901:extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  902:_mm_cvttsd_si64 (__m128d __A)
        -:  903:{
        -:  904:  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
        -:  905:}
        -:  906:
        -:  907:/* Microsoft intrinsic.  */
        -:  908:extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  909:_mm_cvttsd_si64x (__m128d __A)
        -:  910:{
        -:  911:  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
        -:  912:}
        -:  913:#endif
        -:  914:
        -:  915:extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  916:_mm_cvtsd_ss (__m128 __A, __m128d __B)
        -:  917:{
        -:  918:  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
        -:  919:}
        -:  920:
        -:  921:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  922:_mm_cvtsi32_sd (__m128d __A, int __B)
        -:  923:{
        -:  924:  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
        -:  925:}
        -:  926:
        -:  927:#ifdef __x86_64__
        -:  928:/* Intel intrinsic.  */
        -:  929:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  930:_mm_cvtsi64_sd (__m128d __A, long long __B)
        -:  931:{
        -:  932:  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
        -:  933:}
        -:  934:
        -:  935:/* Microsoft intrinsic.  */
        -:  936:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  937:_mm_cvtsi64x_sd (__m128d __A, long long __B)
        -:  938:{
        -:  939:  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
        -:  940:}
        -:  941:#endif
        -:  942:
        -:  943:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  944:_mm_cvtss_sd (__m128d __A, __m128 __B)
        -:  945:{
        -:  946:  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
        -:  947:}
        -:  948:
        -:  949:#ifdef __OPTIMIZE__
        -:  950:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  951:_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
        -:  952:{
        -:  953:  return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
        -:  954:}
        -:  955:#else
        -:  956:#define _mm_shuffle_pd(A, B, N)						\
        -:  957:  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
        -:  958:				   (__v2df)(__m128d)(B), (int)(N)))
        -:  959:#endif
        -:  960:
        -:  961:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  962:_mm_unpackhi_pd (__m128d __A, __m128d __B)
        -:  963:{
        -:  964:  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
        -:  965:}
        -:  966:
        -:  967:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  968:_mm_unpacklo_pd (__m128d __A, __m128d __B)
        -:  969:{
        -:  970:  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
        -:  971:}
        -:  972:
        -:  973:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  974:_mm_loadh_pd (__m128d __A, double const *__B)
        -:  975:{
        -:  976:  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
        -:  977:}
        -:  978:
        -:  979:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  980:_mm_loadl_pd (__m128d __A, double const *__B)
        -:  981:{
        -:  982:  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
        -:  983:}
        -:  984:
        -:  985:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  986:_mm_movemask_pd (__m128d __A)
        -:  987:{
        -:  988:  return __builtin_ia32_movmskpd ((__v2df)__A);
        -:  989:}
        -:  990:
        -:  991:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  992:_mm_packs_epi16 (__m128i __A, __m128i __B)
        -:  993:{
        -:  994:  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
        -:  995:}
        -:  996:
        -:  997:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -:  998:_mm_packs_epi32 (__m128i __A, __m128i __B)
        -:  999:{
        -: 1000:  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
        -: 1001:}
        -: 1002:
        -: 1003:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1004:_mm_packus_epi16 (__m128i __A, __m128i __B)
        -: 1005:{
        -: 1006:  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
        -: 1007:}
        -: 1008:
        -: 1009:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1010:_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
        -: 1011:{
        -: 1012:  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
        -: 1013:}
        -: 1014:
        -: 1015:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1016:_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
        -: 1017:{
        -: 1018:  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
        -: 1019:}
        -: 1020:
        -: 1021:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1022:_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
        -: 1023:{
        -: 1024:  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
        -: 1025:}
        -: 1026:
        -: 1027:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1028:_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
        -: 1029:{
        -: 1030:  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
        -: 1031:}
        -: 1032:
        -: 1033:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1034:_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
        -: 1035:{
        -: 1036:  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
        -: 1037:}
        -: 1038:
        -: 1039:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1040:_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
        -: 1041:{
        -: 1042:  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
        -: 1043:}
        -: 1044:
        -: 1045:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1046:_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
        -: 1047:{
        -: 1048:  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
        -: 1049:}
        -: 1050:
        -: 1051:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1052:_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
        -: 1053:{
        -: 1054:  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
        -: 1055:}
        -: 1056:
        -: 1057:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1058:_mm_add_epi8 (__m128i __A, __m128i __B)
        -: 1059:{
        -: 1060:  return (__m128i) ((__v16qu)__A + (__v16qu)__B);
        -: 1061:}
        -: 1062:
        -: 1063:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1064:_mm_add_epi16 (__m128i __A, __m128i __B)
        -: 1065:{
        -: 1066:  return (__m128i) ((__v8hu)__A + (__v8hu)__B);
        -: 1067:}
        -: 1068:
        -: 1069:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1070:_mm_add_epi32 (__m128i __A, __m128i __B)
        -: 1071:{
        -: 1072:  return (__m128i) ((__v4su)__A + (__v4su)__B);
        -: 1073:}
        -: 1074:
        -: 1075:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1076:_mm_add_epi64 (__m128i __A, __m128i __B)
        -: 1077:{
        -: 1078:  return (__m128i) ((__v2du)__A + (__v2du)__B);
        -: 1079:}
        -: 1080:
        -: 1081:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1082:_mm_adds_epi8 (__m128i __A, __m128i __B)
        -: 1083:{
        -: 1084:  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
        -: 1085:}
        -: 1086:
        -: 1087:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1088:_mm_adds_epi16 (__m128i __A, __m128i __B)
        -: 1089:{
        -: 1090:  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1091:}
        -: 1092:
        -: 1093:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1094:_mm_adds_epu8 (__m128i __A, __m128i __B)
        -: 1095:{
        -: 1096:  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
        -: 1097:}
        -: 1098:
        -: 1099:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1100:_mm_adds_epu16 (__m128i __A, __m128i __B)
        -: 1101:{
        -: 1102:  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1103:}
        -: 1104:
        -: 1105:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1106:_mm_sub_epi8 (__m128i __A, __m128i __B)
        -: 1107:{
        -: 1108:  return (__m128i) ((__v16qu)__A - (__v16qu)__B);
        -: 1109:}
        -: 1110:
        -: 1111:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1112:_mm_sub_epi16 (__m128i __A, __m128i __B)
        -: 1113:{
        -: 1114:  return (__m128i) ((__v8hu)__A - (__v8hu)__B);
        -: 1115:}
        -: 1116:
        -: 1117:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1118:_mm_sub_epi32 (__m128i __A, __m128i __B)
        -: 1119:{
        -: 1120:  return (__m128i) ((__v4su)__A - (__v4su)__B);
        -: 1121:}
        -: 1122:
        -: 1123:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1124:_mm_sub_epi64 (__m128i __A, __m128i __B)
        -: 1125:{
        -: 1126:  return (__m128i) ((__v2du)__A - (__v2du)__B);
        -: 1127:}
        -: 1128:
        -: 1129:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1130:_mm_subs_epi8 (__m128i __A, __m128i __B)
        -: 1131:{
        -: 1132:  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
        -: 1133:}
        -: 1134:
        -: 1135:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1136:_mm_subs_epi16 (__m128i __A, __m128i __B)
        -: 1137:{
        -: 1138:  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1139:}
        -: 1140:
        -: 1141:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1142:_mm_subs_epu8 (__m128i __A, __m128i __B)
        -: 1143:{
        -: 1144:  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
        -: 1145:}
        -: 1146:
        -: 1147:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1148:_mm_subs_epu16 (__m128i __A, __m128i __B)
        -: 1149:{
        -: 1150:  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1151:}
        -: 1152:
        -: 1153:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1154:_mm_madd_epi16 (__m128i __A, __m128i __B)
        -: 1155:{
        -: 1156:  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
        -: 1157:}
        -: 1158:
        -: 1159:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1160:_mm_mulhi_epi16 (__m128i __A, __m128i __B)
        -: 1161:{
        -: 1162:  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1163:}
        -: 1164:
        -: 1165:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1166:_mm_mullo_epi16 (__m128i __A, __m128i __B)
        -: 1167:{
        -: 1168:  return (__m128i) ((__v8hu)__A * (__v8hu)__B);
        -: 1169:}
        -: 1170:
        -: 1171:extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1172:_mm_mul_su32 (__m64 __A, __m64 __B)
        -: 1173:{
        -: 1174:  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
        -: 1175:}
        -: 1176:
        -: 1177:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1178:_mm_mul_epu32 (__m128i __A, __m128i __B)
        -: 1179:{
        -: 1180:  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
        -: 1181:}
        -: 1182:
        -: 1183:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1184:_mm_slli_epi16 (__m128i __A, int __B)
        -: 1185:{
        -: 1186:  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
        -: 1187:}
        -: 1188:
        -: 1189:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1190:_mm_slli_epi32 (__m128i __A, int __B)
        -: 1191:{
        -: 1192:  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
        -: 1193:}
        -: 1194:
        -: 1195:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1196:_mm_slli_epi64 (__m128i __A, int __B)
        -: 1197:{
        -: 1198:  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
        -: 1199:}
        -: 1200:
        -: 1201:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1202:_mm_srai_epi16 (__m128i __A, int __B)
        -: 1203:{
        -: 1204:  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
        -: 1205:}
        -: 1206:
        -: 1207:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1208:_mm_srai_epi32 (__m128i __A, int __B)
        -: 1209:{
        -: 1210:  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
        -: 1211:}
        -: 1212:
        -: 1213:#ifdef __OPTIMIZE__
        -: 1214:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1215:_mm_bsrli_si128 (__m128i __A, const int __N)
        -: 1216:{
        -: 1217:  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
        -: 1218:}
        -: 1219:
        -: 1220:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1221:_mm_bslli_si128 (__m128i __A, const int __N)
        -: 1222:{
        -: 1223:  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
        -: 1224:}
        -: 1225:
        -: 1226:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1227:_mm_srli_si128 (__m128i __A, const int __N)
        -: 1228:{
        -: 1229:  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
        -: 1230:}
        -: 1231:
        -: 1232:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1233:_mm_slli_si128 (__m128i __A, const int __N)
        -: 1234:{
        -: 1235:  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
        -: 1236:}
        -: 1237:#else
        -: 1238:#define _mm_bsrli_si128(A, N) \
        -: 1239:  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
        -: 1240:#define _mm_bslli_si128(A, N) \
        -: 1241:  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
        -: 1242:#define _mm_srli_si128(A, N) \
        -: 1243:  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
        -: 1244:#define _mm_slli_si128(A, N) \
        -: 1245:  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
        -: 1246:#endif
        -: 1247:
        -: 1248:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1249:_mm_srli_epi16 (__m128i __A, int __B)
        -: 1250:{
        -: 1251:  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
        -: 1252:}
        -: 1253:
        -: 1254:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1255:_mm_srli_epi32 (__m128i __A, int __B)
        -: 1256:{
        -: 1257:  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
        -: 1258:}
        -: 1259:
        -: 1260:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1261:_mm_srli_epi64 (__m128i __A, int __B)
        -: 1262:{
        -: 1263:  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
        -: 1264:}
        -: 1265:
        -: 1266:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1267:_mm_sll_epi16 (__m128i __A, __m128i __B)
        -: 1268:{
        -: 1269:  return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
        -: 1270:}
        -: 1271:
        -: 1272:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1273:_mm_sll_epi32 (__m128i __A, __m128i __B)
        -: 1274:{
        -: 1275:  return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
        -: 1276:}
        -: 1277:
        -: 1278:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1279:_mm_sll_epi64 (__m128i __A, __m128i __B)
        -: 1280:{
        -: 1281:  return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
        -: 1282:}
        -: 1283:
        -: 1284:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1285:_mm_sra_epi16 (__m128i __A, __m128i __B)
        -: 1286:{
        -: 1287:  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1288:}
        -: 1289:
        -: 1290:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1291:_mm_sra_epi32 (__m128i __A, __m128i __B)
        -: 1292:{
        -: 1293:  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
        -: 1294:}
        -: 1295:
        -: 1296:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1297:_mm_srl_epi16 (__m128i __A, __m128i __B)
        -: 1298:{
        -: 1299:  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1300:}
        -: 1301:
        -: 1302:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1303:_mm_srl_epi32 (__m128i __A, __m128i __B)
        -: 1304:{
        -: 1305:  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
        -: 1306:}
        -: 1307:
        -: 1308:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1309:_mm_srl_epi64 (__m128i __A, __m128i __B)
        -: 1310:{ /* Views __A and __B as __v2di, forwards both to the psrlq128 builtin and returns the result as __m128i.  NOTE(review): presumably a logical (zero-filling) right shift of each 64-bit lane by the count in __B -- confirm.  */
        -: 1311:  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
        -: 1312:}
        -: 1313:
        -: 1314:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1315:_mm_and_si128 (__m128i __A, __m128i __B)
        -: 1316:{ /* Bitwise AND of the two operands, written with the vector `&' operator on their __v2du views rather than a builtin; the result is cast back to __m128i.  */
        -: 1317:  return (__m128i) ((__v2du)__A & (__v2du)__B);
        -: 1318:}
        -: 1319:
        -: 1320:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1321:_mm_andnot_si128 (__m128i __A, __m128i __B)
        -: 1322:{ /* Forwards the __v2di views of __A and __B to the pandn128 builtin and returns the result as __m128i.  NOTE(review): presumably computes (~__A) & __B, i.e. only the first operand is inverted -- confirm against the Intel guide.  */
        -: 1323:  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
        -: 1324:}
        -: 1325:
        -: 1326:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1327:_mm_or_si128 (__m128i __A, __m128i __B)
        -: 1328:{ /* Bitwise OR of the two operands, written with the vector `|' operator on their __v2du views rather than a builtin; the result is cast back to __m128i.  */
        -: 1329:  return (__m128i) ((__v2du)__A | (__v2du)__B);
        -: 1330:}
        -: 1331:
        -: 1332:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1333:_mm_xor_si128 (__m128i __A, __m128i __B)
        -: 1334:{ /* Bitwise XOR of the two operands, written with the vector `^' operator on their __v2du views rather than a builtin; the result is cast back to __m128i.  */
        -: 1335:  return (__m128i) ((__v2du)__A ^ (__v2du)__B);
        -: 1336:}
        -: 1337:
        -: 1338:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1339:_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
        -: 1340:{ /* Element-wise `==' on the __v16qi views of __A and __B using GCC's vector comparison operator; the mask it yields (per the GCC vector-extension rules, -1 in equal elements and 0 elsewhere) is returned as __m128i.  */
        -: 1341:  return (__m128i) ((__v16qi)__A == (__v16qi)__B);
        -: 1342:}
        -: 1343:
        -: 1344:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1345:_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
        -: 1346:{ /* Element-wise `==' on the __v8hi views of __A and __B using GCC's vector comparison operator; the mask it yields (per the GCC vector-extension rules, -1 in equal elements and 0 elsewhere) is returned as __m128i.  */
        -: 1347:  return (__m128i) ((__v8hi)__A == (__v8hi)__B);
        -: 1348:}
        -: 1349:
        -: 1350:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1351:_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
        -: 1352:{ /* Element-wise `==' on the __v4si views of __A and __B using GCC's vector comparison operator; the mask it yields (per the GCC vector-extension rules, -1 in equal elements and 0 elsewhere) is returned as __m128i.  */
        -: 1353:  return (__m128i) ((__v4si)__A == (__v4si)__B);
        -: 1354:}
        -: 1355:
        -: 1356:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1357:_mm_cmplt_epi8 (__m128i __A, __m128i __B)
        -: 1358:{ /* Element-wise `__A < __B' on the __v16qs views, returned as a __m128i mask.  Note the cast is to __v16qs, not the __v16qi used by _mm_cmpeq_epi8.  NOTE(review): __v16qs is presumably the explicitly signed char vector type, so the ordering does not depend on plain-char signedness; its typedef is outside this chunk.  */
        -: 1359:  return (__m128i) ((__v16qs)__A < (__v16qs)__B);
        -: 1360:}
        -: 1361:
        -: 1362:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1363:_mm_cmplt_epi16 (__m128i __A, __m128i __B)
        -: 1364:{ /* Element-wise `__A < __B' on the __v8hi views using GCC's vector comparison operator; the resulting mask is returned as __m128i.  */
        -: 1365:  return (__m128i) ((__v8hi)__A < (__v8hi)__B);
        -: 1366:}
        -: 1367:
        -: 1368:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1369:_mm_cmplt_epi32 (__m128i __A, __m128i __B)
        -: 1370:{ /* Element-wise `__A < __B' on the __v4si views using GCC's vector comparison operator; the resulting mask is returned as __m128i.  */
        -: 1371:  return (__m128i) ((__v4si)__A < (__v4si)__B);
        -: 1372:}
        -: 1373:
        -: 1374:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1375:_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
        -: 1376:{ /* Element-wise `__A > __B' on the __v16qs views, returned as a __m128i mask.  Note the cast is to __v16qs, not the __v16qi used by _mm_cmpeq_epi8.  NOTE(review): __v16qs is presumably the explicitly signed char vector type, so the ordering does not depend on plain-char signedness; its typedef is outside this chunk.  */
        -: 1377:  return (__m128i) ((__v16qs)__A > (__v16qs)__B);
        -: 1378:}
        -: 1379:
        -: 1380:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1381:_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
        -: 1382:{ /* Element-wise `__A > __B' on the __v8hi views using GCC's vector comparison operator; the resulting mask is returned as __m128i.  */
        -: 1383:  return (__m128i) ((__v8hi)__A > (__v8hi)__B);
        -: 1384:}
        -: 1385:
        -: 1386:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1387:_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
        -: 1388:{ /* Element-wise `__A > __B' on the __v4si views using GCC's vector comparison operator; the resulting mask is returned as __m128i.  */
        -: 1389:  return (__m128i) ((__v4si)__A > (__v4si)__B);
        -: 1390:}
        -: 1391:
        -: 1392:#ifdef __OPTIMIZE__ /* Inline-function forms of _mm_extract_epi16/_mm_insert_epi16; the #else branch supplies macro forms over the same builtins.  NOTE(review): presumably the split exists because the builtins need the element index as a compile-time constant, which only survives an inlined call when optimizing -- confirm.  */
        -: 1393:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1394:_mm_extract_epi16 (__m128i const __A, int const __N)
        -: 1395:{ /* Reads element __N of the __v8hi view of __A via the vec_ext_v8hi builtin.  The cast to unsigned short before the int return means the result is never negative (0..65535).  */
        -: 1396:  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
        -: 1397:}
        -: 1398:
        -: 1399:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1400:_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
        -: 1401:{ /* Passes the __v8hi view of __A, the value __D and the index __N to the vec_set_v8hi builtin and returns its result as __m128i; __A is taken by value, so the caller's vector is not modified.  */
        -: 1402:  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
        -: 1403:}
        -: 1404:#else /* Macro forms: same builtins and the same unsigned-short/int result casts as the functions above, with explicit casts on every macro argument.  */
        -: 1405:#define _mm_extract_epi16(A, N) \
        -: 1406:  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
        -: 1407:#define _mm_insert_epi16(A, D, N)				\
        -: 1408:  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A),	\
        -: 1409:					  (int)(D), (int)(N)))
        -: 1410:#endif /* __OPTIMIZE__ */
        -: 1411:
        -: 1412:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1413:_mm_max_epi16 (__m128i __A, __m128i __B)
        -: 1414:{ /* Forwards the __v8hi views of __A and __B to the pmaxsw128 builtin and returns the result as __m128i.  NOTE(review): presumably the per-lane maximum of signed 16-bit values -- confirm.  */
        -: 1415:  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1416:}
        -: 1417:
        -: 1418:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1419:_mm_max_epu8 (__m128i __A, __m128i __B)
        -: 1420:{ /* Forwards the __v16qi views of __A and __B to the pmaxub128 builtin and returns the result as __m128i.  NOTE(review): presumably the per-lane maximum of unsigned 8-bit values -- confirm.  */
        -: 1421:  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
        -: 1422:}
        -: 1423:
        -: 1424:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1425:_mm_min_epi16 (__m128i __A, __m128i __B)
        -: 1426:{ /* Forwards the __v8hi views of __A and __B to the pminsw128 builtin and returns the result as __m128i.  NOTE(review): presumably the per-lane minimum of signed 16-bit values -- confirm.  */
        -: 1427:  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1428:}
        -: 1429:
        -: 1430:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1431:_mm_min_epu8 (__m128i __A, __m128i __B)
        -: 1432:{ /* Forwards the __v16qi views of __A and __B to the pminub128 builtin and returns the result as __m128i.  NOTE(review): presumably the per-lane minimum of unsigned 8-bit values -- confirm.  */
        -: 1433:  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
        -: 1434:}
        -: 1435:
        -: 1436:extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1437:_mm_movemask_epi8 (__m128i __A)
        -: 1438:{ /* Returns, as a plain int, whatever the pmovmskb128 builtin produces for the __v16qi view of __A.  NOTE(review): presumably a 16-bit mask built from the top bit of each byte lane -- confirm.  */
        -: 1439:  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
        -: 1440:}
        -: 1441:
        -: 1442:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1443:_mm_mulhi_epu16 (__m128i __A, __m128i __B)
        -: 1444:{ /* Forwards the __v8hi views of __A and __B to the pmulhuw128 builtin and returns the result as __m128i.  NOTE(review): presumably the high 16 bits of each unsigned 16x16 product; the __v8hi cast then only satisfies the builtin's parameter type -- confirm.  */
        -: 1445:  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1446:}
        -: 1447:
        -: 1448:#ifdef __OPTIMIZE__ /* Inline-function forms of the three shuffles; the #else branch supplies macro forms over the same builtins.  NOTE(review): presumably needed because the builtins require the mask as a compile-time constant -- confirm.  */
        -: 1449:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1450:_mm_shufflehi_epi16 (__m128i __A, const int __mask)
        -: 1451:{ /* Passes the __v8hi view of __A and __mask to the pshufhw builtin.  NOTE(review): presumably permutes only the upper four 16-bit lanes -- confirm.  */
        -: 1452:  return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
        -: 1453:}
        -: 1454:
        -: 1455:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1456:_mm_shufflelo_epi16 (__m128i __A, const int __mask)
        -: 1457:{ /* Passes the __v8hi view of __A and __mask to the pshuflw builtin.  NOTE(review): presumably permutes only the lower four 16-bit lanes -- confirm.  */
        -: 1458:  return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
        -: 1459:}
        -: 1460:
        -: 1461:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1462:_mm_shuffle_epi32 (__m128i __A, const int __mask)
        -: 1463:{ /* Passes the __v4si view of __A and __mask to the pshufd builtin.  NOTE(review): presumably permutes the four 32-bit lanes according to __mask -- confirm.  */
        -: 1464:  return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
        -: 1465:}
        -: 1466:#else /* Macro forms: same builtins as the functions above, with the vector argument cast through __m128i and the mask cast to int.  */
        -: 1467:#define _mm_shufflehi_epi16(A, N) \
        -: 1468:  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
        -: 1469:#define _mm_shufflelo_epi16(A, N) \
        -: 1470:  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
        -: 1471:#define _mm_shuffle_epi32(A, N) \
        -: 1472:  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
        -: 1473:#endif /* __OPTIMIZE__ */
        -: 1474:
        -: 1475:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1476:_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
        -: 1477:{ /* Forwards the __v16qi views of __A and __B plus the pointer __C to the maskmovdqu builtin; nothing is returned.  NOTE(review): presumably stores to __C only those bytes of __A selected by the mask in __B -- confirm against the Intel guide.  */
        -: 1478:  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
        -: 1479:}
        -: 1480:
        -: 1481:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1482:_mm_avg_epu8 (__m128i __A, __m128i __B)
        -: 1483:{ /* Forwards the __v16qi views of __A and __B to the pavgb128 builtin and returns the result as __m128i.  NOTE(review): presumably the per-lane average of unsigned 8-bit values; rounding behaviour is not visible here -- confirm.  */
        -: 1484:  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
        -: 1485:}
        -: 1486:
        -: 1487:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1488:_mm_avg_epu16 (__m128i __A, __m128i __B)
        -: 1489:{ /* Forwards the __v8hi views of __A and __B to the pavgw128 builtin and returns the result as __m128i.  NOTE(review): presumably the per-lane average of unsigned 16-bit values; rounding behaviour is not visible here -- confirm.  */
        -: 1490:  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
        -: 1491:}
        -: 1492:
        -: 1493:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1494:_mm_sad_epu8 (__m128i __A, __m128i __B)
        -: 1495:{ /* Forwards the __v16qi views of __A and __B to the psadbw128 builtin and returns the result as __m128i.  NOTE(review): presumably sums of absolute byte differences; the layout of the sums in the result is not visible here -- confirm.  */
        -: 1496:  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
        -: 1497:}
        -: 1498:
        -: 1499:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1500:_mm_stream_si32 (int *__A, int __B)
        -: 1501:{ /* Hands the destination pointer __A and the value __B to the movnti builtin; nothing is returned.  NOTE(review): presumably a non-temporal (cache-bypassing) store of __B to *__A -- confirm.  */
        -: 1502:  __builtin_ia32_movnti (__A, __B);
        -: 1503:}
        -: 1504:
        -: 1505:#ifdef __x86_64__ /* The 64-bit variant is only declared when __x86_64__ is defined.  */
        -: 1506:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1507:_mm_stream_si64 (long long int *__A, long long int __B)
        -: 1508:{ /* Hands the destination pointer __A and the value __B to the movnti64 builtin; nothing is returned.  NOTE(review): presumably a non-temporal (cache-bypassing) 64-bit store of __B to *__A -- confirm.  */
        -: 1509:  __builtin_ia32_movnti64 (__A, __B);
        -: 1510:}
        -: 1511:#endif /* __x86_64__ */
        -: 1512:
        -: 1513:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1514:_mm_stream_si128 (__m128i *__A, __m128i __B)
        -: 1515:{ /* Casts the destination to __v2di * and the value to __v2di, then hands both to the movntdq builtin; nothing is returned.  NOTE(review): presumably a non-temporal 128-bit store; any alignment requirement on __A is not visible here -- confirm.  */
        -: 1516:  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
        -: 1517:}
        -: 1518:
        -: 1519:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1520:_mm_stream_pd (double *__A, __m128d __B)
        -: 1521:{ /* Hands the destination pointer __A and the __v2df view of __B to the movntpd builtin; nothing is returned.  NOTE(review): presumably a non-temporal store of both doubles; any alignment requirement on __A is not visible here -- confirm.  */
        -: 1522:  __builtin_ia32_movntpd (__A, (__v2df)__B);
        -: 1523:}
        -: 1524:
        -: 1525:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1526:_mm_clflush (void const *__A)
        -: 1527:{ /* Passes the address __A unchanged to the clflush builtin; nothing is returned and the pointer is const-qualified, so the pointed-to data is not written through it here.  NOTE(review): presumably flushes the cache line containing __A -- confirm.  */
        -: 1528:  __builtin_ia32_clflush (__A);
        -: 1529:}
        -: 1530:
        -: 1531:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1532:_mm_lfence (void)
        -: 1533:{ /* Invokes the lfence builtin; takes no arguments and returns nothing.  NOTE(review): presumably emits a load fence -- confirm exact ordering guarantees in the Intel manual.  */
        -: 1534:  __builtin_ia32_lfence ();
        -: 1535:}
        -: 1536:
        -: 1537:extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1538:_mm_mfence (void)
        -: 1539:{ /* Invokes the mfence builtin; takes no arguments and returns nothing.  NOTE(review): presumably emits a full load/store fence -- confirm exact ordering guarantees in the Intel manual.  */
        -: 1540:  __builtin_ia32_mfence ();
        -: 1541:}
        -: 1542:
        -: 1543:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1544:_mm_cvtsi32_si128 (int __A)
        -: 1545:{ /* Builds the vector with _mm_set_epi32 (0, 0, 0, __A): three zero arguments followed by __A.  NOTE(review): _mm_set_epi32 is defined outside this chunk; presumably its last argument is the lowest 32-bit lane, leaving the other three lanes zero -- confirm.  */
        -: 1546:  return _mm_set_epi32 (0, 0, 0, __A);
        -: 1547:}
        -: 1548:
        -: 1549:#ifdef __x86_64__ /* Both 64-bit conversions are only declared when __x86_64__ is defined.  */
        -: 1550:/* Intel intrinsic.  */
        -: 1551:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1552:_mm_cvtsi64_si128 (long long __A)
        -: 1553:{ /* Builds the vector with _mm_set_epi64x (0, __A).  NOTE(review): _mm_set_epi64x is defined outside this chunk; presumably __A becomes the low 64-bit lane and the high lane is zero -- confirm.  */
        -: 1554:  return _mm_set_epi64x (0, __A);
        -: 1555:}
        -: 1556:
        -: 1557:/* Microsoft intrinsic.  */
        -: 1558:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1559:_mm_cvtsi64x_si128 (long long __A)
        -: 1560:{ /* Same body as _mm_cvtsi64_si128 above; only the spelling of the name differs.  */
        -: 1561:  return _mm_set_epi64x (0, __A);
        -: 1562:}
        -: 1563:#endif /* __x86_64__ */
        -: 1564:
        -: 1565:/* Casts between various SP, DP, INT vector types.  Note that these do no
        -: 1566:   conversion of values, they just change the type.  */
        -: 1567:extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1568:_mm_castpd_ps(__m128d __A)
        -: 1569:{ /* A bare C cast from __m128d to __m128: no builtin is called and no arithmetic is performed, only the vector type of the expression changes.  */
        -: 1570:  return (__m128) __A;
        -: 1571:}
        -: 1572:
        -: 1573:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1574:_mm_castpd_si128(__m128d __A)
        -: 1575:{ /* A bare C cast from __m128d to __m128i: no builtin is called and no arithmetic is performed, only the vector type of the expression changes.  */
        -: 1576:  return (__m128i) __A;
        -: 1577:}
        -: 1578:
        -: 1579:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1580:_mm_castps_pd(__m128 __A)
        -: 1581:{ /* A bare C cast from __m128 to __m128d: no builtin is called and no arithmetic is performed, only the vector type of the expression changes.  */
        -: 1582:  return (__m128d) __A;
        -: 1583:}
        -: 1584:
        -: 1585:extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1586:_mm_castps_si128(__m128 __A)
        -: 1587:{ /* A bare C cast from __m128 to __m128i: no builtin is called and no arithmetic is performed, only the vector type of the expression changes.  */
        -: 1588:  return (__m128i) __A;
        -: 1589:}
        -: 1590:
        -: 1591:extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1592:_mm_castsi128_ps(__m128i __A)
        -: 1593:{ /* A bare C cast from __m128i to __m128: no builtin is called and no arithmetic is performed, only the vector type of the expression changes.  */
        -: 1594:  return (__m128) __A;
        -: 1595:}
        -: 1596:
        -: 1597:extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
        -: 1598:_mm_castsi128_pd(__m128i __A)
        -: 1599:{ /* A bare C cast from __m128i to __m128d: no builtin is called and no arithmetic is performed, only the vector type of the expression changes.  */
        -: 1600:  return (__m128d) __A;
        -: 1601:}
        -: 1602:
        -: 1603:#ifdef __DISABLE_SSE2__
        -: 1604:#undef __DISABLE_SSE2__
        -: 1605:#pragma GCC pop_options
        -: 1606:#endif /* __DISABLE_SSE2__ */
        -: 1607:
        -: 1608:#endif /* _EMMINTRIN_H_INCLUDED */
