NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
NEONMAP1(vabsq_v, arm_neon_vabs, 0),
NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
NEONMAP1(vcage_v, arm_neon_vacge, 0),
NEONMAP1(vcageq_v, arm_neon_vacge, 0),
NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
NEONMAP1(vcale_v, arm_neon_vacge, 0),
NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vdot_s32, arm_neon_sdot, 0),
NEONMAP1(vdot_u32, arm_neon_udot, 0),
NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
NEONMAP1(vdotq_u32, arm_neon_udot, 0),
NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
NEONMAP1(vld1q_v, arm_neon_vld1, 0),
NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
NEONMAP1(vld2q_v, arm_neon_vld2, 0),
NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
NEONMAP1(vld3q_v, arm_neon_vld3, 0),
NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
NEONMAP1(vld4q_v, arm_neon_vld4, 0),
NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
NEONMAP1(vst1q_v, arm_neon_vst1, 0),
NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
NEONMAP1(vst2q_v, arm_neon_vst2, 0),
NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
NEONMAP1(vst3q_v, arm_neon_vst3, 0),
NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
NEONMAP1(vst4q_v, arm_neon_vst4, 0),
NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
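
// The AArch64 map below provides the corresponding entries for the 64-bit
// architecture. Note that the crypto builtins (AES/SHA/SM3/SM4) map to
// aarch64_crypto_* intrinsics here, rather than the arm_neon_* intrinsics
// used by the AArch32 map above.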
NEONMAP1(vabs_v, aarch64_neon_abs, 0),
NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
NEONMAP1(vcage_v, aarch64_neon_facge, 0),
NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
NEONMAP1(vcale_v, aarch64_neon_facge, 0),
NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
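
// Shared emitter for NEON builtins that are common to AArch32 and AArch64.
// The intrinsic maps above supply LLVMIntrinsic/AltLLVMIntrinsic and the
// Modifier bits consumed here.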
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {
  // Get the last argument, which specifies the vector type.
  std::optional<llvm::APSInt> NeonTypeConst =
      E->getArg(E->getNumArgs() - 1)->getIntegerConstantExpr(getContext());
  if (!NeonTypeConst)
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
  const bool Usgn = Type.isUnsigned();
  const bool Quad = Type.isQuad();
  const bool Floating = Type.isFloatingPoint();
  const bool AllowBFloatArgsAndRet =
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  auto getAlignmentValue32 = [&](Address addr) -> Value * {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default:
    break;
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements.divideCoefficientBy(2);

    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  }
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    // We don't allow fp/int overloading of intrinsics.
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
    break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vadd_v:
  case NEON::BI__builtin_neon_vaddq_v: {
    // Addition of polynomial vector types is just a bitwise XOR.
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // Add in the wide type, shift the high half down, then narrow.
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // The absolute "less-than" forms swap their operands and reuse the
    // "greater-than" intrinsics from the maps above.
    std::swap(Ops[0], Ops[1]);
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    llvm::Type *Ty;
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    case 32:
      Ty = FloatTy;
      break;
    case 64:
      Ty = DoubleTy;
      break;
    case 16:
      Ty = HalfTy;
      break;
    }
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
        "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
        "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
        "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
        "vcltz");
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // Generate the target-independent ctlz intrinsic; the NEON instruction
    // defines clz(0), so the "zero is poison" flag stays false.
    Int = Intrinsic::ctlz;
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_s16:
  case NEON::BI__builtin_neon_vcvt_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // vext is a constant rotation of two vectors: emit it as a shuffle.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<int, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
    llvm::Type *Tys[2] = {VTy, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v:
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load one element and splat it into every lane.
    Value *V = PoisonValue::get(Ty);
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::FixedVectorType *DTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    llvm::FixedVectorType *QTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // vmull stays an intrinsic: expressing it as ext + mul in IR lets LLVM
    // hoist the extensions out of loops, which produces worse code.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
        llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    auto *NarrowTy =
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    auto *NarrowTy =
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Emit the saturating doubling multiply first, then the saturating
    // accumulate via the Alt intrinsic from the NEONMAP2 entry.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqdmulh_lane_v:
  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
    auto *RTy = cast<llvm::FixedVectorType>(Ty);
    if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
        BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
      RTy = llvm::FixedVectorType::get(RTy->getElementType(),
                                       RTy->getNumElements() * 2);
    llvm::Type *Tys[2] = {
        RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false))};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
    llvm::Type *Tys[2] = {
        Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                            /*isQuad*/ true))};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v:
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
  case NEON::BI__builtin_neon_vsha512hq_u64:
  case NEON::BI__builtin_neon_vsha512h2q_u64:
  case NEON::BI__builtin_neon_vsha512su0q_u64:
  case NEON::BI__builtin_neon_vsha512su1q_u64: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsm3partw1q_u32:
  case NEON::BI__builtin_neon_vsm3partw2q_u32:
  case NEON::BI__builtin_neon_vsm3ss1q_u32:
  case NEON::BI__builtin_neon_vsm4ekeyq_u32:
  case NEON::BI__builtin_neon_vsm4eq_u32: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vsm3tt1aq_u32:
  case NEON::BI__builtin_neon_vsm3tt1bq_u32:
  case NEON::BI__builtin_neon_vsm3tt2aq_u32:
  case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
    // in AArch64 it comes last. We may want to stick to one or another.
    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
        Arch == llvm::Triple::aarch64_32) {
      llvm::Type *Tys[2] = {VTy, Int8PtrTy};
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
    }
    llvm::Type *Tys[2] = {Int8PtrTy, VTy};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // Subtract in the wide type, shift the high half down, then narrow.
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vxarq_u64: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vdot_s32:
  case NEON::BI__builtin_neon_vdot_u32:
  case NEON::BI__builtin_neon_vdotq_s32:
  case NEON::BI__builtin_neon_vdotq_u32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vdot");
  }
  case NEON::BI__builtin_neon_vfmlal_low_f16:
  case NEON::BI__builtin_neon_vfmlalq_low_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
                        "vfmlal_low");
  }
  case NEON::BI__builtin_neon_vfmlsl_low_f16:
  case NEON::BI__builtin_neon_vfmlslq_low_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
                        "vfmlsl_low");
  }
  case NEON::BI__builtin_neon_vfmlal_high_f16:
  case NEON::BI__builtin_neon_vfmlalq_high_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
                        "vfmlal_high");
  }
  case NEON::BI__builtin_neon_vfmlsl_high_f16:
  case NEON::BI__builtin_neon_vfmlslq_high_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
                        "vfmlsl_high");
  }
  case NEON::BI__builtin_neon_vmmlaq_s32:
  case NEON::BI__builtin_neon_vmmlaq_u32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
  }
  case NEON::BI__builtin_neon_vusmmlaq_s32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vusmmla");
  }
  case NEON::BI__builtin_neon_vusdot_s32:
  case NEON::BI__builtin_neon_vusdotq_s32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vusdot");
  }
  case NEON::BI__builtin_neon_vbfdot_f32:
  case NEON::BI__builtin_neon_vbfdotq_f32: {
    llvm::Type *InputTy =
        llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vbfdot");
  }
  case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
    llvm::Type *Tys[1] = { Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvtfp2bf");
  }
  }

  assert(Int && "Expected valid intrinsic number");

  // Cases above that only selected Int fall through to here: look up the
  // intrinsic with the computed overload type and emit the call.
  Function *F = CGM.getIntrinsic(Int, Ty);
  return EmitNeonCall(F, Ops, NameHint);
}
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E,
                                               llvm::Triple::ArchType Arch) {
  if (BuiltinID == Builtin::BI__builtin_cpu_supports)
    return EmitAArch64CpuSupports(E);

  unsigned HintID = static_cast<unsigned>(-1);
  switch (BuiltinID) {
  default: break;
  case clang::AArch64::BI__builtin_arm_nop:
    HintID = 0;
    break;
  case clang::AArch64::BI__builtin_arm_yield:
  case clang::AArch64::BI__yield:
    HintID = 1;
    break;
  case clang::AArch64::BI__builtin_arm_wfe:
  case clang::AArch64::BI__wfe:
    HintID = 2;
    break;
  case clang::AArch64::BI__builtin_arm_wfi:
  case clang::AArch64::BI__wfi:
    HintID = 3;
    break;
  case clang::AArch64::BI__builtin_arm_sev:
  case clang::AArch64::BI__sev:
    HintID = 4;
    break;
  case clang::AArch64::BI__builtin_arm_sevl:
  case clang::AArch64::BI__sevl:
    HintID = 5;
    break;
  }

  if (HintID != static_cast<unsigned>(-1)) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
    // Call __arm_sme_state and store the two result words through the
    // pointer arguments.
    CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
        llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
                                false),
        "__arm_sme_state"));
    auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
                                                "aarch64_pstate_sm_compatible");
    CI->setAttributes(Attrs);
    CI->setCallingConv(
        llvm::CallingConv::
            AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
    Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
                        EmitPointerWithAlignment(E->getArg(0)));
    return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
                               EmitPointerWithAlignment(E->getArg(1)));
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
    assert((getContext().getTypeSize(E->getType()) == 64) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
      BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
    if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
    return Res;
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
                              "cls");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
                              "cls");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
                              Arg, "frint32z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
                              Arg, "frint64z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
                              Arg, "frint32x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
                              Arg, "frint64x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "__jcvt of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
  }
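
  // ld64b/st64b/st64bv/st64bv0 move a 512-bit value as a tuple of eight i64
  // words between memory and the buffer pointed to by the second argument.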
  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));

    if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
      // Load from the address via an LLVM intrinsic, receiving a
      // tuple of 8 i64 words, and store them to ValPtr.
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
      llvm::Value *ToRet;
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
      }
      return ToRet;
    } else {
      // Load 8 i64 words from ValPtr, and store them to the address
      // via an LLVM intrinsic.
      SmallVector<llvm::Value *, 9> Args;
      Args.push_back(MemAddr);
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        Args.push_back(Builder.CreateLoad(Addr));
      }

      auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
                       ? Intrinsic::aarch64_st64b
                   : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
                       ? Intrinsic::aarch64_st64bv
                       : Intrinsic::aarch64_st64bv0);
      Function *F = CGM.getIntrinsic(Intr);
      return Builder.CreateCall(F, Args);
    }
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
                     ? Intrinsic::aarch64_rndr
                     : Intrinsic::aarch64_rndrrs);
    Function *F = CGM.getIntrinsic(Intr);
    llvm::Value *Val = Builder.CreateCall(F);
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
    Value *Status = Builder.CreateExtractValue(Val, 1);

    // Store the random value through the pointer argument and return the
    // status flag.
    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
    Builder.CreateStore(RandomValue, MemAddress);
    Status = Builder.CreateZExt(Status, Int32Ty);
    return Status;
  }

  if (BuiltinID == clang::AArch64::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }
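
  // Exclusive loads: 128-bit __builtin_arm_ldrex/ldaex use ldxp/ldaxp and
  // recombine the two halves; smaller sizes use ldxr/ldaxr directly.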
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
      getContext().getTypeSize(E->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxp
                             : Intrinsic::aarch64_ldxp);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
    Val1 = Builder.CreateZExt(Val1, Int128Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxr
                             : Intrinsic::aarch64_ldxr,
                         UnqualPtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    llvm::Type *IntResTy = llvm::IntegerType::get(
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                 RealResTy);
  }

  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxp
                             : Intrinsic::aarch64_stxp);
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);

    Tmp = Tmp.withElementType(STy);
    llvm::Value *Val = Builder.CreateLoad(Tmp);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
    }

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxr
                             : Intrinsic::aarch64_stxr,
                         StoreAddr->getType());
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }
  if (BuiltinID == clang::AArch64::BI__getReg) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    LLVMContext &Context = CGM.getLLVMContext();
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
    return Builder.CreateCall(F, Metadata);
  }

  if (BuiltinID == clang::AArch64::BI__break) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
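
  // CRC32: note that the 64-bit-data builtins map to aarch64_crc32x/crc32cx,
  // matching the CRC32X/CRC32CX instruction names.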
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  case clang::AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  case clang::AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  case clang::AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  case clang::AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  case clang::AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  case clang::AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  case clang::AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }

  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
  }
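
  // Memory Tagging Extension (MTE) builtins each lower to a single aarch64_*
  // intrinsic; irg/addg/gmi zero-extend their second operand to i64 first.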
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_irg:
    MTEIntrinsicID = Intrinsic::aarch64_irg; break;
  case clang::AArch64::BI__builtin_arm_addg:
    MTEIntrinsicID = Intrinsic::aarch64_addg; break;
  case clang::AArch64::BI__builtin_arm_gmi:
    MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
  case clang::AArch64::BI__builtin_arm_ldg:
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
  case clang::AArch64::BI__builtin_arm_stg:
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
  case clang::AArch64::BI__builtin_arm_subp:
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
  }

  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *Mask = EmitScalarExpr(E->getArg(1));
      Mask = Builder.CreateZExt(Mask, Int64Ty);
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, Mask});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *TagOffset = EmitScalarExpr(E->getArg(1));
      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, TagOffset});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
    }
    // Although it is possible to supply a different return address to ldg,
    // for now the return address is set to the same value as the input.
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    // Likewise, stg stores the tag that is already in the input address.
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
      Value *PointerA = EmitScalarExpr(E->getArg(0));
      Value *PointerB = EmitScalarExpr(E->getArg(1));
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
    }
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;

    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;

    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;

    llvm::Type *ValueType;
    llvm::Type *RegisterType = Int64Ty;
    if (Is32Bit) {
      ValueType = Int32Ty;
    } else if (Is128Bit) {
      llvm::Type *Int128Ty =
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
      ValueType = Int128Ty;
      RegisterType = Int128Ty;
    } else if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
    } else {
      ValueType = Int64Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }
  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
      BuiltinID == clang::AArch64::BI_WriteStatusReg ||
      BuiltinID == clang::AArch64::BI__sys) {
    LLVMContext &Context = CGM.getLLVMContext();

    unsigned SysReg =
        E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();

    // Render the immediate in the "o0:op1:CRn:CRm:op2" string form of the
    // system register.
    std::string SysRegStr;
    unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
                          BuiltinID == clang::AArch64::BI_WriteStatusReg)
                             ? ((1 << 1) | ((SysReg >> 14) & 1))
                             : 1;
    llvm::raw_string_ostream(SysRegStr)
        << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
        << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
        << (SysReg & 7);

    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Type *RegisterType = Int64Ty;
    llvm::Type *Types[] = { RegisterType };

    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
      return Builder.CreateCall(F, Metadata);
    }

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
    llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
    if (BuiltinID == clang::AArch64::BI__sys)
      // Return 0 for convenience; the __sys result is otherwise unspecified.
      Result = ConstantInt::get(Builder.getInt32Ty(), 0);
    return Result;
  }

  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }
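
  // __mulh/__umulh: widen both operands to i128, multiply, and return the
  // high 64 bits of the product.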
  if (BuiltinID == clang::AArch64::BI__mulh ||
      BuiltinID == clang::AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }
  if (BuiltinID == AArch64::BI__writex18byte ||
      BuiltinID == AArch64::BI__writex18word ||
      BuiltinID == AArch64::BI__writex18dword ||
      BuiltinID == AArch64::BI__writex18qword) {
    // Process the args first.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));
    Value *DataArg = EmitScalarExpr(E->getArg(1));

    // Read x18 as a pointer, then store the data at x18 + offset.
    llvm::Value *X18 = readX18AsPtr(*this);
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    StoreInst *Store =
        Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI__readx18byte ||
      BuiltinID == AArch64::BI__readx18word ||
      BuiltinID == AArch64::BI__readx18dword ||
      BuiltinID == AArch64::BI__readx18qword) {
    // Read x18 as a pointer, then load from x18 + offset.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));
    llvm::Value *X18 = readX18AsPtr(*this);
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    llvm::Type *IntTy = ConvertType(E->getType());
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
    return Load;
  }

  if (BuiltinID == AArch64::BI__addx18byte ||
      BuiltinID == AArch64::BI__addx18word ||
      BuiltinID == AArch64::BI__addx18dword ||
      BuiltinID == AArch64::BI__addx18qword ||
      BuiltinID == AArch64::BI__incx18byte ||
      BuiltinID == AArch64::BI__incx18word ||
      BuiltinID == AArch64::BI__incx18dword ||
      BuiltinID == AArch64::BI__incx18qword) {
    llvm::Type *IntTy;
    bool isIncrement;
    switch (BuiltinID) {
    case AArch64::BI__incx18byte:
      IntTy = Int8Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18word:
      IntTy = Int16Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18dword:
      IntTy = Int32Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18qword:
      IntTy = Int64Ty;
      isIncrement = true;
      break;
    default:
      IntTy = ConvertType(E->getArg(1)->getType());
      isIncrement = false;
      break;
    }
    // Read-modify-write the value at x18 + offset.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));
    Value *ValToAdd =
        isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));

    llvm::Value *X18 = readX18AsPtr(*this);
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
    Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
    StoreInst *Store =
        Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
      BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
      BuiltinID == AArch64::BI_CopyInt32FromFloat ||
      BuiltinID == AArch64::BI_CopyInt64FromDouble) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *RetTy = ConvertType(E->getType());
    return Builder.CreateBitCast(Arg, RetTy);
  }

  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
      BuiltinID == AArch64::BI_CountLeadingOnes64 ||
      BuiltinID == AArch64::BI_CountLeadingZeros ||
      BuiltinID == AArch64::BI_CountLeadingZeros64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Arg->getType();

    if (BuiltinID == AArch64::BI_CountLeadingOnes ||
        BuiltinID == AArch64::BI_CountLeadingOnes64)
      Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));

    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
    Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});

    if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
        BuiltinID == AArch64::BI_CountLeadingZeros64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
      BuiltinID == AArch64::BI_CountLeadingSigns64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));

    Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
                      ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
                      : CGM.getIntrinsic(Intrinsic::aarch64_cls64);

    Value *Result = Builder.CreateCall(F, Arg, "cls");
    if (BuiltinID == AArch64::BI_CountLeadingSigns64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI_CountOneBits ||
      BuiltinID == AArch64::BI_CountOneBits64) {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    Value *Result = Builder.CreateCall(F, ArgValue);
    if (BuiltinID == AArch64::BI_CountOneBits64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI__prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
    Value *Locality = ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }

  if (BuiltinID == AArch64::BI__hlt) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
    Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});

    // Return 0 for convenience, even though MSVC returns some other undefined
    // value.
    return ConstantInt::get(Builder.getInt32Ty(), 0);
  }
  if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
    return Builder.CreateFPTrunc(EmitScalarExpr(E->getArg(0)), BFloatTy);

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId =
          translateAarch64ToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::SmallVector<Value *, 4> Ops;
  Address PtrOp0 = Address::invalid();
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vldap1_lane_s64:
      case NEON::BI__builtin_neon_vldap1q_lane_s64:
      case NEON::BI__builtin_neon_vstl1_lane_s64:
      case NEON::BI__builtin_neon_vstl1q_lane_s64:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.emitRawPointer(*this));
        continue;
      }
    }
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
  }

  auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);

  if (Builtin) {
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
    assert(Result && "SISD intrinsic should have been handled");
    return Result;
  }

  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  NeonTypeFlags Type(0);
  if (std::optional<llvm::APSInt> Result =
          Arg->getIntegerConstantExpr(getContext()))
    // Determine the type of this overloaded NEON intrinsic.
    Type = NeonTypeFlags(Result->getZExtValue());

  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();
  // Handle non-overloaded intrinsics first.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddq_p128: {
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Poly128));
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    return Builder.CreateBitCast(Ops[0], Int128Ty);
  }
  case NEON::BI__builtin_neon_vldrq_p128: {
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
                                     CharUnits::fromQuantity(16));
  }
  case NEON::BI__builtin_neon_vstrq_p128: {
    Value *Ptr = Ops[0];
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)),
                                             Ptr);
  }
  case NEON::BI__builtin_neon_vcvts_f32_u32:
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvts_f32_s32:
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_f16_u16:
  case NEON::BI__builtin_neon_vcvth_f16_u32:
  case NEON::BI__builtin_neon_vcvth_f16_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvth_f16_s16:
  case NEON::BI__builtin_neon_vcvth_f16_s32:
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy;
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
      InTy = Int64Ty;
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
      InTy = Int32Ty;
    else
      InTy = Int16Ty;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
  case NEON::BI__builtin_neon_vcvth_u16_f16:
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtau; break;
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtmu; break;
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtnu; break;
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtpu; break;
    case NEON::BI__builtin_neon_vcvth_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzu; break;
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtas; break;
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtms; break;
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtns; break;
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtps; break;
    case NEON::BI__builtin_neon_vcvth_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzs; break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
5901 case NEON::BI__builtin_neon_vcaleh_f16:
5902 case NEON::BI__builtin_neon_vcalth_f16:
5903 case NEON::BI__builtin_neon_vcageh_f16:
5904 case NEON::BI__builtin_neon_vcagth_f16: {
5907 llvm::Type* FTy = HalfTy;
5908 llvm::Type *Tys[2] = {InTy, FTy};
5910 switch (BuiltinID) {
5911 default: llvm_unreachable("missing builtin ID in switch!");
5912 case NEON::BI__builtin_neon_vcageh_f16:
5913 Int = Intrinsic::aarch64_neon_facge; break;
5914 case NEON::BI__builtin_neon_vcagth_f16:
5915 Int = Intrinsic::aarch64_neon_facgt; break;
5916 case NEON::BI__builtin_neon_vcaleh_f16:
5917 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5918 case NEON::BI__builtin_neon_vcalth_f16:
5919 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
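// There are no dedicated "absolute less-than" instructions; vcaleh/vcalth
// reuse facge/facgt with the operands swapped, since |a| <= |b| is the same
// comparison as |b| >= |a|.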
5924 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5925 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5928 llvm::Type* FTy = HalfTy;
5929 llvm::Type *Tys[2] = {InTy, FTy};
5931 switch (BuiltinID) {
5932 default: llvm_unreachable("missing builtin ID in switch!");
5933 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5934 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5935 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5936 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5941 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5942 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5944 llvm::Type* FTy = HalfTy;
5946 llvm::Type *Tys[2] = {FTy, InTy};
5948 switch (BuiltinID) {
5949 default: llvm_unreachable("missing builtin ID in switch!");
5950 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5951 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5952 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5954 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5955 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5956 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5961 case NEON::BI__builtin_neon_vpaddd_s64: {
5962 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5965 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5966 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5967 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5968 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5969 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5971 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5973 case NEON::BI__builtin_neon_vpaddd_f64: {
5974 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5977 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5978 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5979 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5980 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5981 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5983 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5985 case NEON::BI__builtin_neon_vpadds_f32: {
5986 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5989 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5990 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5991 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5992 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5993 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5995 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5997 case NEON::BI__builtin_neon_vceqzd_s64:
6001 ICmpInst::ICMP_EQ, "vceqz");
6002 case NEON::BI__builtin_neon_vceqzd_f64:
6003 case NEON::BI__builtin_neon_vceqzs_f32:
6004 case NEON::BI__builtin_neon_vceqzh_f16:
6008 ICmpInst::FCMP_OEQ, "vceqz");
6009 case NEON::BI__builtin_neon_vcgezd_s64:
6013 ICmpInst::ICMP_SGE, "vcgez");
6014 case NEON::BI__builtin_neon_vcgezd_f64:
6015 case NEON::BI__builtin_neon_vcgezs_f32:
6016 case NEON::BI__builtin_neon_vcgezh_f16:
6020 ICmpInst::FCMP_OGE, "vcgez");
6021 case NEON::BI__builtin_neon_vclezd_s64:
6025 ICmpInst::ICMP_SLE, "vclez");
6026 case NEON::BI__builtin_neon_vclezd_f64:
6027 case NEON::BI__builtin_neon_vclezs_f32:
6028 case NEON::BI__builtin_neon_vclezh_f16:
6032 ICmpInst::FCMP_OLE, "vclez");
6033 case NEON::BI__builtin_neon_vcgtzd_s64:
6037 ICmpInst::ICMP_SGT, "vcgtz");
6038 case NEON::BI__builtin_neon_vcgtzd_f64:
6039 case NEON::BI__builtin_neon_vcgtzs_f32:
6040 case NEON::BI__builtin_neon_vcgtzh_f16:
6044 ICmpInst::FCMP_OGT, "vcgtz");
6045 case NEON::BI__builtin_neon_vcltzd_s64:
6049 ICmpInst::ICMP_SLT, "vcltz");
6051 case NEON::BI__builtin_neon_vcltzd_f64:
6052 case NEON::BI__builtin_neon_vcltzs_f32:
6053 case NEON::BI__builtin_neon_vcltzh_f16:
6057 ICmpInst::FCMP_OLT, "vcltz");
6059 case NEON::BI__builtin_neon_vceqzd_u64: {
6063 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
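// Scalar compares against zero produce an i1 from the icmp/fcmp; the
// surrounding (elided) code sign-extends that bit to the full-width
// all-ones/all-zeros mask the ACLE builtins return.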
6066 case NEON::BI__builtin_neon_vceqd_f64:
6067 case NEON::BI__builtin_neon_vcled_f64:
6068 case NEON::BI__builtin_neon_vcltd_f64:
6069 case NEON::BI__builtin_neon_vcged_f64:
6070 case NEON::BI__builtin_neon_vcgtd_f64: {
6071 llvm::CmpInst::Predicate P;
6072 switch (BuiltinID) {
6073 default: llvm_unreachable("missing builtin ID in switch!");
6074 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6075 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6076 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6077 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6078 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6083 if (P == llvm::FCmpInst::FCMP_OEQ)
6084 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6086 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6089 case NEON::BI__builtin_neon_vceqs_f32:
6090 case NEON::BI__builtin_neon_vcles_f32:
6091 case NEON::BI__builtin_neon_vclts_f32:
6092 case NEON::BI__builtin_neon_vcges_f32:
6093 case NEON::BI__builtin_neon_vcgts_f32: {
6094 llvm::CmpInst::Predicate P;
6095 switch (BuiltinID) {
6096 default: llvm_unreachable("missing builtin ID in switch!");
6097 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6098 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6099 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6100 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6101 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6106 if (P == llvm::FCmpInst::FCMP_OEQ)
6107 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6109 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6112 case NEON::BI__builtin_neon_vceqh_f16:
6113 case NEON::BI__builtin_neon_vcleh_f16:
6114 case NEON::BI__builtin_neon_vclth_f16:
6115 case NEON::BI__builtin_neon_vcgeh_f16:
6116 case NEON::BI__builtin_neon_vcgth_f16: {
6117 llvm::CmpInst::Predicate P;
6118 switch (BuiltinID) {
6119 default: llvm_unreachable("missing builtin ID in switch!");
6120 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6121 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6122 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6123 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6124 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6129 if (P == llvm::FCmpInst::FCMP_OEQ)
6130 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6132 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
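// Equality uses the quiet CreateFCmp, while the ordered <, <=, >, >=
// compares use CreateFCmpS: IEEE 754 defines the relational comparisons as
// signaling on NaN, which matches the AArch64 scalar compare behaviour,
// whereas FCMEQ is a quiet compare.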
6135 case NEON::BI__builtin_neon_vceqd_s64:
6136 case NEON::BI__builtin_neon_vceqd_u64:
6137 case NEON::BI__builtin_neon_vcgtd_s64:
6138 case NEON::BI__builtin_neon_vcgtd_u64:
6139 case NEON::BI__builtin_neon_vcltd_s64:
6140 case NEON::BI__builtin_neon_vcltd_u64:
6141 case NEON::BI__builtin_neon_vcged_u64:
6142 case NEON::BI__builtin_neon_vcged_s64:
6143 case NEON::BI__builtin_neon_vcled_u64:
6144 case NEON::BI__builtin_neon_vcled_s64: {
6145 llvm::CmpInst::Predicate P;
6146 switch (BuiltinID) {
6147 default: llvm_unreachable("missing builtin ID in switch!");
6148 case NEON::BI__builtin_neon_vceqd_s64:
6149 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ; break;
6150 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT; break;
6151 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT; break;
6152 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT; break;
6153 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT; break;
6154 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE; break;
6155 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE; break;
6156 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE; break;
6157 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE; break;
6162 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6165 case NEON::BI__builtin_neon_vtstd_s64:
6166 case NEON::BI__builtin_neon_vtstd_u64: {
6170 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6171 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6172 llvm::Constant::getNullValue(Int64Ty));
6175 case NEON::BI__builtin_neon_vset_lane_i8:
6176 case NEON::BI__builtin_neon_vset_lane_i16:
6177 case NEON::BI__builtin_neon_vset_lane_i32:
6178 case NEON::BI__builtin_neon_vset_lane_i64:
6179 case NEON::BI__builtin_neon_vset_lane_bf16:
6180 case NEON::BI__builtin_neon_vset_lane_f32:
6181 case NEON::BI__builtin_neon_vsetq_lane_i8:
6182 case NEON::BI__builtin_neon_vsetq_lane_i16:
6183 case NEON::BI__builtin_neon_vsetq_lane_i32:
6184 case NEON::BI__builtin_neon_vsetq_lane_i64:
6185 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6186 case NEON::BI__builtin_neon_vsetq_lane_f32:
6188 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6189 case NEON::BI__builtin_neon_vset_lane_f64:
6192 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
6194 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6195 case NEON::BI__builtin_neon_vset_lane_mf8:
6196 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6201 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6202 case NEON::BI__builtin_neon_vsetq_lane_f64:
6205 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
6207 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6209 case NEON::BI__builtin_neon_vget_lane_i8:
6210 case NEON::BI__builtin_neon_vdupb_lane_i8:
6212 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
6215 case NEON::BI__builtin_neon_vgetq_lane_i8:
6216 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6218 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
6221 case NEON::BI__builtin_neon_vget_lane_mf8:
6222 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6223 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6224 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6227 case NEON::BI__builtin_neon_vget_lane_i16:
6228 case NEON::BI__builtin_neon_vduph_lane_i16:
6230 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
6233 case NEON::BI__builtin_neon_vgetq_lane_i16:
6234 case NEON::BI__builtin_neon_vduph_laneq_i16:
6236 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
6239 case NEON::BI__builtin_neon_vget_lane_i32:
6240 case NEON::BI__builtin_neon_vdups_lane_i32:
6242 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
6245 case NEON::BI__builtin_neon_vdups_lane_f32:
6247 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6250 case NEON::BI__builtin_neon_vgetq_lane_i32:
6251 case NEON::BI__builtin_neon_vdups_laneq_i32:
6253 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
6256 case NEON::BI__builtin_neon_vget_lane_i64:
6257 case NEON::BI__builtin_neon_vdupd_lane_i64:
6259 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
6262 case NEON::BI__builtin_neon_vdupd_lane_f64:
6264 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6267 case NEON::BI__builtin_neon_vgetq_lane_i64:
6268 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6270 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
6273 case NEON::BI__builtin_neon_vget_lane_f32:
6275 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6278 case NEON::BI__builtin_neon_vget_lane_f64:
6280 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6283 case NEON::BI__builtin_neon_vgetq_lane_f32:
6284 case NEON::BI__builtin_neon_vdups_laneq_f32:
6286 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
6289 case NEON::BI__builtin_neon_vgetq_lane_f64:
6290 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6292 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
6295 case NEON::BI__builtin_neon_vaddh_f16:
6297 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6298 case NEON::BI__builtin_neon_vsubh_f16:
6300 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6301 case NEON::BI__builtin_neon_vmulh_f16:
6303 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6304 case NEON::BI__builtin_neon_vdivh_f16:
6306 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6307 case NEON::BI__builtin_neon_vfmah_f16:
6310 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6312 case NEON::BI__builtin_neon_vfmsh_f16: {
6317 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
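// The scalar f16 fma builtins route through llvm.fma (or its constrained
// counterpart): note the accumulator moves from the builtin's first argument
// to llvm.fma's last operand. vfmsh_f16 is presumably handled by negating
// one multiplicand before the fused call (that negation is elided above).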
6320 case NEON::BI__builtin_neon_vaddd_s64:
6321 case NEON::BI__builtin_neon_vaddd_u64:
6323 case NEON::BI__builtin_neon_vsubd_s64:
6324 case NEON::BI__builtin_neon_vsubd_u64:
6326 case NEON::BI__builtin_neon_vqdmlalh_s16:
6327 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6331 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6332 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6333 ProductOps, "vqdmlXl");
6334 Constant *CI = ConstantInt::get(SizeTy, 0);
6335 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6337 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6338 ? Intrinsic::aarch64_neon_sqadd
6339 : Intrinsic::aarch64_neon_sqsub;
6342 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6348 case NEON::BI__builtin_neon_vqshld_n_u64:
6349 case NEON::BI__builtin_neon_vqshld_n_s64: {
6350 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6351 ? Intrinsic::aarch64_neon_uqshl
6352 : Intrinsic::aarch64_neon_sqshl;
6357 case NEON::BI__builtin_neon_vrshrd_n_u64:
6358 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6359 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6360 ? Intrinsic::aarch64_neon_urshl
6361 : Intrinsic::aarch64_neon_srshl;
6364 Ops[1] = ConstantInt::get(Int64Ty, -SV);
6367 case NEON::BI__builtin_neon_vrsrad_n_u64:
6368 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6369 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6370 ? Intrinsic::aarch64_neon_urshl
6371 : Intrinsic::aarch64_neon_srshl;
6375 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6378 case NEON::BI__builtin_neon_vshld_n_s64:
6379 case NEON::BI__builtin_neon_vshld_n_u64: {
6382 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6384 case NEON::BI__builtin_neon_vshrd_n_s64: {
6387 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6388 Amt->getZExtValue())),
6391 case NEON::BI__builtin_neon_vshrd_n_u64: {
6393 uint64_t ShiftAmt = Amt->getZExtValue();
6396 return ConstantInt::get(Int64Ty, 0);
6397 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6400 case NEON::BI__builtin_neon_vsrad_n_s64: {
6403 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6404 Amt->getZExtValue())),
6406 return Builder.CreateAdd(Ops[0], Ops[1]);
6408 case NEON::BI__builtin_neon_vsrad_n_u64: {
6410 uint64_t ShiftAmt = Amt->getZExtValue();
6415 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6417 return Builder.CreateAdd(Ops[0], Ops[1]);
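// Immediate shift counts of 64 cannot be encoded in LLVM's shift
// instructions, so the lowering clamps: arithmetic right shifts clamp the
// amount to 63 (a 64-bit ashr by 63 gives the same result), while a logical
// right shift by 64 is folded to the constant 0 before any IR is emitted.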
6419 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6420 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6421 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6422 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6428 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6429 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6430 ProductOps, "vqdmlXl");
6431 Constant *CI = ConstantInt::get(SizeTy, 0);
6432 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6435 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6436 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6437 ? Intrinsic::aarch64_neon_sqadd
6438 : Intrinsic::aarch64_neon_sqsub;
6441 case NEON::BI__builtin_neon_vqdmlals_s32:
6442 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6444 ProductOps.push_back(Ops[1]);
6447 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6448 ProductOps, "vqdmlXl");
6450 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6451 ? Intrinsic::aarch64_neon_sqadd
6452 : Intrinsic::aarch64_neon_sqsub;
6455 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6456 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6457 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6458 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6462 ProductOps.push_back(Ops[1]);
6463 ProductOps.push_back(Ops[2]);
6465 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6466 ProductOps, "vqdmlXl");
6469 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6470 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6471 ? Intrinsic::aarch64_neon_sqadd
6472 : Intrinsic::aarch64_neon_sqsub;
6475 case NEON::BI__builtin_neon_vget_lane_bf16:
6476 case NEON::BI__builtin_neon_vduph_lane_bf16:
6477 case NEON::BI__builtin_neon_vduph_lane_f16: {
6481 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6482 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6483 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6487 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6488 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6489 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6490 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6492 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6494 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6495 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6496 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6497 llvm::Value *Trunc =
6498 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6499 return Builder.CreateShuffleVector(
6500 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6502 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6504 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6506 std::iota(LoMask.begin(), LoMask.end(), 0);
6507 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6508 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6509 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6510 llvm::Value *Inactive = Builder.CreateShuffleVector(
6511 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6512 llvm::Value *Trunc =
6513 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6514 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
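// vcvtq_low/vcvtq_high_bf16_f32 narrow four floats with fptrunc and then use
// shufflevector to place the <4 x bfloat> result in the low or high half of
// the <8 x bfloat> destination, zeroing the other half in the low case and
// preserving the existing low lanes in the high case.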
6517 case clang::AArch64::BI_InterlockedAdd:
6518 case clang::AArch64::BI_InterlockedAdd_acq:
6519 case clang::AArch64::BI_InterlockedAdd_rel:
6520 case clang::AArch64::BI_InterlockedAdd_nf:
6521 case clang::AArch64::BI_InterlockedAdd64:
6522 case clang::AArch64::BI_InterlockedAdd64_acq:
6523 case clang::AArch64::BI_InterlockedAdd64_rel:
6524 case clang::AArch64::BI_InterlockedAdd64_nf: {
6527 llvm::AtomicOrdering Ordering;
6528 switch (BuiltinID) {
6529 case clang::AArch64::BI_InterlockedAdd:
6530 case clang::AArch64::BI_InterlockedAdd64:
6531 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6533 case clang::AArch64::BI_InterlockedAdd_acq:
6534 case clang::AArch64::BI_InterlockedAdd64_acq:
6535 Ordering = llvm::AtomicOrdering::Acquire;
6537 case clang::AArch64::BI_InterlockedAdd_rel:
6538 case clang::AArch64::BI_InterlockedAdd64_rel:
6539 Ordering = llvm::AtomicOrdering::Release;
6541 case clang::AArch64::BI_InterlockedAdd_nf:
6542 case clang::AArch64::BI_InterlockedAdd64_nf:
6543 Ordering = llvm::AtomicOrdering::Monotonic;
6546 llvm_unreachable("missing builtin ID in switch!");
6548 AtomicRMWInst *RMWI =
6549 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6550 return Builder.CreateAdd(RMWI, Val);
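// MSVC's _InterlockedAdd* return the *new* value, but atomicrmw add yields
// the value the memory held before the operation, so Val is added back onto
// the RMW result. The _acq/_rel/_nf suffixes map to the acquire, release,
// and monotonic atomic orderings chosen above.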
6555 llvm::Type *Ty = VTy;
6574 bool ExtractLow = false;
6575 bool ExtendLaneArg = false;
6576 switch (BuiltinID) {
6577 default: return nullptr;
6578 case NEON::BI__builtin_neon_vbsl_v:
6579 case NEON::BI__builtin_neon_vbslq_v: {
6580 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6581 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6582 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6583 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6585 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6586 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6587 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6588 return Builder.CreateBitCast(Ops[0], Ty);
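// There is no single IR instruction for BSL, so vbsl is open-coded on
// integer-cast vectors as (mask & a) | (~mask & b), which the backend can
// re-select to the BSL/BIT/BIF instruction family.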
6590 case NEON::BI__builtin_neon_vfma_lane_v:
6591 case NEON::BI__builtin_neon_vfmaq_lane_v: {
6594 Value *Addend = Ops[0];
6595 Value *Multiplicand = Ops[1];
6596 Value *LaneSource = Ops[2];
6597 Ops[0] = Multiplicand;
6598 Ops[1] = LaneSource;
6602 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6603 ? llvm::FixedVectorType::get(VTy->getElementType(),
6604 VTy->getNumElements() / 2)
6607 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6608 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6609 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6612 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6616 case NEON::BI__builtin_neon_vfma_laneq_v: {
6619 if (VTy && VTy->getElementType() == DoubleTy) {
6622 llvm::FixedVectorType *VTy =
6624 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6625 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6628 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6629 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6632 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6633 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6635 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6636 VTy->getNumElements() * 2);
6637 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6638 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6640 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6643 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6644 {Ops[2], Ops[1], Ops[0]});
6646 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6647 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6648 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6650 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6653 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6654 {Ops[2], Ops[1], Ops[0]});
6656 case NEON::BI__builtin_neon_vfmah_lane_f16:
6657 case NEON::BI__builtin_neon_vfmas_lane_f32:
6658 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6659 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6660 case NEON::BI__builtin_neon_vfmad_lane_f64:
6661 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6664 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6666 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6667 {Ops[1], Ops[2], Ops[0]});
6669 case NEON::BI__builtin_neon_vmull_v:
6671 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6672 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6674 case NEON::BI__builtin_neon_vmax_v:
6675 case NEON::BI__builtin_neon_vmaxq_v:
6677 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6678 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6680 case NEON::BI__builtin_neon_vmaxh_f16: {
6682 Int = Intrinsic::aarch64_neon_fmax;
6685 case NEON::BI__builtin_neon_vmin_v:
6686 case NEON::BI__builtin_neon_vminq_v:
6688 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6689 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6691 case NEON::BI__builtin_neon_vminh_f16: {
6693 Int = Intrinsic::aarch64_neon_fmin;
6696 case NEON::BI__builtin_neon_vabd_v:
6697 case NEON::BI__builtin_neon_vabdq_v:
6699 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6700 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6702 case NEON::BI__builtin_neon_vpadal_v:
6703 case NEON::BI__builtin_neon_vpadalq_v: {
6704 unsigned ArgElts = VTy->getNumElements();
6706 unsigned BitWidth = EltTy->getBitWidth();
6707 auto *ArgTy = llvm::FixedVectorType::get(
6708 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6709 llvm::Type* Tys[2] = { VTy, ArgTy };
6710 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6712 TmpOps.push_back(Ops[1]);
6715 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6716 return Builder.CreateAdd(tmp, addend);
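// vpadal (pairwise add and accumulate long) is emitted as the pairwise
// widening add intrinsic (saddlp/uaddlp) followed by a plain vector add of
// the accumulator operand, since no accumulating form is exposed here.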
6718 case NEON::BI__builtin_neon_vpmin_v:
6719 case NEON::BI__builtin_neon_vpminq_v:
6721 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6722 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6724 case NEON::BI__builtin_neon_vpmax_v:
6725 case NEON::BI__builtin_neon_vpmaxq_v:
6727 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6728 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6730 case NEON::BI__builtin_neon_vminnm_v:
6731 case NEON::BI__builtin_neon_vminnmq_v:
6732 Int = Intrinsic::aarch64_neon_fminnm;
6734 case NEON::BI__builtin_neon_vminnmh_f16:
6736 Int = Intrinsic::aarch64_neon_fminnm;
6738 case NEON::BI__builtin_neon_vmaxnm_v:
6739 case NEON::BI__builtin_neon_vmaxnmq_v:
6740 Int = Intrinsic::aarch64_neon_fmaxnm;
6742 case NEON::BI__builtin_neon_vmaxnmh_f16:
6744 Int = Intrinsic::aarch64_neon_fmaxnm;
6746 case NEON::BI__builtin_neon_vrecpss_f32: {
6751 case NEON::BI__builtin_neon_vrecpsd_f64:
6755 case NEON::BI__builtin_neon_vrecpsh_f16:
6759 case NEON::BI__builtin_neon_vqshrun_n_v:
6760 Int = Intrinsic::aarch64_neon_sqshrun;
6762 case NEON::BI__builtin_neon_vqrshrun_n_v:
6763 Int = Intrinsic::aarch64_neon_sqrshrun;
6765 case NEON::BI__builtin_neon_vqshrn_n_v:
6766 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6768 case NEON::BI__builtin_neon_vrshrn_n_v:
6769 Int = Intrinsic::aarch64_neon_rshrn;
6771 case NEON::BI__builtin_neon_vqrshrn_n_v:
6772 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6774 case NEON::BI__builtin_neon_vrndah_f16: {
6776 Int = Builder.getIsFPConstrained()
6777 ? Intrinsic::experimental_constrained_round
6781 case NEON::BI__builtin_neon_vrnda_v:
6782 case NEON::BI__builtin_neon_vrndaq_v: {
6783 Int = Builder.getIsFPConstrained()
6784 ? Intrinsic::experimental_constrained_round
6788 case NEON::BI__builtin_neon_vrndih_f16: {
6790 Int = Builder.getIsFPConstrained()
6791 ? Intrinsic::experimental_constrained_nearbyint
6792 : Intrinsic::nearbyint;
6795 case NEON::BI__builtin_neon_vrndmh_f16: {
6797 Int = Builder.getIsFPConstrained()
6798 ? Intrinsic::experimental_constrained_floor
6802 case NEON::BI__builtin_neon_vrndm_v:
6803 case NEON::BI__builtin_neon_vrndmq_v: {
6804 Int = Builder.getIsFPConstrained()
6805 ? Intrinsic::experimental_constrained_floor
6809 case NEON::BI__builtin_neon_vrndnh_f16: {
6811 Int = Builder.getIsFPConstrained()
6812 ? Intrinsic::experimental_constrained_roundeven
6813 : Intrinsic::roundeven;
6816 case NEON::BI__builtin_neon_vrndn_v:
6817 case NEON::BI__builtin_neon_vrndnq_v: {
6818 Int = Builder.getIsFPConstrained()
6819 ? Intrinsic::experimental_constrained_roundeven
6820 : Intrinsic::roundeven;
6823 case NEON::BI__builtin_neon_vrndns_f32: {
6825 Int = Builder.getIsFPConstrained()
6826 ? Intrinsic::experimental_constrained_roundeven
6827 : Intrinsic::roundeven;
6830 case NEON::BI__builtin_neon_vrndph_f16: {
6832 Int = Builder.getIsFPConstrained()
6833 ? Intrinsic::experimental_constrained_ceil
6837 case NEON::BI__builtin_neon_vrndp_v:
6838 case NEON::BI__builtin_neon_vrndpq_v: {
6839 Int = Builder.getIsFPConstrained()
6840 ? Intrinsic::experimental_constrained_ceil
6844 case NEON::BI__builtin_neon_vrndxh_f16: {
6846 Int = Builder.getIsFPConstrained()
6847 ? Intrinsic::experimental_constrained_rint
6851 case NEON::BI__builtin_neon_vrndx_v:
6852 case NEON::BI__builtin_neon_vrndxq_v: {
6853 Int = Builder.getIsFPConstrained()
6854 ? Intrinsic::experimental_constrained_rint
6858 case NEON::BI__builtin_neon_vrndh_f16: {
6860 Int = Builder.getIsFPConstrained()
6861 ? Intrinsic::experimental_constrained_trunc
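// Summary of the rounding lowerings above: vrnda -> round (ties away),
// vrndi -> nearbyint, vrndm -> floor, vrndn/vrndns -> roundeven,
// vrndp -> ceil, vrndx -> rint, vrnd -> trunc. Under strict FP each maps to
// its experimental_constrained_* counterpart instead.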
6865 case NEON::BI__builtin_neon_vrnd32x_f32:
6866 case NEON::BI__builtin_neon_vrnd32xq_f32:
6867 case NEON::BI__builtin_neon_vrnd32x_f64:
6868 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6870 Int = Intrinsic::aarch64_neon_frint32x;
6873 case NEON::BI__builtin_neon_vrnd32z_f32:
6874 case NEON::BI__builtin_neon_vrnd32zq_f32:
6875 case NEON::BI__builtin_neon_vrnd32z_f64:
6876 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6878 Int = Intrinsic::aarch64_neon_frint32z;
6881 case NEON::BI__builtin_neon_vrnd64x_f32:
6882 case NEON::BI__builtin_neon_vrnd64xq_f32:
6883 case NEON::BI__builtin_neon_vrnd64x_f64:
6884 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6886 Int = Intrinsic::aarch64_neon_frint64x;
6889 case NEON::BI__builtin_neon_vrnd64z_f32:
6890 case NEON::BI__builtin_neon_vrnd64zq_f32:
6891 case NEON::BI__builtin_neon_vrnd64z_f64:
6892 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6894 Int = Intrinsic::aarch64_neon_frint64z;
6897 case NEON::BI__builtin_neon_vrnd_v:
6898 case NEON::BI__builtin_neon_vrndq_v: {
6899 Int = Builder.getIsFPConstrained()
6900 ? Intrinsic::experimental_constrained_trunc
6904 case NEON::BI__builtin_neon_vcvt_f64_v:
6905 case NEON::BI__builtin_neon_vcvtq_f64_v:
6906 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6908 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6909 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6910 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6912 "unexpected vcvt_f64_f32 builtin");
6916 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6918 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6920 "unexpected vcvt_f32_f64 builtin");
6924 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6926 case NEON::BI__builtin_neon_vcvt_s32_v:
6927 case NEON::BI__builtin_neon_vcvt_u32_v:
6928 case NEON::BI__builtin_neon_vcvt_s64_v:
6929 case NEON::BI__builtin_neon_vcvt_u64_v:
6930 case NEON::BI__builtin_neon_vcvt_s16_f16:
6931 case NEON::BI__builtin_neon_vcvt_u16_f16:
6932 case NEON::BI__builtin_neon_vcvtq_s32_v:
6933 case NEON::BI__builtin_neon_vcvtq_u32_v:
6934 case NEON::BI__builtin_neon_vcvtq_s64_v:
6935 case NEON::BI__builtin_neon_vcvtq_u64_v:
6936 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6937 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6939 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6943 case NEON::BI__builtin_neon_vcvta_s16_f16:
6944 case NEON::BI__builtin_neon_vcvta_u16_f16:
6945 case NEON::BI__builtin_neon_vcvta_s32_v:
6946 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6947 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6948 case NEON::BI__builtin_neon_vcvta_u32_v:
6949 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6950 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6951 case NEON::BI__builtin_neon_vcvta_s64_v:
6952 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6953 case NEON::BI__builtin_neon_vcvta_u64_v:
6954 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6955 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6959 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6960 case NEON::BI__builtin_neon_vcvtm_s32_v:
6961 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6962 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6963 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6964 case NEON::BI__builtin_neon_vcvtm_u32_v:
6965 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6966 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6967 case NEON::BI__builtin_neon_vcvtm_s64_v:
6968 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6969 case NEON::BI__builtin_neon_vcvtm_u64_v:
6970 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6971 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6975 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6976 case NEON::BI__builtin_neon_vcvtn_s32_v:
6977 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6978 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6979 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6980 case NEON::BI__builtin_neon_vcvtn_u32_v:
6981 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6982 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6983 case NEON::BI__builtin_neon_vcvtn_s64_v:
6984 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6985 case NEON::BI__builtin_neon_vcvtn_u64_v:
6986 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6987 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6991 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6992 case NEON::BI__builtin_neon_vcvtp_s32_v:
6993 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6994 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6995 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6996 case NEON::BI__builtin_neon_vcvtp_u32_v:
6997 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6998 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6999 case NEON::BI__builtin_neon_vcvtp_s64_v:
7000 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7001 case NEON::BI__builtin_neon_vcvtp_u64_v:
7002 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7003 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7007 case NEON::BI__builtin_neon_vmulx_v:
7008 case NEON::BI__builtin_neon_vmulxq_v: {
7009 Int = Intrinsic::aarch64_neon_fmulx;
7012 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7013 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7017 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7019 Int = Intrinsic::aarch64_neon_fmulx;
7022 case NEON::BI__builtin_neon_vmul_lane_v:
7023 case NEON::BI__builtin_neon_vmul_laneq_v: {
7026 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7029 llvm::FixedVectorType *VTy =
7031 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7032 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7036 case NEON::BI__builtin_neon_vnegd_s64:
7038 case NEON::BI__builtin_neon_vnegh_f16:
7040 case NEON::BI__builtin_neon_vpmaxnm_v:
7041 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7042 Int = Intrinsic::aarch64_neon_fmaxnmp;
7045 case NEON::BI__builtin_neon_vpminnm_v:
7046 case NEON::BI__builtin_neon_vpminnmq_v: {
7047 Int = Intrinsic::aarch64_neon_fminnmp;
7050 case NEON::BI__builtin_neon_vsqrth_f16: {
7052 Int = Builder.getIsFPConstrained()
7053 ? Intrinsic::experimental_constrained_sqrt
7057 case NEON::BI__builtin_neon_vsqrt_v:
7058 case NEON::BI__builtin_neon_vsqrtq_v: {
7059 Int = Builder.getIsFPConstrained()
7060 ? Intrinsic::experimental_constrained_sqrt
7062 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7065 case NEON::BI__builtin_neon_vrbit_v:
7066 case NEON::BI__builtin_neon_vrbitq_v: {
7067 Int = Intrinsic::bitreverse;
7070 case NEON::BI__builtin_neon_vaddv_u8:
7074 case NEON::BI__builtin_neon_vaddv_s8: {
7075 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7077 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7078 llvm::Type *Tys[2] = { Ty, VTy };
7083 case NEON::BI__builtin_neon_vaddv_u16:
7086 case NEON::BI__builtin_neon_vaddv_s16: {
7087 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7089 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7090 llvm::Type *Tys[2] = { Ty, VTy };
7095 case NEON::BI__builtin_neon_vaddvq_u8:
7098 case NEON::BI__builtin_neon_vaddvq_s8: {
7099 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7101 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7102 llvm::Type *Tys[2] = { Ty, VTy };
7107 case NEON::BI__builtin_neon_vaddvq_u16:
7110 case NEON::BI__builtin_neon_vaddvq_s16: {
7111 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7113 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7114 llvm::Type *Tys[2] = { Ty, VTy };
7119 case NEON::BI__builtin_neon_vmaxv_u8: {
7120 Int = Intrinsic::aarch64_neon_umaxv;
7122 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7123 llvm::Type *Tys[2] = { Ty, VTy };
7128 case NEON::BI__builtin_neon_vmaxv_u16: {
7129 Int = Intrinsic::aarch64_neon_umaxv;
7131 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7132 llvm::Type *Tys[2] = { Ty, VTy };
7137 case NEON::BI__builtin_neon_vmaxvq_u8: {
7138 Int = Intrinsic::aarch64_neon_umaxv;
7140 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7141 llvm::Type *Tys[2] = { Ty, VTy };
7146 case NEON::BI__builtin_neon_vmaxvq_u16: {
7147 Int = Intrinsic::aarch64_neon_umaxv;
7149 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7150 llvm::Type *Tys[2] = { Ty, VTy };
7155 case NEON::BI__builtin_neon_vmaxv_s8: {
7156 Int = Intrinsic::aarch64_neon_smaxv;
7158 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7159 llvm::Type *Tys[2] = { Ty, VTy };
7164 case NEON::BI__builtin_neon_vmaxv_s16: {
7165 Int = Intrinsic::aarch64_neon_smaxv;
7167 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7168 llvm::Type *Tys[2] = { Ty, VTy };
7173 case NEON::BI__builtin_neon_vmaxvq_s8: {
7174 Int = Intrinsic::aarch64_neon_smaxv;
7176 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7177 llvm::Type *Tys[2] = { Ty, VTy };
7182 case NEON::BI__builtin_neon_vmaxvq_s16: {
7183 Int = Intrinsic::aarch64_neon_smaxv;
7185 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7186 llvm::Type *Tys[2] = { Ty, VTy };
7191 case NEON::BI__builtin_neon_vmaxv_f16: {
7192 Int = Intrinsic::aarch64_neon_fmaxv;
7194 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7195 llvm::Type *Tys[2] = { Ty, VTy };
7200 case NEON::BI__builtin_neon_vmaxvq_f16: {
7201 Int = Intrinsic::aarch64_neon_fmaxv;
7203 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7204 llvm::Type *Tys[2] = { Ty, VTy };
7209 case NEON::BI__builtin_neon_vminv_u8: {
7210 Int = Intrinsic::aarch64_neon_uminv;
7212 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7213 llvm::Type *Tys[2] = { Ty, VTy };
7218 case NEON::BI__builtin_neon_vminv_u16: {
7219 Int = Intrinsic::aarch64_neon_uminv;
7221 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7222 llvm::Type *Tys[2] = { Ty, VTy };
7227 case NEON::BI__builtin_neon_vminvq_u8: {
7228 Int = Intrinsic::aarch64_neon_uminv;
7230 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7231 llvm::Type *Tys[2] = { Ty, VTy };
7236 case NEON::BI__builtin_neon_vminvq_u16: {
7237 Int = Intrinsic::aarch64_neon_uminv;
7239 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7240 llvm::Type *Tys[2] = { Ty, VTy };
7245 case NEON::BI__builtin_neon_vminv_s8: {
7246 Int = Intrinsic::aarch64_neon_sminv;
7248 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7249 llvm::Type *Tys[2] = { Ty, VTy };
7254 case NEON::BI__builtin_neon_vminv_s16: {
7255 Int = Intrinsic::aarch64_neon_sminv;
7257 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7258 llvm::Type *Tys[2] = { Ty, VTy };
7263 case NEON::BI__builtin_neon_vminvq_s8: {
7264 Int = Intrinsic::aarch64_neon_sminv;
7266 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7267 llvm::Type *Tys[2] = { Ty, VTy };
7272 case NEON::BI__builtin_neon_vminvq_s16: {
7273 Int = Intrinsic::aarch64_neon_sminv;
7275 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7276 llvm::Type *Tys[2] = { Ty, VTy };
7281 case NEON::BI__builtin_neon_vminv_f16: {
7282 Int = Intrinsic::aarch64_neon_fminv;
7284 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7285 llvm::Type *Tys[2] = { Ty, VTy };
7290 case NEON::BI__builtin_neon_vminvq_f16: {
7291 Int = Intrinsic::aarch64_neon_fminv;
7293 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7294 llvm::Type *Tys[2] = { Ty, VTy };
7299 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7300 Int = Intrinsic::aarch64_neon_fmaxnmv;
7302 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7303 llvm::Type *Tys[2] = { Ty, VTy };
7308 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7309 Int = Intrinsic::aarch64_neon_fmaxnmv;
7311 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7312 llvm::Type *Tys[2] = { Ty, VTy };
7317 case NEON::BI__builtin_neon_vminnmv_f16: {
7318 Int = Intrinsic::aarch64_neon_fminnmv;
7320 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7321 llvm::Type *Tys[2] = { Ty, VTy };
7326 case NEON::BI__builtin_neon_vminnmvq_f16: {
7327 Int = Intrinsic::aarch64_neon_fminnmv;
7329 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7330 llvm::Type *Tys[2] = { Ty, VTy };
7335 case NEON::BI__builtin_neon_vmul_n_f64: {
7338 return Builder.CreateFMul(Ops[0], RHS);
7340 case NEON::BI__builtin_neon_vaddlv_u8: {
7341 Int = Intrinsic::aarch64_neon_uaddlv;
7343 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7344 llvm::Type *Tys[2] = { Ty, VTy };
7349 case NEON::BI__builtin_neon_vaddlv_u16: {
7350 Int = Intrinsic::aarch64_neon_uaddlv;
7352 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7353 llvm::Type *Tys[2] = { Ty, VTy };
7357 case NEON::BI__builtin_neon_vaddlvq_u8: {
7358 Int = Intrinsic::aarch64_neon_uaddlv;
7360 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7361 llvm::Type *Tys[2] = { Ty, VTy };
7366 case NEON::BI__builtin_neon_vaddlvq_u16: {
7367 Int = Intrinsic::aarch64_neon_uaddlv;
7369 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7370 llvm::Type *Tys[2] = { Ty, VTy };
7374 case NEON::BI__builtin_neon_vaddlv_s8: {
7375 Int = Intrinsic::aarch64_neon_saddlv;
7377 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7378 llvm::Type *Tys[2] = { Ty, VTy };
7383 case NEON::BI__builtin_neon_vaddlv_s16: {
7384 Int = Intrinsic::aarch64_neon_saddlv;
7386 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7387 llvm::Type *Tys[2] = { Ty, VTy };
7391 case NEON::BI__builtin_neon_vaddlvq_s8: {
7392 Int = Intrinsic::aarch64_neon_saddlv;
7394 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7395 llvm::Type *Tys[2] = { Ty, VTy };
7400 case NEON::BI__builtin_neon_vaddlvq_s16: {
7401 Int = Intrinsic::aarch64_neon_saddlv;
7403 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7404 llvm::Type *Tys[2] = { Ty, VTy };
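// The across-vector reductions (vaddv, vmaxv, vminv, vaddlv, ...) all follow
// one pattern: pick the signed/unsigned/FP reduction intrinsic, overload it
// on the {result type, vector type} pair in Tys, and call it; the vaddlv
// forms widen, returning a result larger than the element type.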
7408 case NEON::BI__builtin_neon_vsri_n_v:
7409 case NEON::BI__builtin_neon_vsriq_n_v: {
7410 Int = Intrinsic::aarch64_neon_vsri;
7411 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7414 case NEON::BI__builtin_neon_vsli_n_v:
7415 case NEON::BI__builtin_neon_vsliq_n_v: {
7416 Int = Intrinsic::aarch64_neon_vsli;
7417 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7420 case NEON::BI__builtin_neon_vsra_n_v:
7421 case NEON::BI__builtin_neon_vsraq_n_v:
7422 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7424 return Builder.CreateAdd(Ops[0], Ops[1]);
7425 case NEON::BI__builtin_neon_vrsra_n_v:
7426 case NEON::BI__builtin_neon_vrsraq_n_v: {
7427 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7429 TmpOps.push_back(Ops[1]);
7430 TmpOps.push_back(Ops[2]);
7432 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7433 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7434 return Builder.CreateAdd(Ops[0], tmp);
7436 case NEON::BI__builtin_neon_vld1_v:
7437 case NEON::BI__builtin_neon_vld1q_v: {
7440 case NEON::BI__builtin_neon_vst1_v:
7441 case NEON::BI__builtin_neon_vst1q_v:
7442 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7444 case NEON::BI__builtin_neon_vld1_lane_v:
7445 case NEON::BI__builtin_neon_vld1q_lane_v: {
7446 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7447 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7449 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7451 case NEON::BI__builtin_neon_vldap1_lane_s64:
7452 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7453 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7454 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7456 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7458 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
7460 case NEON::BI__builtin_neon_vld1_dup_v:
7461 case NEON::BI__builtin_neon_vld1q_dup_v: {
7462 Value *V = PoisonValue::get(Ty);
7463 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7465 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7466 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7469 case NEON::BI__builtin_neon_vst1_lane_v:
7470 case NEON::BI__builtin_neon_vst1q_lane_v:
7471 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7472 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7474 case NEON::BI__builtin_neon_vstl1_lane_s64:
7475 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7476 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7477 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7478 llvm::StoreInst *SI =
7480 SI->setAtomic(llvm::AtomicOrdering::Release);
7483 case NEON::BI__builtin_neon_vld2_v:
7484 case NEON::BI__builtin_neon_vld2q_v: {
7486 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7487 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7488 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7490 case NEON::BI__builtin_neon_vld3_v:
7491 case NEON::BI__builtin_neon_vld3q_v: {
7493 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7494 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7495 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7497 case NEON::BI__builtin_neon_vld4_v:
7498 case NEON::BI__builtin_neon_vld4q_v: {
7500 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7501 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7502 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7504 case NEON::BI__builtin_neon_vld2_dup_v:
7505 case NEON::BI__builtin_neon_vld2q_dup_v: {
7507 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7508 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7509 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7511 case NEON::BI__builtin_neon_vld3_dup_v:
7512 case NEON::BI__builtin_neon_vld3q_dup_v: {
7514 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7515 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7516 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7518 case NEON::BI__builtin_neon_vld4_dup_v:
7519 case NEON::BI__builtin_neon_vld4q_dup_v: {
7521 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7522 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7523 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
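// The structured loads (vld2/vld3/vld4 and their dup forms) call the ldN
// intrinsic, which returns a struct of N vectors, and then store that
// aggregate through the pointer in Ops[0], the sret-style destination the
// builtin was handed.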
7525 case NEON::BI__builtin_neon_vld2_lane_v:
7526 case NEON::BI__builtin_neon_vld2q_lane_v: {
7527 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7528 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7529 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7530 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7531 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7534 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7536 case NEON::BI__builtin_neon_vld3_lane_v:
7537 case NEON::BI__builtin_neon_vld3q_lane_v: {
7538 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7539 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7540 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7541 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7542 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7543 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7546 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7548 case NEON::BI__builtin_neon_vld4_lane_v:
7549 case NEON::BI__builtin_neon_vld4q_lane_v: {
7550 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7551 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7552 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7553 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7554 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7555 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7556 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7559 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7559 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7561 case NEON::BI__builtin_neon_vst2_v:
7562 case NEON::BI__builtin_neon_vst2q_v: {
7563 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7564 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7565 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7568 case NEON::BI__builtin_neon_vst2_lane_v:
7569 case NEON::BI__builtin_neon_vst2q_lane_v: {
7570 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7572 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7573 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7576 case NEON::BI__builtin_neon_vst3_v:
7577 case NEON::BI__builtin_neon_vst3q_v: {
7578 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7579 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7580 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7583 case NEON::BI__builtin_neon_vst3_lane_v:
7584 case NEON::BI__builtin_neon_vst3q_lane_v: {
7585 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7587 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7588 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7591 case NEON::BI__builtin_neon_vst4_v:
7592 case NEON::BI__builtin_neon_vst4q_v: {
7593 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7594 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7595 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7598 case NEON::BI__builtin_neon_vst4_lane_v:
7599 case NEON::BI__builtin_neon_vst4q_lane_v: {
7600 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7602 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7603 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7606 case NEON::BI__builtin_neon_vtrn_v:
7607 case NEON::BI__builtin_neon_vtrnq_v: {
7608 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7609 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7610 Value *SV = nullptr;
7612 for (unsigned vi = 0; vi != 2; ++vi) {
7614 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7615 Indices.push_back(i+vi);
7616 Indices.push_back(i+e+vi);
7619 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7624 case NEON::BI__builtin_neon_vuzp_v:
7625 case NEON::BI__builtin_neon_vuzpq_v: {
7626 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7627 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7628 Value *SV = nullptr;
7630 for (unsigned vi = 0; vi != 2; ++vi) {
7632 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7633 Indices.push_back(2*i+vi);
7636 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7641 case NEON::BI__builtin_neon_vzip_v:
7642 case NEON::BI__builtin_neon_vzipq_v: {
7643 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7644 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7645 Value *SV = nullptr;
7647 for (unsigned vi = 0; vi != 2; ++vi) {
7649 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7650 Indices.push_back((i + vi*e) >> 1);
7651 Indices.push_back(((i + vi*e) >> 1)+e);
7654 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7659 case NEON::BI__builtin_neon_vqtbl1q_v: {
7660 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7663 case NEON::BI__builtin_neon_vqtbl2q_v: {
7664 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7667 case NEON::BI__builtin_neon_vqtbl3q_v: {
7668 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7671 case NEON::BI__builtin_neon_vqtbl4q_v: {
7672 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7675 case NEON::BI__builtin_neon_vqtbx1q_v: {
7676 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7679 case NEON::BI__builtin_neon_vqtbx2q_v: {
7680 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7683 case NEON::BI__builtin_neon_vqtbx3q_v: {
7684 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7687 case NEON::BI__builtin_neon_vqtbx4q_v: {
7688 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7691 case NEON::BI__builtin_neon_vsqadd_v:
7692 case NEON::BI__builtin_neon_vsqaddq_v: {
7693 Int = Intrinsic::aarch64_neon_usqadd;
7696 case NEON::BI__builtin_neon_vuqadd_v:
7697 case NEON::BI__builtin_neon_vuqaddq_v: {
7698 Int = Intrinsic::aarch64_neon_suqadd;
7702 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7703 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7704 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7705 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7706 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7707 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7708 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7709 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7710 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7711 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7718 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7719 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7720 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7721 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7722 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7723 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7724 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7725 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7726 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7727 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7734 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7735 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7736 case NEON::BI__builtin_neon_vluti2_lane_f16:
7737 case NEON::BI__builtin_neon_vluti2_lane_p16:
7738 case NEON::BI__builtin_neon_vluti2_lane_p8:
7739 case NEON::BI__builtin_neon_vluti2_lane_s16:
7740 case NEON::BI__builtin_neon_vluti2_lane_s8:
7741 case NEON::BI__builtin_neon_vluti2_lane_u16:
7742 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7743 Int = Intrinsic::aarch64_neon_vluti2_lane;
7750 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7751 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7752 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7753 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7754 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7755 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7756 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7757 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7758 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7759 Int = Intrinsic::aarch64_neon_vluti2_lane;
7766 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7767 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7768 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7769 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7770 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7773 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7774 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7775 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7776 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7777 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7780 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7781 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7782 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7783 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7784 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7785 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7786 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7788 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7789 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7790 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7791 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7792 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7793 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7794 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7796 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7799 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7800 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7802 llvm::FixedVectorType::get(BFloatTy, 8),
7803 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7804 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7807 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7808 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7810 llvm::FixedVectorType::get(BFloatTy, 8),
7811 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7812 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7815 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7816 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7818 llvm::FixedVectorType::get(HalfTy, 8),
7819 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7820 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7823 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7824 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7826 llvm::FixedVectorType::get(HalfTy, 8),
7827 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7828 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7830 llvm::FixedVectorType::get(Int8Ty, 8),
7831 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7832 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7834 llvm::FixedVectorType::get(Int8Ty, 8),
7835 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7837 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7839 llvm::FixedVectorType::get(Int8Ty, 16),
7840 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7842 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7843 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7844 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7847 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7850 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7851 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7854 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7855 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7856 ExtendLaneArg = true;
7858 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7859 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7861 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7862 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7863 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7866 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7867 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7868 ExtendLaneArg = true;
7870 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7871 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7873 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7875 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7877 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7879 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7881 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7883 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7885 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7887 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7889 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7891 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7893 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7895 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7897 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7899 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7900 ExtendLaneArg = true;
7902 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7904 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7905 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7906 ExtendLaneArg = true;
7908 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7910 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7911 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7912 ExtendLaneArg = true;
7914 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7916 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7917 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7918 ExtendLaneArg = true;
7920 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7922 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7923 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7924 ExtendLaneArg = true;
7926 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7928 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7929 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7930 ExtendLaneArg = true;
7932 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7934 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7935 case NEON::BI__builtin_neon_vamin_f16:
7936 case NEON::BI__builtin_neon_vaminq_f16:
7937 case NEON::BI__builtin_neon_vamin_f32:
7938 case NEON::BI__builtin_neon_vaminq_f32:
7939 case NEON::BI__builtin_neon_vaminq_f64: {
7940 Int = Intrinsic::aarch64_neon_famin;
7943 case NEON::BI__builtin_neon_vamax_f16:
7944 case NEON::BI__builtin_neon_vamaxq_f16:
7945 case NEON::BI__builtin_neon_vamax_f32:
7946 case NEON::BI__builtin_neon_vamaxq_f32:
7947 case NEON::BI__builtin_neon_vamaxq_f64: {
7948 Int = Intrinsic::aarch64_neon_famax;
7951 case NEON::BI__builtin_neon_vscale_f16:
7952 case NEON::BI__builtin_neon_vscaleq_f16:
7953 case NEON::BI__builtin_neon_vscale_f32:
7954 case NEON::BI__builtin_neon_vscaleq_f32:
7955 case NEON::BI__builtin_neon_vscaleq_f64: {
7956 Int = Intrinsic::aarch64_neon_fp8_fscale;