334 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
335 llvm::SyncScope::ID SSID;
337 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
338 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
339 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
340 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
341 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
342 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
343 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
344 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
345 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
346 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
347 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
348 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
349 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
350 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
351 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
352 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
353 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
354 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
361 case AMDGPU::BI__builtin_amdgcn_div_scale:
362 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
372 llvm::Function *Callee =
CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
375 llvm::Value *Tmp =
Builder.CreateCall(Callee, {
X, Y, Z});
378 llvm::Value *Flag =
Builder.CreateExtractValue(Tmp, 1);
382 llvm::Value *FlagExt =
Builder.CreateZExt(Flag, RealFlagType);
383 Builder.CreateStore(FlagExt, FlagOutPtr);
386 case AMDGPU::BI__builtin_amdgcn_div_fmas:
387 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
393 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
395 llvm::Value *Src3ToBool =
Builder.CreateIsNotNull(Src3);
396 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
399 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
401 Intrinsic::amdgcn_ds_swizzle);
402 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
403 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
404 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
408 unsigned ICEArguments = 0;
413 unsigned Size = DataTy->getPrimitiveSizeInBits();
415 llvm::IntegerType::get(
Builder.getContext(), std::max(Size, 32u));
417 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
418 ? Intrinsic::amdgcn_mov_dpp8
419 : Intrinsic::amdgcn_update_dpp,
423 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
425 Args.push_back(llvm::PoisonValue::get(
IntTy));
426 for (
unsigned I = 0; I != E->
getNumArgs(); ++I) {
428 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
430 if (!DataTy->isIntegerTy())
432 V, llvm::IntegerType::get(
Builder.getContext(), Size));
436 F->getFunctionType()->getFunctionParamType(I + InsertOld);
437 Args.push_back(
Builder.CreateTruncOrBitCast(
V, ExpTy));
440 if (Size < 32 && !DataTy->isIntegerTy())
442 V, llvm::IntegerType::get(
Builder.getContext(), Size));
443 return Builder.CreateTruncOrBitCast(
V, DataTy);
445 case AMDGPU::BI__builtin_amdgcn_permlane16:
446 case AMDGPU::BI__builtin_amdgcn_permlanex16:
449 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
450 ? Intrinsic::amdgcn_permlane16
451 : Intrinsic::amdgcn_permlanex16);
452 case AMDGPU::BI__builtin_amdgcn_permlane64:
454 Intrinsic::amdgcn_permlane64);
455 case AMDGPU::BI__builtin_amdgcn_readlane:
457 Intrinsic::amdgcn_readlane);
458 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
460 Intrinsic::amdgcn_readfirstlane);
461 case AMDGPU::BI__builtin_amdgcn_div_fixup:
462 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
463 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
465 Intrinsic::amdgcn_div_fixup);
466 case AMDGPU::BI__builtin_amdgcn_trig_preop:
467 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
469 case AMDGPU::BI__builtin_amdgcn_rcp:
470 case AMDGPU::BI__builtin_amdgcn_rcpf:
471 case AMDGPU::BI__builtin_amdgcn_rcph:
472 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
474 case AMDGPU::BI__builtin_amdgcn_sqrt:
475 case AMDGPU::BI__builtin_amdgcn_sqrtf:
476 case AMDGPU::BI__builtin_amdgcn_sqrth:
477 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
479 Intrinsic::amdgcn_sqrt);
480 case AMDGPU::BI__builtin_amdgcn_rsq:
481 case AMDGPU::BI__builtin_amdgcn_rsqf:
482 case AMDGPU::BI__builtin_amdgcn_rsqh:
483 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
485 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
486 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
488 Intrinsic::amdgcn_rsq_clamp);
489 case AMDGPU::BI__builtin_amdgcn_sinf:
490 case AMDGPU::BI__builtin_amdgcn_sinh:
491 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
493 case AMDGPU::BI__builtin_amdgcn_cosf:
494 case AMDGPU::BI__builtin_amdgcn_cosh:
495 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
497 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
498 return EmitAMDGPUDispatchPtr(*
this, E);
499 case AMDGPU::BI__builtin_amdgcn_logf:
500 case AMDGPU::BI__builtin_amdgcn_log_bf16:
502 case AMDGPU::BI__builtin_amdgcn_exp2f:
503 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
505 Intrinsic::amdgcn_exp2);
506 case AMDGPU::BI__builtin_amdgcn_log_clampf:
508 Intrinsic::amdgcn_log_clamp);
509 case AMDGPU::BI__builtin_amdgcn_ldexp:
510 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
514 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
515 return Builder.CreateCall(F, {Src0, Src1});
517 case AMDGPU::BI__builtin_amdgcn_ldexph: {
523 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(),
Int16Ty});
526 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
527 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
528 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
530 Intrinsic::amdgcn_frexp_mant);
531 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
532 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
534 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
536 return Builder.CreateCall(F, Src0);
538 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
540 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
542 return Builder.CreateCall(F, Src0);
544 case AMDGPU::BI__builtin_amdgcn_fract:
545 case AMDGPU::BI__builtin_amdgcn_fractf:
546 case AMDGPU::BI__builtin_amdgcn_fracth:
548 Intrinsic::amdgcn_fract);
549 case AMDGPU::BI__builtin_amdgcn_lerp:
551 Intrinsic::amdgcn_lerp);
552 case AMDGPU::BI__builtin_amdgcn_ubfe:
554 Intrinsic::amdgcn_ubfe);
555 case AMDGPU::BI__builtin_amdgcn_sbfe:
557 Intrinsic::amdgcn_sbfe);
558 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
559 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
562 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
563 return Builder.CreateCall(F, { Src });
565 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
566 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
569 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
570 return Builder.CreateCall(F, {Src});
572 case AMDGPU::BI__builtin_amdgcn_tanhf:
573 case AMDGPU::BI__builtin_amdgcn_tanhh:
574 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
576 Intrinsic::amdgcn_tanh);
577 case AMDGPU::BI__builtin_amdgcn_uicmp:
578 case AMDGPU::BI__builtin_amdgcn_uicmpl:
579 case AMDGPU::BI__builtin_amdgcn_sicmp:
580 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
586 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
587 {
Builder.getInt64Ty(), Src0->getType() });
588 return Builder.CreateCall(F, { Src0, Src1, Src2 });
590 case AMDGPU::BI__builtin_amdgcn_fcmp:
591 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
597 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
598 {
Builder.getInt64Ty(), Src0->getType() });
599 return Builder.CreateCall(F, { Src0, Src1, Src2 });
601 case AMDGPU::BI__builtin_amdgcn_class:
602 case AMDGPU::BI__builtin_amdgcn_classf:
603 case AMDGPU::BI__builtin_amdgcn_classh:
605 case AMDGPU::BI__builtin_amdgcn_fmed3f:
606 case AMDGPU::BI__builtin_amdgcn_fmed3h:
608 Intrinsic::amdgcn_fmed3);
609 case AMDGPU::BI__builtin_amdgcn_ds_append:
610 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
611 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
612 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
617 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
618 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
619 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
620 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
621 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
622 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
623 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
624 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
625 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
626 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
627 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
628 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
629 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
630 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
631 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
632 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
633 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
634 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
635 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
636 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
637 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
638 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
639 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
640 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
641 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
642 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
645 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
646 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
647 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
648 IID = Intrinsic::amdgcn_global_load_tr_b64;
650 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
651 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
652 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
653 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
654 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
655 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
656 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
657 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
658 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
659 IID = Intrinsic::amdgcn_global_load_tr_b128;
661 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
662 IID = Intrinsic::amdgcn_global_load_tr4_b64;
664 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
665 IID = Intrinsic::amdgcn_global_load_tr6_b96;
667 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
668 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
670 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
671 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
673 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
674 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
676 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
677 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
678 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
679 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
681 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
682 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
684 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
685 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
687 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
688 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
690 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
691 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
692 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
693 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
698 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
701 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
702 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
703 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
704 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
705 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
706 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
710 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
711 IID = Intrinsic::amdgcn_global_load_monitor_b32;
713 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
714 IID = Intrinsic::amdgcn_global_load_monitor_b64;
716 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
717 IID = Intrinsic::amdgcn_global_load_monitor_b128;
719 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
720 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
722 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
723 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
725 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
726 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
733 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
736 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
737 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
738 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
741 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
742 IID = Intrinsic::amdgcn_cluster_load_b32;
744 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
745 IID = Intrinsic::amdgcn_cluster_load_b64;
747 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
748 IID = Intrinsic::amdgcn_cluster_load_b128;
752 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
755 return Builder.CreateCall(F, {Args});
757 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
760 Intrinsic::amdgcn_load_to_lds);
762 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
763 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
764 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
765 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
766 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
767 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
770 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
771 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
773 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
774 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
776 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
777 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
779 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
780 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
782 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
783 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
785 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
786 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
790 LLVMContext &Ctx =
CGM.getLLVMContext();
793 const unsigned ScopeArg = E->
getNumArgs() - 1;
794 for (
unsigned i = 0; i != ScopeArg; ++i)
798 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
799 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
802 llvm::Function *F =
CGM.getIntrinsic(IID, {Args[0]->getType()});
803 return Builder.CreateCall(F, {Args});
805 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
806 Function *F =
CGM.getIntrinsic(Intrinsic::get_fpenv,
810 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
811 Function *F =
CGM.getIntrinsic(Intrinsic::set_fpenv,
814 return Builder.CreateCall(F, {Env});
816 case AMDGPU::BI__builtin_amdgcn_read_exec:
818 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
820 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
822 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
823 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
824 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
825 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
835 RayOrigin =
Builder.CreateShuffleVector(RayOrigin, RayOrigin,
838 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
839 RayInverseDir =
Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
842 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
843 {NodePtr->getType(), RayDir->getType()});
844 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
845 RayInverseDir, TextureDescr});
847 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
848 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
851 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
852 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
854 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
855 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
869 llvm::Function *IntrinsicFunc =
CGM.getIntrinsic(IID);
871 llvm::CallInst *CI =
Builder.CreateCall(
872 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
873 Offset, TextureDescr});
875 llvm::Value *RetVData =
Builder.CreateExtractValue(CI, 0);
876 llvm::Value *RetRayOrigin =
Builder.CreateExtractValue(CI, 1);
877 llvm::Value *RetRayDir =
Builder.CreateExtractValue(CI, 2);
879 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
880 Builder.CreateStore(RetRayDir, RetRayDirPtr);
885 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
886 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
887 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
888 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
891 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
892 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
894 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
895 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
897 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
898 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
900 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
901 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
906 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
914 Value *I0 =
Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
918 if (A->
getType()->getPrimitiveSizeInBits() <
919 RetTy->getScalarType()->getPrimitiveSizeInBits())
920 A =
Builder.CreateZExt(A, RetTy->getScalarType());
922 return Builder.CreateInsertElement(I0, A, 1);
924 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
925 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
926 llvm::FixedVectorType *VT = FixedVectorType::get(
Builder.getInt32Ty(), 8);
928 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
929 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
930 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
934 for (
unsigned I = 0, N = E->
getNumArgs(); I != N; ++I)
936 return Builder.CreateCall(F, Args);
938 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
939 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
940 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
941 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
942 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
943 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
944 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
945 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
946 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
947 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
948 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
949 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
950 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
951 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
952 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
953 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
954 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
955 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
956 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
957 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
958 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
959 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
960 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
961 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
962 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
963 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
964 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
965 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
966 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
967 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
968 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
969 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
970 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
971 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
972 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
973 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
974 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
975 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
976 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
977 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
978 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
979 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
980 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
981 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
982 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
983 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
984 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
985 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
986 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
987 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
988 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
989 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
990 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
991 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
992 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
993 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
994 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
995 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
996 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
997 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
999 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1000 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1001 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1002 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1003 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1004 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1005 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1006 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1007 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1008 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1009 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1010 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1011 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1012 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1013 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1014 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1015 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1016 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1017 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1018 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1019 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1020 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1021 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1022 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1023 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1024 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1025 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1026 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1027 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1028 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1029 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1030 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1031 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1032 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1033 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1034 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1035 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1036 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1037 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1038 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1039 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1040 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1041 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1054 bool AppendFalseForOpselArg =
false;
1055 unsigned BuiltinWMMAOp;
1057 bool NeedReturnType =
false;
1059 switch (BuiltinID) {
1060 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1061 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1062 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1063 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1064 ArgsForMatchingMatrixTypes = {2, 0};
1065 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1067 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1068 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1069 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1070 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1071 ArgsForMatchingMatrixTypes = {2, 0};
1072 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1074 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1075 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1076 AppendFalseForOpselArg =
true;
1078 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1079 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1080 ArgsForMatchingMatrixTypes = {2, 0};
1081 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1083 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1084 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1085 AppendFalseForOpselArg =
true;
1087 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1088 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1089 ArgsForMatchingMatrixTypes = {2, 0};
1090 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1092 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1093 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1094 ArgsForMatchingMatrixTypes = {2, 0};
1095 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1097 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1098 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1099 ArgsForMatchingMatrixTypes = {2, 0};
1100 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1102 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1103 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1104 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1105 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1106 ArgsForMatchingMatrixTypes = {4, 1};
1107 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1109 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1110 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1111 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1112 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1113 ArgsForMatchingMatrixTypes = {4, 1};
1114 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1116 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1117 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1118 ArgsForMatchingMatrixTypes = {2, 0};
1119 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1121 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1122 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1123 ArgsForMatchingMatrixTypes = {2, 0};
1124 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1126 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1127 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1128 ArgsForMatchingMatrixTypes = {2, 0};
1129 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1131 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1132 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1133 ArgsForMatchingMatrixTypes = {2, 0};
1134 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1136 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1137 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1138 ArgsForMatchingMatrixTypes = {4, 1};
1139 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1141 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1142 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1143 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1144 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1146 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1147 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1148 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1149 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1151 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1152 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1153 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1154 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1156 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1157 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1158 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1159 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1161 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1162 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1163 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1164 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1166 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1167 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1168 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1169 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1171 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1172 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1173 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1174 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1176 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1177 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1178 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1179 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1181 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1182 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1183 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1184 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1186 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1187 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1188 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1189 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1191 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1192 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1193 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1194 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1197 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1198 ArgsForMatchingMatrixTypes = {5, 1};
1199 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1201 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1202 ArgsForMatchingMatrixTypes = {5, 1};
1203 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1205 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1206 ArgsForMatchingMatrixTypes = {5, 1};
1207 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1209 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1210 ArgsForMatchingMatrixTypes = {5, 1};
1211 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1213 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1214 ArgsForMatchingMatrixTypes = {5, 1};
1215 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1217 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1218 NeedReturnType =
true;
1219 ArgsForMatchingMatrixTypes = {1, 5};
1220 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1222 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1223 ArgsForMatchingMatrixTypes = {3, 0};
1224 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1226 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1227 ArgsForMatchingMatrixTypes = {3, 0};
1228 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1230 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1231 ArgsForMatchingMatrixTypes = {3, 0};
1232 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1234 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1235 ArgsForMatchingMatrixTypes = {3, 0};
1236 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1238 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1239 ArgsForMatchingMatrixTypes = {3, 0};
1240 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1242 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1243 ArgsForMatchingMatrixTypes = {3, 0};
1244 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1246 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1247 ArgsForMatchingMatrixTypes = {3, 0};
1248 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1250 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1251 ArgsForMatchingMatrixTypes = {3, 0};
1252 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1254 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1255 ArgsForMatchingMatrixTypes = {3, 0};
1256 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1258 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1259 ArgsForMatchingMatrixTypes = {3, 0};
1260 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1262 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1263 ArgsForMatchingMatrixTypes = {3, 0};
1264 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1266 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1267 ArgsForMatchingMatrixTypes = {3, 0};
1268 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1270 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1271 ArgsForMatchingMatrixTypes = {3, 0};
1272 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1274 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1275 ArgsForMatchingMatrixTypes = {3, 0};
1276 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1278 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1279 ArgsForMatchingMatrixTypes = {3, 0};
1280 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1282 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1283 ArgsForMatchingMatrixTypes = {3, 0};
1284 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1286 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1287 ArgsForMatchingMatrixTypes = {4, 1};
1288 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1290 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1291 ArgsForMatchingMatrixTypes = {5, 1, 3};
1292 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1294 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1295 ArgsForMatchingMatrixTypes = {5, 1, 3};
1296 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1298 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1299 ArgsForMatchingMatrixTypes = {5, 1, 3};
1300 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1302 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1303 ArgsForMatchingMatrixTypes = {3, 0, 1};
1304 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1306 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1307 ArgsForMatchingMatrixTypes = {3, 0, 1};
1308 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1310 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1311 ArgsForMatchingMatrixTypes = {3, 0, 1};
1312 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1314 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1315 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1316 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1318 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1319 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1320 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1322 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1323 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1324 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1326 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1327 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1328 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1330 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1331 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1332 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1334 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1335 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1336 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1338 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1339 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1340 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1342 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1343 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1344 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1346 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1347 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1348 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1350 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1351 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1352 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1354 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1355 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1356 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1358 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1359 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1360 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1362 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1363 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1364 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1366 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1367 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1368 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1373 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1375 if (AppendFalseForOpselArg)
1376 Args.push_back(
Builder.getFalse());
1381 for (
auto ArgIdx : ArgsForMatchingMatrixTypes)
1382 ArgTypes.push_back(Args[ArgIdx]->
getType());
1384 Function *F =
CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1385 return Builder.CreateCall(F, Args);
1388 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1389 return EmitAMDGPUWorkGroupSize(*
this, 0);
1390 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1391 return EmitAMDGPUWorkGroupSize(*
this, 1);
1392 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1393 return EmitAMDGPUWorkGroupSize(*
this, 2);
1396 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1397 return EmitAMDGPUGridSize(*
this, 0);
1398 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1399 return EmitAMDGPUGridSize(*
this, 1);
1400 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1401 return EmitAMDGPUGridSize(*
this, 2);
1404 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1405 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1407 Intrinsic::r600_recipsqrt_ieee);
1408 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1412 Function *F =
CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1413 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1415 case AMDGPU::BI__builtin_amdgcn_fence: {
1418 FenceInst *Fence =
Builder.CreateFence(AO, SSID);
1423 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1424 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1425 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1426 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1427 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1428 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1429 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1430 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1431 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1432 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1433 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1434 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1435 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1436 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1437 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1438 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1439 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1440 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1441 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1442 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1443 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1444 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1445 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1446 llvm::AtomicRMWInst::BinOp BinOp;
1447 switch (BuiltinID) {
1448 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1449 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1450 BinOp = llvm::AtomicRMWInst::UIncWrap;
1452 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1453 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1454 BinOp = llvm::AtomicRMWInst::UDecWrap;
1456 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1457 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1458 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1459 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1460 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1461 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1462 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1463 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1464 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1465 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1466 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1467 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1468 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1469 BinOp = llvm::AtomicRMWInst::FAdd;
1471 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1472 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1473 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1474 BinOp = llvm::AtomicRMWInst::FMin;
1476 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1477 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1478 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1479 BinOp = llvm::AtomicRMWInst::FMax;
1485 llvm::Type *OrigTy = Val->
getType();
1490 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1491 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1492 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1514 AO = AtomicOrdering::Monotonic;
1517 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1518 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1519 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1520 llvm::Type *V2BF16Ty = FixedVectorType::get(
1521 llvm::Type::getBFloatTy(
Builder.getContext()), 2);
1522 Val =
Builder.CreateBitCast(Val, V2BF16Ty);
1526 llvm::AtomicRMWInst *RMW =
1527 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1529 RMW->setVolatile(
true);
1531 unsigned AddrSpace = Ptr.
getType()->getAddressSpace();
1532 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1536 RMW->setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
1540 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->
getType()->isFloatTy())
1541 RMW->setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
1544 return Builder.CreateBitCast(RMW, OrigTy);
1546 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1547 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1552 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1553 return Builder.CreateCall(F, {Arg});
1555 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1556 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1564 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1565 ? Intrinsic::amdgcn_permlane16_swap
1566 : Intrinsic::amdgcn_permlane32_swap);
1567 llvm::CallInst *
Call =
1568 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1570 llvm::Value *Elt0 =
Builder.CreateExtractValue(
Call, 0);
1571 llvm::Value *Elt1 =
Builder.CreateExtractValue(
Call, 1);
1575 llvm::Value *Insert0 =
Builder.CreateInsertElement(
1576 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
1577 llvm::Value *AsVector =
1578 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
1581 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
1582 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
1584 Intrinsic::amdgcn_bitop3);
1585 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
1590 for (
unsigned I = 0; I < 4; ++I)
1592 llvm::PointerType *RetTy = llvm::PointerType::get(
1593 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
1594 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
1595 {RetTy, Args[0]->getType()});
1596 return Builder.CreateCall(F, Args);
1598 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
1599 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
1600 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
1601 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
1602 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
1603 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
1605 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
1606 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1607 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1608 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1609 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1610 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1611 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
1612 llvm::Type *RetTy =
nullptr;
1613 switch (BuiltinID) {
1614 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1617 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1620 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1623 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1624 RetTy = llvm::FixedVectorType::get(
Int32Ty, 2);
1626 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1627 RetTy = llvm::FixedVectorType::get(
Int32Ty, 3);
1629 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
1630 RetTy = llvm::FixedVectorType::get(
Int32Ty, 4);
1634 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
1639 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1641 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
1642 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1643 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1645 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
1646 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
1647 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
1649 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
1650 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1651 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1653 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
1654 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
1656 *
this, E, Intrinsic::amdgcn_s_prefetch_data);
1657 case Builtin::BIlogbf:
1658 case Builtin::BI__builtin_logbf: {
1662 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1665 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
1670 Fabs, ConstantFP::getInfinity(
Builder.getFloatTy()));
1671 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1673 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getFloatTy()));
1676 ConstantFP::getInfinity(
Builder.getFloatTy(),
true), Sel1);
1679 case Builtin::BIlogb:
1680 case Builtin::BI__builtin_logb: {
1684 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1687 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
1692 Fabs, ConstantFP::getInfinity(
Builder.getDoubleTy()));
1693 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1695 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getDoubleTy()));
1698 ConstantFP::getInfinity(
Builder.getDoubleTy(),
true),
1702 case Builtin::BIscalbnf:
1703 case Builtin::BI__builtin_scalbnf:
1704 case Builtin::BIscalbn:
1705 case Builtin::BI__builtin_scalbn:
1707 *
this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);