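(work in progress)

The architecture of ResNet50 is described in the tables below, one table per block. Each row lists one kernel together with the layer and op it belongs to.

Parameters that are omitted from the `Params` column of a `conv2d` row take the following default values:

`ph=0 pw=0 U=1 V=1 dh=1 dw=1 g=1`
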
Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1 | fprop | block_1 | conv2d | N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 | cudnn::gemm::computeOffsetsKernel |
2 | fprop | block_1 | conv2d | N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 | volta_fp16_scudnn_fp16_128x64_relu_medium_nn_v1 |
3 | fprop | block_1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
4 | fprop | block_1 | batch_norm | T=(32,64,112,112) | batch_norm_collect_statistics_kernel |
5 | fprop | block_1 | batch_norm | T=(32,64,112,112) | batch_norm_transform_input_kernel |
6 | fprop | block_1 | relu | T=(32,64,112,112) | modern::elementwise_kernel |
7 | fprop | block_1 | max_pool2d | T=[(32,64,112,112)] | max_pool_forward_nchw |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
8 | fprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
9 | fprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | volta_fp16_scudnn_fp16_128x64_relu_interior_nn_v1 |
10 | fprop | 2a:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
11 | fprop | 2a:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_collect_statistics_kernel |
12 | fprop | 2a:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_transform_input_kernel |
13 | fprop | 2a:ReLU1 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
14 | fprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
15 | fprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
16 | fprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
17 | fprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_256x64_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
18 | fprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
19 | fprop | 2a:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
20 | fprop | 2a:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_collect_statistics_kernel |
21 | fprop | 2a:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_transform_input_kernel |
22 | fprop | 2a:ReLU2 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
23 | fprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
24 | fprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
25 | fprop | 2a:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
26 | fprop | 2a:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_collect_statistics_kernel |
27 | fprop | 2a:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_transform_input_kernel |
28 | fprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
29 | fprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
30 | fprop | 2a:Residual:Projection | __add__ | T=[(1,)] | legacy::elementwise_kernel |
31 | fprop | 2a:Residual:Projection | batch_norm | T=(32,256,56,56) | batch_norm_collect_statistics_kernel |
32 | fprop | 2a:Residual:Projection | batch_norm | T=(32,256,56,56) | batch_norm_transform_input_kernel |
33 | fprop | 2a:Residual | __iadd__ | T=[(32,256,56,56),(32,256,56,56)] | modern::elementwise_kernel |
34 | fprop | 2a:ReLU3 | relu | T=(32,256,56,56) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
35 | fprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
36 | fprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
37 | fprop | 2b:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
38 | fprop | 2b:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_collect_statistics_kernel |
39 | fprop | 2b:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_transform_input_kernel |
40 | fprop | 2b:ReLU1 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
41 | fprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
42 | fprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
43 | fprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
44 | fprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_256x64_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
45 | fprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
46 | fprop | 2b:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
47 | fprop | 2b:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_collect_statistics_kernel |
48 | fprop | 2b:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_transform_input_kernel |
49 | fprop | 2b:ReLU2 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
50 | fprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
51 | fprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
52 | fprop | 2b:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
53 | fprop | 2b:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_collect_statistics_kernel |
54 | fprop | 2b:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_transform_input_kernel |
55 | fprop | 2b:Residual | __iadd__ | T=[(32,256,56,56),(32,256,56,56)] | modern::elementwise_kernel |
56 | fprop | 2b:ReLU3 | relu | T=(32,256,56,56) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
57 | fprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
58 | fprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
59 | fprop | 2c:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
60 | fprop | 2c:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_collect_statistics_kernel |
61 | fprop | 2c:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_transform_input_kernel |
62 | fprop | 2c:ReLU1 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
63 | fprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
64 | fprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
65 | fprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
66 | fprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_256x64_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
67 | fprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
68 | fprop | 2c:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
69 | fprop | 2c:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_collect_statistics_kernel |
70 | fprop | 2c:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_transform_input_kernel |
71 | fprop | 2c:ReLU2 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
72 | fprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
73 | fprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
74 | fprop | 2c:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
75 | fprop | 2c:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_collect_statistics_kernel |
76 | fprop | 2c:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_transform_input_kernel |
77 | fprop | 2c:Residual | __iadd__ | T=[(32,256,56,56),(32,256,56,56)] | modern::elementwise_kernel |
78 | fprop | 2c:ReLU3 | relu | T=(32,256,56,56) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
79 | fprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
80 | fprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
81 | fprop | 3a:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
82 | fprop | 3a:BN1 | batch_norm | T=(32,128,56,56) | batch_norm_collect_statistics_kernel |
83 | fprop | 3a:BN1 | batch_norm | T=(32,128,56,56) | batch_norm_transform_input_kernel |
84 | fprop | 3a:ReLU1 | relu | T=(32,128,56,56) | modern::elementwise_kernel |
85 | fprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
86 | fprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
87 | fprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | cudnn::gemm::computeOffsetsKernel |
88 | fprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
89 | fprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
90 | fprop | 3a:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
91 | fprop | 3a:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_collect_statistics_kernel |
92 | fprop | 3a:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_transform_input_kernel |
93 | fprop | 3a:ReLU2 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
94 | fprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
95 | fprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
96 | fprop | 3a:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
97 | fprop | 3a:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_collect_statistics_kernel |
98 | fprop | 3a:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_transform_input_kernel |
99 | fprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
100 | fprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | cudnn::gemm::computeOffsetsKernel |
101 | fprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
102 | fprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
103 | fprop | 3a:Residual:Projection | __add__ | T=[(1,)] | legacy::elementwise_kernel |
104 | fprop | 3a:Residual:Projection | batch_norm | T=(32,512,28,28) | batch_norm_collect_statistics_kernel |
105 | fprop | 3a:Residual:Projection | batch_norm | T=(32,512,28,28) | batch_norm_transform_input_kernel |
106 | fprop | 3a:Residual | __iadd__ | T=[(32,512,28,28),(32,512,28,28)] | modern::elementwise_kernel |
107 | fprop | 3a:ReLU3 | relu | T=(32,512,28,28) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
108 | fprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
109 | fprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
110 | fprop | 3b:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
111 | fprop | 3b:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_collect_statistics_kernel |
112 | fprop | 3b:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_transform_input_kernel |
113 | fprop | 3b:ReLU1 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
114 | fprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
115 | fprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
116 | fprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
117 | fprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
118 | fprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
119 | fprop | 3b:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
120 | fprop | 3b:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_collect_statistics_kernel |
121 | fprop | 3b:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_transform_input_kernel |
122 | fprop | 3b:ReLU2 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
123 | fprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
124 | fprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
125 | fprop | 3b:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
126 | fprop | 3b:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_collect_statistics_kernel |
127 | fprop | 3b:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_transform_input_kernel |
128 | fprop | 3b:Residual | __iadd__ | T=[(32,512,28,28),(32,512,28,28)] | modern::elementwise_kernel |
129 | fprop | 3b:ReLU3 | relu | T=(32,512,28,28) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
130 | fprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
131 | fprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
132 | fprop | 3c:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
133 | fprop | 3c:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_collect_statistics_kernel |
134 | fprop | 3c:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_transform_input_kernel |
135 | fprop | 3c:ReLU1 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
136 | fprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
137 | fprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
138 | fprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
139 | fprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
140 | fprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
141 | fprop | 3c:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
142 | fprop | 3c:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_collect_statistics_kernel |
143 | fprop | 3c:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_transform_input_kernel |
144 | fprop | 3c:ReLU2 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
145 | fprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
146 | fprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
147 | fprop | 3c:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
148 | fprop | 3c:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_collect_statistics_kernel |
149 | fprop | 3c:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_transform_input_kernel |
150 | fprop | 3c:Residual | __iadd__ | T=[(32,512,28,28),(32,512,28,28)] | modern::elementwise_kernel |
151 | fprop | 3c:ReLU3 | relu | T=(32,512,28,28) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
152 | fprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
153 | fprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
154 | fprop | 3d:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
155 | fprop | 3d:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_collect_statistics_kernel |
156 | fprop | 3d:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_transform_input_kernel |
157 | fprop | 3d:ReLU1 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
158 | fprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
159 | fprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
160 | fprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
161 | fprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
162 | fprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
163 | fprop | 3d:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
164 | fprop | 3d:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_collect_statistics_kernel |
165 | fprop | 3d:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_transform_input_kernel |
166 | fprop | 3d:ReLU2 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
167 | fprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
168 | fprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
169 | fprop | 3d:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
170 | fprop | 3d:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_collect_statistics_kernel |
171 | fprop | 3d:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_transform_input_kernel |
172 | fprop | 3d:Residual | __iadd__ | T=[(32,512,28,28),(32,512,28,28)] | modern::elementwise_kernel |
173 | fprop | 3d:ReLU3 | relu | T=(32,512,28,28) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
174 | fprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | cask_cudnn::computeOffsetsKernel |
175 | fprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1 |
176 | fprop | 4a:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
177 | fprop | 4a:BN1 | batch_norm | T=(32,256,28,28) | batch_norm_collect_statistics_kernel |
178 | fprop | 4a:BN1 | batch_norm | T=(32,256,28,28) | batch_norm_transform_input_kernel |
179 | fprop | 4a:ReLU1 | relu | T=(32,256,28,28) | modern::elementwise_kernel |
180 | fprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
181 | fprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
182 | fprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | cudnn::gemm::computeOffsetsKernel |
183 | fprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
184 | fprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
185 | fprop | 4a:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
186 | fprop | 4a:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
187 | fprop | 4a:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
188 | fprop | 4a:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
189 | fprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
190 | fprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
191 | fprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
192 | fprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
193 | fprop | 4a:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
194 | fprop | 4a:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_collect_statistics_kernel |
195 | fprop | 4a:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_transform_input_kernel |
196 | fprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
197 | fprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | cudnn::gemm::computeOffsetsKernel |
198 | fprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
199 | fprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
200 | fprop | 4a:Residual:Projection | __add__ | T=[(1,)] | legacy::elementwise_kernel |
201 | fprop | 4a:Residual:Projection | batch_norm | T=(32,1024,14,14) | batch_norm_collect_statistics_kernel |
202 | fprop | 4a:Residual:Projection | batch_norm | T=(32,1024,14,14) | batch_norm_transform_input_kernel |
203 | fprop | 4a:Residual | __iadd__ | T=[(32,1024,14,14),(32,1024,14,14)] | modern::elementwise_kernel |
204 | fprop | 4a:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
205 | fprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
206 | fprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
207 | fprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
208 | fprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
209 | fprop | 4b:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
210 | fprop | 4b:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
211 | fprop | 4b:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
212 | fprop | 4b:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
213 | fprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
214 | fprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
215 | fprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
216 | fprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
217 | fprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
218 | fprop | 4b:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
219 | fprop | 4b:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
220 | fprop | 4b:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
221 | fprop | 4b:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
222 | fprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
223 | fprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
224 | fprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
225 | fprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
226 | fprop | 4b:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
227 | fprop | 4b:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_collect_statistics_kernel |
228 | fprop | 4b:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_transform_input_kernel |
229 | fprop | 4b:Residual | __iadd__ | T=[(32,1024,14,14),(32,1024,14,14)] | modern::elementwise_kernel |
230 | fprop | 4b:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
231 | fprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
232 | fprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
233 | fprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
234 | fprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
235 | fprop | 4c:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
236 | fprop | 4c:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
237 | fprop | 4c:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
238 | fprop | 4c:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
239 | fprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
240 | fprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
241 | fprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
242 | fprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
243 | fprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
244 | fprop | 4c:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
245 | fprop | 4c:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
246 | fprop | 4c:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
247 | fprop | 4c:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
248 | fprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
249 | fprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
250 | fprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
251 | fprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
252 | fprop | 4c:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
253 | fprop | 4c:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_collect_statistics_kernel |
254 | fprop | 4c:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_transform_input_kernel |
255 | fprop | 4c:Residual | __iadd__ | T=[(32,1024,14,14),(32,1024,14,14)] | modern::elementwise_kernel |
256 | fprop | 4c:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
257 | fprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
258 | fprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
259 | fprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
260 | fprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
261 | fprop | 4d:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
262 | fprop | 4d:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
263 | fprop | 4d:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
264 | fprop | 4d:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
265 | fprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
266 | fprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
267 | fprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
268 | fprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
269 | fprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
270 | fprop | 4d:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
271 | fprop | 4d:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
272 | fprop | 4d:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
273 | fprop | 4d:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
274 | fprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
275 | fprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
276 | fprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
277 | fprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
278 | fprop | 4d:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
279 | fprop | 4d:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_collect_statistics_kernel |
280 | fprop | 4d:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_transform_input_kernel |
281 | fprop | 4d:Residual | __iadd__ | T=[(32,1024,14,14),(32,1024,14,14)] | modern::elementwise_kernel |
282 | fprop | 4d:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
283 | fprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
284 | fprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
285 | fprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
286 | fprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
287 | fprop | 4e:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
288 | fprop | 4e:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
289 | fprop | 4e:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
290 | fprop | 4e:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
291 | fprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
292 | fprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
293 | fprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
294 | fprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
295 | fprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
296 | fprop | 4e:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
297 | fprop | 4e:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
298 | fprop | 4e:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
299 | fprop | 4e:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
300 | fprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
301 | fprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
302 | fprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
303 | fprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
304 | fprop | 4e:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
305 | fprop | 4e:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_collect_statistics_kernel |
306 | fprop | 4e:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_transform_input_kernel |
307 | fprop | 4e:Residual | __iadd__ | T=[(32,1024,14,14),(32,1024,14,14)] | modern::elementwise_kernel |
308 | fprop | 4e:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
309 | fprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
310 | fprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
311 | fprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
312 | fprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
313 | fprop | 4f:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
314 | fprop | 4f:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
315 | fprop | 4f:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
316 | fprop | 4f:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
317 | fprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
318 | fprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
319 | fprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
320 | fprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
321 | fprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
322 | fprop | 4f:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
323 | fprop | 4f:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_collect_statistics_kernel |
324 | fprop | 4f:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_transform_input_kernel |
325 | fprop | 4f:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
326 | fprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
327 | fprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
328 | fprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
329 | fprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
330 | fprop | 4f:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
331 | fprop | 4f:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_collect_statistics_kernel |
332 | fprop | 4f:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_transform_input_kernel |
333 | fprop | 4f:Residual | __iadd__ | T=[(32,1024,14,14),(32,1024,14,14)] | modern::elementwise_kernel |
334 | fprop | 4f:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
335 | fprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
336 | fprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
337 | fprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
338 | fprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
339 | fprop | 5a:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
340 | fprop | 5a:BN1 | batch_norm | T=(32,512,14,14) | batch_norm_collect_statistics_kernel |
341 | fprop | 5a:BN1 | batch_norm | T=(32,512,14,14) | batch_norm_transform_input_kernel |
342 | fprop | 5a:ReLU1 | relu | T=(32,512,14,14) | modern::elementwise_kernel |
343 | fprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
344 | fprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
345 | fprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | cudnn::gemm::computeOffsetsKernel |
346 | fprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
347 | fprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
348 | fprop | 5a:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
349 | fprop | 5a:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_collect_statistics_kernel |
350 | fprop | 5a:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_transform_input_kernel |
351 | fprop | 5a:ReLU2 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
352 | fprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
353 | fprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
354 | fprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
355 | fprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
356 | fprop | 5a:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
357 | fprop | 5a:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_collect_statistics_kernel |
358 | fprop | 5a:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_transform_input_kernel |
359 | fprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
360 | fprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | cudnn::gemm::computeOffsetsKernel |
361 | fprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
362 | fprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
363 | fprop | 5a:Residual:Projection | __add__ | T=[(1,)] | legacy::elementwise_kernel |
364 | fprop | 5a:Residual:Projection | batch_norm | T=(32,2048,7,7) | batch_norm_collect_statistics_kernel |
365 | fprop | 5a:Residual:Projection | batch_norm | T=(32,2048,7,7) | batch_norm_transform_input_kernel |
366 | fprop | 5a:Residual | __iadd__ | T=[(32,2048,7,7),(32,2048,7,7)] | modern::elementwise_kernel |
367 | fprop | 5a:ReLU3 | relu | T=(32,2048,7,7) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
368 | fprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
369 | fprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
370 | fprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
371 | fprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
372 | fprop | 5b:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
373 | fprop | 5b:BN1 | batch_norm | T=(32,512,7,7) | batch_norm_collect_statistics_kernel |
374 | fprop | 5b:BN1 | batch_norm | T=(32,512,7,7) | batch_norm_transform_input_kernel |
375 | fprop | 5b:ReLU1 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
376 | fprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
377 | fprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
378 | fprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
379 | fprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
380 | fprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
381 | fprop | 5b:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
382 | fprop | 5b:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_collect_statistics_kernel |
383 | fprop | 5b:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_transform_input_kernel |
384 | fprop | 5b:ReLU2 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
385 | fprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
386 | fprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
387 | fprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
388 | fprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
389 | fprop | 5b:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
390 | fprop | 5b:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_collect_statistics_kernel |
391 | fprop | 5b:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_transform_input_kernel |
392 | fprop | 5b:Residual | __iadd__ | T=[(32,2048,7,7),(32,2048,7,7)] | modern::elementwise_kernel |
393 | fprop | 5b:ReLU3 | relu | T=(32,2048,7,7) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
394 | fprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
395 | fprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
396 | fprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
397 | fprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
398 | fprop | 5c:BN1 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
399 | fprop | 5c:BN1 | batch_norm | T=(32,512,7,7) | batch_norm_collect_statistics_kernel |
400 | fprop | 5c:BN1 | batch_norm | T=(32,512,7,7) | batch_norm_transform_input_kernel |
401 | fprop | 5c:ReLU1 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
402 | fprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
403 | fprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
404 | fprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
405 | fprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1 |
406 | fprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
407 | fprop | 5c:BN2 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
408 | fprop | 5c:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_collect_statistics_kernel |
409 | fprop | 5c:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_transform_input_kernel |
410 | fprop | 5c:ReLU2 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
411 | fprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
412 | fprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
413 | fprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1 |
414 | fprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
415 | fprop | 5c:BN3 | __add__ | T=[(1,)] | legacy::elementwise_kernel |
416 | fprop | 5c:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_collect_statistics_kernel |
417 | fprop | 5c:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_transform_input_kernel |
418 | fprop | 5c:Residual | __iadd__ | T=[(32,2048,7,7),(32,2048,7,7)] | modern::elementwise_kernel |
419 | fprop | 5c:ReLU3 | relu | T=(32,2048,7,7) | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
420 | fprop | - | adaptive_avg_pool2d | T=[(32,2048,7,7)] | reduce_kernel |
421 | fprop | FC | bias | M=1000,N=32 | legacy::elementwise_kernel |
422 | fprop | FC | linear | M=1000,N=32,K=2048 | turing_fp16_s1688gemm_fp16_256x64_ldg8_f2f_tn |
423 | fprop | FC | linear | M=1000,N=32,K=2048 | splitKreduce_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
424 | fprop | - | cross_entropy | T=[(32,1000),(32,)],[,] | softmax_warp_forward |
425 | fprop | - | cross_entropy | T=[(32,1000),(32,)],[,] | cunn_ClassNLLCriterion_updateOutput_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
426 | fprop | - | zero_ | T=[(64,3,7,7)] | modern::elementwise_kernel |
427 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
428 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
429 | fprop | - | zero_ | T=[(64,64,1,1)] | modern::elementwise_kernel |
430 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
431 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
432 | fprop | - | zero_ | T=[(64,64,3,3)] | modern::elementwise_kernel |
433 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
434 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
435 | fprop | - | zero_ | T=[(256,64,1,1)] | modern::elementwise_kernel |
436 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
437 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
438 | fprop | - | zero_ | T=[(256,64,1,1)] | modern::elementwise_kernel |
439 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
440 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
441 | fprop | - | zero_ | T=[(64,256,1,1)] | modern::elementwise_kernel |
442 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
443 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
444 | fprop | - | zero_ | T=[(64,64,3,3)] | modern::elementwise_kernel |
445 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
446 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
447 | fprop | - | zero_ | T=[(256,64,1,1)] | modern::elementwise_kernel |
448 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
449 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
450 | fprop | - | zero_ | T=[(64,256,1,1)] | modern::elementwise_kernel |
451 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
452 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
453 | fprop | - | zero_ | T=[(64,64,3,3)] | modern::elementwise_kernel |
454 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
455 | fprop | - | zero_ | T=[(64,)] | modern::elementwise_kernel |
456 | fprop | - | zero_ | T=[(256,64,1,1)] | modern::elementwise_kernel |
457 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
458 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
459 | fprop | - | zero_ | T=[(128,256,1,1)] | modern::elementwise_kernel |
460 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
461 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
462 | fprop | - | zero_ | T=[(128,128,3,3)] | modern::elementwise_kernel |
463 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
464 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
465 | fprop | - | zero_ | T=[(512,128,1,1)] | modern::elementwise_kernel |
466 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
467 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
468 | fprop | - | zero_ | T=[(512,256,1,1)] | modern::elementwise_kernel |
469 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
470 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
471 | fprop | - | zero_ | T=[(128,512,1,1)] | modern::elementwise_kernel |
472 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
473 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
474 | fprop | - | zero_ | T=[(128,128,3,3)] | modern::elementwise_kernel |
475 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
476 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
477 | fprop | - | zero_ | T=[(512,128,1,1)] | modern::elementwise_kernel |
478 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
479 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
480 | fprop | - | zero_ | T=[(128,512,1,1)] | modern::elementwise_kernel |
481 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
482 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
483 | fprop | - | zero_ | T=[(128,128,3,3)] | modern::elementwise_kernel |
484 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
485 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
486 | fprop | - | zero_ | T=[(512,128,1,1)] | modern::elementwise_kernel |
487 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
488 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
489 | fprop | - | zero_ | T=[(128,512,1,1)] | modern::elementwise_kernel |
490 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
491 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
492 | fprop | - | zero_ | T=[(128,128,3,3)] | modern::elementwise_kernel |
493 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
494 | fprop | - | zero_ | T=[(128,)] | modern::elementwise_kernel |
495 | fprop | - | zero_ | T=[(512,128,1,1)] | modern::elementwise_kernel |
496 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
497 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
498 | fprop | - | zero_ | T=[(256,512,1,1)] | modern::elementwise_kernel |
499 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
500 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
501 | fprop | - | zero_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
502 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
503 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
504 | fprop | - | zero_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
505 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
506 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
507 | fprop | - | zero_ | T=[(1024,512,1,1)] | modern::elementwise_kernel |
508 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
509 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
510 | fprop | - | zero_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
511 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
512 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
513 | fprop | - | zero_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
514 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
515 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
516 | fprop | - | zero_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
517 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
518 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
519 | fprop | - | zero_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
520 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
521 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
522 | fprop | - | zero_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
523 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
524 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
525 | fprop | - | zero_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
526 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
527 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
528 | fprop | - | zero_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
529 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
530 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
531 | fprop | - | zero_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
532 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
533 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
534 | fprop | - | zero_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
535 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
536 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
537 | fprop | - | zero_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
538 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
539 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
540 | fprop | - | zero_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
541 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
542 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
543 | fprop | - | zero_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
544 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
545 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
546 | fprop | - | zero_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
547 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
548 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
549 | fprop | - | zero_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
550 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
551 | fprop | - | zero_ | T=[(256,)] | modern::elementwise_kernel |
552 | fprop | - | zero_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
553 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
554 | fprop | - | zero_ | T=[(1024,)] | modern::elementwise_kernel |
555 | fprop | - | zero_ | T=[(512,1024,1,1)] | modern::elementwise_kernel |
556 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
557 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
558 | fprop | - | zero_ | T=[(512,512,3,3)] | modern::elementwise_kernel |
559 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
560 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
561 | fprop | - | zero_ | T=[(2048,512,1,1)] | modern::elementwise_kernel |
562 | fprop | - | zero_ | T=[(2048,)] | modern::elementwise_kernel |
563 | fprop | - | zero_ | T=[(2048,)] | modern::elementwise_kernel |
564 | fprop | - | zero_ | T=[(2048,1024,1,1)] | modern::elementwise_kernel |
565 | fprop | - | zero_ | T=[(2048,)] | modern::elementwise_kernel |
566 | fprop | - | zero_ | T=[(2048,)] | modern::elementwise_kernel |
567 | fprop | - | zero_ | T=[(512,2048,1,1)] | modern::elementwise_kernel |
568 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
569 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
570 | fprop | - | zero_ | T=[(512,512,3,3)] | modern::elementwise_kernel |
571 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
572 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
573 | fprop | - | zero_ | T=[(2048,512,1,1)] | modern::elementwise_kernel |
574 | fprop | - | zero_ | T=[(2048,)] | modern::elementwise_kernel |
575 | fprop | - | zero_ | T=[(2048,)] | modern::elementwise_kernel |
576 | fprop | - | zero_ | T=[(512,2048,1,1)] | modern::elementwise_kernel |
577 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
578 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
579 | fprop | - | zero_ | T=[(512,512,3,3)] | modern::elementwise_kernel |
580 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
581 | fprop | - | zero_ | T=[(512,)] | modern::elementwise_kernel |
582 | fprop | - | zero_ | T=[(2048,512,1,1)] | modern::elementwise_kernel |
583 | fprop | - | zero_ | T=[(2048,)] | modern::elementwise_kernel |
584 | fprop | - | zero_ | T=[(2048,)] | modern::elementwise_kernel |
585 | fprop | - | zero_ | T=[(1000,2048)] | modern::elementwise_kernel |
586 | fprop | - | zero_ | T=[(1000,)] | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
587 | fprop | - | backward | T=, | legacy::elementwise_kernel |
588 | bprop | - | cross_entropy | T=[(32,1000),(32,)],[,] | cunn_ClassNLLCriterion_updateGradInput_kernel |
589 | bprop | - | cross_entropy | T=[(32,1000),(32,)],[,] | softmax_warp_backward |
590 | bprop | FC | linear | M=2048,N=32,K=1000 | turing_fp16_s1688gemm_fp16_256x64_ldg8_f2f_nn |
591 | bprop | FC | linear | M=2048,N=32,K=1000 | splitKreduce_kernel |
592 | bprop | FC | linear | M=2048,N=1000,K=32 | turing_fp16_s1688gemm_fp16_128x128_ldg8_f2f_nt |
593 | fprop | - | sum | na | reduce_kernel |
594 | fprop | - | add_ | na | modern::elementwise_kernel |
595 | fprop | - | add_ | na | modern::elementwise_kernel |
596 | fprop | - | div | na | legacy::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
597 | bprop | 5c:ReLU3 | relu | T=(32,2048,7,7) | modern::elementwise_kernel |
598 | bprop | 5c:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_backward_kernel |
599 | fprop | - | add_ | na | modern::elementwise_kernel |
600 | fprop | - | add_ | na | modern::elementwise_kernel |
601 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
602 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
603 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
604 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
605 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
606 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
607 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
608 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | scalePackedTensor_kernel |
609 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
610 | bprop | 5c:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
611 | fprop | - | add_ | na | modern::elementwise_kernel |
612 | bprop | 5c:ReLU2 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
613 | bprop | 5c:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_backward_kernel |
614 | fprop | - | add_ | na | modern::elementwise_kernel |
615 | fprop | - | add_ | na | modern::elementwise_kernel |
616 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
617 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
618 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
619 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
620 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
621 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
622 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
623 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
624 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
625 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
626 | bprop | 5c:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
627 | fprop | - | add_ | na | modern::elementwise_kernel |
628 | bprop | 5c:ReLU1 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
629 | bprop | 5c:BN1 | batch_norm | T=(32,512,7,7) | batch_norm_backward_kernel |
630 | fprop | - | add_ | na | modern::elementwise_kernel |
631 | fprop | - | add_ | na | modern::elementwise_kernel |
632 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
633 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
634 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
635 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
636 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
637 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
638 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
639 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | scalePackedTensor_kernel |
640 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
641 | bprop | 5c:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
642 | fprop | - | add | na | modern::elementwise_kernel |
643 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
644 | bprop | 5b:ReLU3 | relu | T=(32,2048,7,7) | modern::elementwise_kernel |
645 | bprop | 5b:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_backward_kernel |
646 | fprop | - | add_ | na | modern::elementwise_kernel |
647 | fprop | - | add_ | na | modern::elementwise_kernel |
648 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
649 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
650 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
651 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
652 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
653 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
654 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
655 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | scalePackedTensor_kernel |
656 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
657 | bprop | 5b:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
658 | fprop | - | add_ | na | modern::elementwise_kernel |
659 | bprop | 5b:ReLU2 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
660 | bprop | 5b:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_backward_kernel |
661 | fprop | - | add_ | na | modern::elementwise_kernel |
662 | fprop | - | add_ | na | modern::elementwise_kernel |
663 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
664 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
665 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
666 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
667 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
668 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
669 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
670 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
671 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
672 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
673 | bprop | 5b:Conv2 | conv2d | N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
674 | fprop | - | add_ | na | modern::elementwise_kernel |
675 | bprop | 5b:ReLU1 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
676 | bprop | 5b:BN1 | batch_norm | T=(32,512,7,7) | batch_norm_backward_kernel |
677 | fprop | - | add_ | na | modern::elementwise_kernel |
678 | fprop | - | add_ | na | modern::elementwise_kernel |
679 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
680 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
681 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
682 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
683 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
684 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
685 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
686 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | scalePackedTensor_kernel |
687 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
688 | bprop | 5b:Conv1 | conv2d | N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
689 | fprop | - | add | na | modern::elementwise_kernel |
690 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
691 | bprop | 5a:ReLU3 | relu | T=(32,2048,7,7) | modern::elementwise_kernel |
692 | bprop | 5a:Residual:Projection | batch_norm | T=(32,2048,7,7) | batch_norm_backward_kernel |
693 | fprop | - | add_ | na | modern::elementwise_kernel |
694 | fprop | - | add_ | na | modern::elementwise_kernel |
695 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
696 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | dgrad_1x1_stride_2x2 |
697 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
698 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
699 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
700 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | cudnn::gemm::computeWgradOffsetsKernel |
701 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | scalePackedTensor_kernel |
702 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
703 | bprop | 5a:Residual:Projection | conv2d | N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
704 | fprop | - | add_ | na | modern::elementwise_kernel |
705 | bprop | 5a:BN3 | batch_norm | T=(32,2048,7,7) | batch_norm_backward_kernel |
706 | fprop | - | add_ | na | modern::elementwise_kernel |
707 | fprop | - | add_ | na | modern::elementwise_kernel |
708 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
709 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
710 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
711 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
712 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
713 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nchwToNhwcKernel |
714 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
715 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | scalePackedTensor_kernel |
716 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
717 | bprop | 5a:Conv3 | conv2d | N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 | nhwcToNchwKernel |
718 | fprop | - | add_ | na | modern::elementwise_kernel |
719 | bprop | 5a:ReLU2 | relu | T=(32,512,7,7) | modern::elementwise_kernel |
720 | bprop | 5a:BN2 | batch_norm | T=(32,512,7,7) | batch_norm_backward_kernel |
721 | fprop | - | add_ | na | modern::elementwise_kernel |
722 | fprop | - | add_ | na | modern::elementwise_kernel |
723 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
724 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
725 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | dgrad_1d |
726 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
727 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
728 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
729 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | cudnn::gemm::computeWgradOffsetsKernel |
730 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | scalePackedTensor_kernel |
731 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
732 | bprop | 5a:Conv2 | conv2d | N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
733 | fprop | - | add_ | na | modern::elementwise_kernel |
734 | bprop | 5a:ReLU1 | relu | T=(32,512,14,14) | modern::elementwise_kernel |
735 | bprop | 5a:BN1 | batch_norm | T=(32,512,14,14) | batch_norm_backward_kernel |
736 | fprop | - | add_ | na | modern::elementwise_kernel |
737 | fprop | - | add_ | na | modern::elementwise_kernel |
738 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
739 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
740 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
741 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
742 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
743 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
744 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
745 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
746 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
747 | bprop | 5a:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
748 | fprop | - | add | na | modern::elementwise_kernel |
749 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
750 | bprop | 4f:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |
751 | bprop | 4f:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_backward_kernel |
752 | fprop | - | add_ | na | modern::elementwise_kernel |
753 | fprop | - | add_ | na | modern::elementwise_kernel |
754 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
755 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
756 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
757 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
758 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
759 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
760 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
761 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
762 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
763 | bprop | 4f:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
764 | fprop | - | add_ | na | modern::elementwise_kernel |
765 | bprop | 4f:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
766 | bprop | 4f:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
767 | fprop | - | add_ | na | modern::elementwise_kernel |
768 | fprop | - | add_ | na | modern::elementwise_kernel |
769 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
770 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
771 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
772 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
773 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
774 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
775 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
776 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
777 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
778 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
779 | bprop | 4f:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
780 | fprop | - | add_ | na | modern::elementwise_kernel |
781 | bprop | 4f:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
782 | bprop | 4f:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
783 | fprop | - | add_ | na | modern::elementwise_kernel |
784 | fprop | - | add_ | na | modern::elementwise_kernel |
785 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
786 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
787 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
788 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
789 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
790 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
791 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
792 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
793 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
794 | bprop | 4f:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
795 | fprop | - | add | na | modern::elementwise_kernel |
796 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
797 | bprop | 4e:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |
798 | bprop | 4e:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_backward_kernel |
799 | fprop | - | add_ | na | modern::elementwise_kernel |
800 | fprop | - | add_ | na | modern::elementwise_kernel |
801 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
802 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
803 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
804 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
805 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
806 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
807 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
808 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
809 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
810 | bprop | 4e:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
811 | fprop | - | add_ | na | modern::elementwise_kernel |
812 | bprop | 4e:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
813 | bprop | 4e:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
814 | fprop | - | add_ | na | modern::elementwise_kernel |
815 | fprop | - | add_ | na | modern::elementwise_kernel |
816 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
817 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
818 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
819 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
820 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
821 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
822 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
823 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
824 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
825 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
826 | bprop | 4e:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
827 | fprop | - | add_ | na | modern::elementwise_kernel |
828 | bprop | 4e:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
829 | bprop | 4e:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
830 | fprop | - | add_ | na | modern::elementwise_kernel |
831 | fprop | - | add_ | na | modern::elementwise_kernel |
832 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
833 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
834 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
835 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
836 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
837 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
838 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
839 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
840 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
841 | bprop | 4e:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
842 | fprop | - | add | na | modern::elementwise_kernel |
843 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
844 | bprop | 4d:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |
845 | bprop | 4d:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_backward_kernel |
846 | fprop | - | add_ | na | modern::elementwise_kernel |
847 | fprop | - | add_ | na | modern::elementwise_kernel |
848 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
849 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
850 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
851 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
852 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
853 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
854 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
855 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
856 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
857 | bprop | 4d:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
858 | fprop | - | add_ | na | modern::elementwise_kernel |
859 | bprop | 4d:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
860 | bprop | 4d:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
861 | fprop | - | add_ | na | modern::elementwise_kernel |
862 | fprop | - | add_ | na | modern::elementwise_kernel |
863 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
864 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
865 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
866 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
867 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
868 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
869 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
870 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
871 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
872 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
873 | bprop | 4d:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
874 | fprop | - | add_ | na | modern::elementwise_kernel |
875 | bprop | 4d:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
876 | bprop | 4d:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
877 | fprop | - | add_ | na | modern::elementwise_kernel |
878 | fprop | - | add_ | na | modern::elementwise_kernel |
879 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
880 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
881 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
882 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
883 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
884 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
885 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
886 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
887 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
888 | bprop | 4d:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
889 | fprop | - | add | na | modern::elementwise_kernel |
890 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
891 | bprop | 4c:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |
892 | bprop | 4c:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_backward_kernel |
893 | fprop | - | add_ | na | modern::elementwise_kernel |
894 | fprop | - | add_ | na | modern::elementwise_kernel |
895 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
896 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
897 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
898 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
899 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
900 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
901 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
902 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
903 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
904 | bprop | 4c:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
905 | fprop | - | add_ | na | modern::elementwise_kernel |
906 | bprop | 4c:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
907 | bprop | 4c:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
908 | fprop | - | add_ | na | modern::elementwise_kernel |
909 | fprop | - | add_ | na | modern::elementwise_kernel |
910 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
911 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
912 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
913 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
914 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
915 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
916 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
917 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
918 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
919 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
920 | bprop | 4c:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
921 | fprop | - | add_ | na | modern::elementwise_kernel |
922 | bprop | 4c:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
923 | bprop | 4c:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
924 | fprop | - | add_ | na | modern::elementwise_kernel |
925 | fprop | - | add_ | na | modern::elementwise_kernel |
926 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
927 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
928 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
929 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
930 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
931 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
932 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
933 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
934 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
935 | bprop | 4c:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
936 | fprop | - | add | na | modern::elementwise_kernel |
937 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
938 | bprop | 4b:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |
939 | bprop | 4b:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_backward_kernel |
940 | fprop | - | add_ | na | modern::elementwise_kernel |
941 | fprop | - | add_ | na | modern::elementwise_kernel |
942 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
943 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
944 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
945 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
946 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
947 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
948 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
949 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
950 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
951 | bprop | 4b:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
952 | fprop | - | add_ | na | modern::elementwise_kernel |
953 | bprop | 4b:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
954 | bprop | 4b:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
955 | fprop | - | add_ | na | modern::elementwise_kernel |
956 | fprop | - | add_ | na | modern::elementwise_kernel |
957 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
958 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
959 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
960 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
961 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
962 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
963 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
964 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
965 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
966 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
967 | bprop | 4b:Conv2 | conv2d | N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
968 | fprop | - | add_ | na | modern::elementwise_kernel |
969 | bprop | 4b:ReLU1 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
970 | bprop | 4b:BN1 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
971 | fprop | - | add_ | na | modern::elementwise_kernel |
972 | fprop | - | add_ | na | modern::elementwise_kernel |
973 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
974 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
975 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
976 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
977 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
978 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
979 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
980 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
981 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
982 | bprop | 4b:Conv1 | conv2d | N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
983 | fprop | - | add | na | modern::elementwise_kernel |
984 | fprop | - | add_ | na | modern::elementwise_kernel |
Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
985 | bprop | 4a:ReLU3 | relu | T=(32,1024,14,14) | modern::elementwise_kernel |
986 | bprop | 4a:Residual:Projection | batch_norm | T=(32,1024,14,14) | batch_norm_backward_kernel |
987 | fprop | - | add_ | na | modern::elementwise_kernel |
988 | fprop | - | add_ | na | modern::elementwise_kernel |
989 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
990 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | dgrad_1x1_stride_2x2 |
991 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
992 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
993 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
994 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | cudnn::gemm::computeWgradOffsetsKernel |
995 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | scalePackedTensor_kernel |
996 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
997 | bprop | 4a:Residual:Projection | conv2d | N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
998 | fprop | - | add_ | na | modern::elementwise_kernel |
999 | bprop | 4a:BN3 | batch_norm | T=(32,1024,14,14) | batch_norm_backward_kernel |
1000 | fprop | - | add_ | na | modern::elementwise_kernel |
1001 | fprop | - | add_ | na | modern::elementwise_kernel |
1002 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
1003 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1004 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1005 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1006 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
1007 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nchwToNhwcKernel |
1008 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1009 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | scalePackedTensor_kernel |
1010 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1011 | bprop | 4a:Conv3 | conv2d | N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 | nhwcToNchwKernel |
1012 | fprop | - | add_ | na | modern::elementwise_kernel |
1013 | bprop | 4a:ReLU2 | relu | T=(32,256,14,14) | modern::elementwise_kernel |
1014 | bprop | 4a:BN2 | batch_norm | T=(32,256,14,14) | batch_norm_backward_kernel |
1015 | fprop | - | add_ | na | modern::elementwise_kernel |
1016 | fprop | - | add_ | na | modern::elementwise_kernel |
1017 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
1018 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
1019 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | dgrad_2d |
1020 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
1021 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
1022 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
1023 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | cudnn::gemm::computeWgradOffsetsKernel |
1024 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | scalePackedTensor_kernel |
1025 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1026 | bprop | 4a:Conv2 | conv2d | N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
1027 | fprop | - | add_ | na | modern::elementwise_kernel |
1028 | bprop | 4a:ReLU1 | relu | T=(32,256,28,28) | modern::elementwise_kernel |
1029 | bprop | 4a:BN1 | batch_norm | T=(32,256,28,28) | batch_norm_backward_kernel |
1030 | fprop | - | add_ | na | modern::elementwise_kernel |
1031 | fprop | - | add_ | na | modern::elementwise_kernel |
1032 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1033 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1034 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1035 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1036 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1037 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1038 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1039 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | scalePackedTensor_kernel |
1040 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1041 | bprop | 4a:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 | nhwcToNchwKernel |
1042 | fprop | - | add | na | modern::elementwise_kernel |
1043 | fprop | - | add_ | na | modern::elementwise_kernel |
Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1044 | bprop | 3d:ReLU3 | relu | T=(32,512,28,28) | modern::elementwise_kernel |
1045 | bprop | 3d:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_backward_kernel |
1046 | fprop | - | add_ | na | modern::elementwise_kernel |
1047 | fprop | - | add_ | na | modern::elementwise_kernel |
1048 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1049 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1050 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1051 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1052 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1053 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1054 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1055 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | scalePackedTensor_kernel |
1056 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1057 | bprop | 3d:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nhwcToNchwKernel |
1058 | fprop | - | add_ | na | modern::elementwise_kernel |
1059 | bprop | 3d:ReLU2 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
1060 | bprop | 3d:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_backward_kernel |
1061 | fprop | - | add_ | na | modern::elementwise_kernel |
1062 | fprop | - | add_ | na | modern::elementwise_kernel |
1063 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1064 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1065 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
1066 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
1067 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
1068 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1069 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1070 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
1071 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
1072 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1073 | bprop | 3d:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
1074 | fprop | - | add_ | na | modern::elementwise_kernel |
1075 | bprop | 3d:ReLU1 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
1076 | bprop | 3d:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_backward_kernel |
1077 | fprop | - | add_ | na | modern::elementwise_kernel |
1078 | fprop | - | add_ | na | modern::elementwise_kernel |
1079 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1080 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1081 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1082 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1083 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1084 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1085 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1086 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | scalePackedTensor_kernel |
1087 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1088 | bprop | 3d:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nhwcToNchwKernel |
1089 | fprop | - | add | na | modern::elementwise_kernel |
1090 | fprop | - | add_ | na | modern::elementwise_kernel |
Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1091 | bprop | 3c:ReLU3 | relu | T=(32,512,28,28) | modern::elementwise_kernel |
1092 | bprop | 3c:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_backward_kernel |
1093 | fprop | - | add_ | na | modern::elementwise_kernel |
1094 | fprop | - | add_ | na | modern::elementwise_kernel |
1095 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1096 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1097 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1098 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1099 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1100 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1101 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1102 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | scalePackedTensor_kernel |
1103 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1104 | bprop | 3c:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nhwcToNchwKernel |
1105 | fprop | - | add_ | na | modern::elementwise_kernel |
1106 | bprop | 3c:ReLU2 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
1107 | bprop | 3c:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_backward_kernel |
1108 | fprop | - | add_ | na | modern::elementwise_kernel |
1109 | fprop | - | add_ | na | modern::elementwise_kernel |
1110 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1111 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1112 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
1113 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
1114 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
1115 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1116 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1117 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
1118 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
1119 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1120 | bprop | 3c:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
1121 | fprop | - | add_ | na | modern::elementwise_kernel |
1122 | bprop | 3c:ReLU1 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
1123 | bprop | 3c:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_backward_kernel |
1124 | fprop | - | add_ | na | modern::elementwise_kernel |
1125 | fprop | - | add_ | na | modern::elementwise_kernel |
1126 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1127 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1128 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1129 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1130 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1131 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1132 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1133 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | scalePackedTensor_kernel |
1134 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1135 | bprop | 3c:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nhwcToNchwKernel |
1136 | fprop | - | add | na | modern::elementwise_kernel |
1137 | fprop | - | add_ | na | modern::elementwise_kernel |
Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1138 | bprop | 3b:ReLU3 | relu | T=(32,512,28,28) | modern::elementwise_kernel |
1139 | bprop | 3b:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_backward_kernel |
1140 | fprop | - | add_ | na | modern::elementwise_kernel |
1141 | fprop | - | add_ | na | modern::elementwise_kernel |
1142 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1143 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1144 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1145 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1146 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1147 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1148 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1149 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | scalePackedTensor_kernel |
1150 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1151 | bprop | 3b:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nhwcToNchwKernel |
1152 | fprop | - | add_ | na | modern::elementwise_kernel |
1153 | bprop | 3b:ReLU2 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
1154 | bprop | 3b:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_backward_kernel |
1155 | fprop | - | add_ | na | modern::elementwise_kernel |
1156 | fprop | - | add_ | na | modern::elementwise_kernel |
1157 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1158 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1159 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
1160 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
1161 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
1162 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1163 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1164 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
1165 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
1166 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1167 | bprop | 3b:Conv2 | conv2d | N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
1168 | fprop | - | add_ | na | modern::elementwise_kernel |
1169 | bprop | 3b:ReLU1 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
1170 | bprop | 3b:BN1 | batch_norm | T=(32,128,28,28) | batch_norm_backward_kernel |
1171 | fprop | - | add_ | na | modern::elementwise_kernel |
1172 | fprop | - | add_ | na | modern::elementwise_kernel |
1173 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1174 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1175 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1176 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1177 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1178 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1179 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1180 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | scalePackedTensor_kernel |
1181 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1182 | bprop | 3b:Conv1 | conv2d | N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 | nhwcToNchwKernel |
1183 | fprop | - | add | na | modern::elementwise_kernel |
1184 | fprop | - | add_ | na | modern::elementwise_kernel |
Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1185 | bprop | 3a:ReLU3 | relu | T=(32,512,28,28) | modern::elementwise_kernel |
1186 | bprop | 3a:Residual:Projection | batch_norm | T=(32,512,28,28) | batch_norm_backward_kernel |
1187 | fprop | - | add_ | na | modern::elementwise_kernel |
1188 | fprop | - | add_ | na | modern::elementwise_kernel |
1189 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
1190 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | dgrad_1x1_stride_2x2 |
1191 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
1192 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
1193 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | nchwToNhwcKernel |
1194 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | cudnn::gemm::computeWgradOffsetsKernel |
1195 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | scalePackedTensor_kernel |
1196 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1197 | bprop | 3a:Residual:Projection | conv2d | N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 | nhwcToNchwKernel |
1198 | fprop | - | add_ | na | modern::elementwise_kernel |
1199 | bprop | 3a:BN3 | batch_norm | T=(32,512,28,28) | batch_norm_backward_kernel |
1200 | fprop | - | add_ | na | modern::elementwise_kernel |
1201 | fprop | - | add_ | na | modern::elementwise_kernel |
1202 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1203 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1204 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1205 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1206 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1207 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nchwToNhwcKernel |
1208 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1209 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | scalePackedTensor_kernel |
1210 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1211 | bprop | 3a:Conv3 | conv2d | N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 | nhwcToNchwKernel |
1212 | fprop | - | add_ | na | modern::elementwise_kernel |
1213 | bprop | 3a:ReLU2 | relu | T=(32,128,28,28) | modern::elementwise_kernel |
1214 | bprop | 3a:BN2 | batch_norm | T=(32,128,28,28) | batch_norm_backward_kernel |
1215 | fprop | - | add_ | na | modern::elementwise_kernel |
1216 | fprop | - | add_ | na | modern::elementwise_kernel |
1217 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
1218 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
1219 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | dgrad_2d |
1220 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
1221 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
1222 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nchwToNhwcKernel |
1223 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | cudnn::gemm::computeWgradOffsetsKernel |
1224 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | scalePackedTensor_kernel |
1225 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1226 | bprop | 3a:Conv2 | conv2d | N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 | nhwcToNchwKernel |
1227 | fprop | - | add_ | na | modern::elementwise_kernel |
1228 | bprop | 3a:ReLU1 | relu | T=(32,128,56,56) | modern::elementwise_kernel |
1229 | bprop | 3a:BN1 | batch_norm | T=(32,128,56,56) | batch_norm_backward_kernel |
1230 | fprop | - | add_ | na | modern::elementwise_kernel |
1231 | fprop | - | add_ | na | modern::elementwise_kernel |
1232 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1233 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1234 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1235 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1236 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1237 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1238 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1239 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | scalePackedTensor_kernel |
1240 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1241 | bprop | 3a:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 | nhwcToNchwKernel |
1242 | fprop | - | add | na | modern::elementwise_kernel |
1243 | fprop | - | add_ | na | modern::elementwise_kernel |
Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1244 | bprop | 2c:ReLU3 | relu | T=(32,256,56,56) | modern::elementwise_kernel |
1245 | bprop | 2c:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_backward_kernel |
1246 | fprop | - | add_ | na | modern::elementwise_kernel |
1247 | fprop | - | add_ | na | modern::elementwise_kernel |
1248 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1249 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1250 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1251 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1252 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1253 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1254 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1255 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | scalePackedTensor_kernel |
1256 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1257 | bprop | 2c:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nhwcToNchwKernel |
1258 | fprop | - | add_ | na | modern::elementwise_kernel |
1259 | bprop | 2c:ReLU2 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
1260 | bprop | 2c:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_backward_kernel |
1261 | fprop | - | add_ | na | modern::elementwise_kernel |
1262 | fprop | - | add_ | na | modern::elementwise_kernel |
1263 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1264 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1265 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
1266 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
1267 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
1268 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1269 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1270 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
1271 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
1272 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1273 | bprop | 2c:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
1274 | fprop | - | add_ | na | modern::elementwise_kernel |
1275 | bprop | 2c:ReLU1 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
1276 | bprop | 2c:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_backward_kernel |
1277 | fprop | - | add_ | na | modern::elementwise_kernel |
1278 | fprop | - | add_ | na | modern::elementwise_kernel |
1279 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1280 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1281 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1282 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1283 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1284 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1285 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1286 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | scalePackedTensor_kernel |
1287 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1288 | bprop | 2c:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nhwcToNchwKernel |
1289 | fprop | - | add | na | modern::elementwise_kernel |
1290 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1291 | bprop | 2b:ReLU3 | relu | T=(32,256,56,56) | modern::elementwise_kernel |
1292 | bprop | 2b:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_backward_kernel |
1293 | fprop | - | add_ | na | modern::elementwise_kernel |
1294 | fprop | - | add_ | na | modern::elementwise_kernel |
1295 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1296 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1297 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1298 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1299 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1300 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1301 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1302 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | scalePackedTensor_kernel |
1303 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1304 | bprop | 2b:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nhwcToNchwKernel |
1305 | fprop | - | add_ | na | modern::elementwise_kernel |
1306 | bprop | 2b:ReLU2 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
1307 | bprop | 2b:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_backward_kernel |
1308 | fprop | - | add_ | na | modern::elementwise_kernel |
1309 | fprop | - | add_ | na | modern::elementwise_kernel |
1310 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1311 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1312 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
1313 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
1314 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
1315 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1316 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1317 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
1318 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
1319 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1320 | bprop | 2b:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
1321 | fprop | - | add_ | na | modern::elementwise_kernel |
1322 | bprop | 2b:ReLU1 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
1323 | bprop | 2b:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_backward_kernel |
1324 | fprop | - | add_ | na | modern::elementwise_kernel |
1325 | fprop | - | add_ | na | modern::elementwise_kernel |
1326 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1327 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1328 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1329 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1330 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1331 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1332 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1333 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | scalePackedTensor_kernel |
1334 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1335 | bprop | 2b:Conv1 | conv2d | N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nhwcToNchwKernel |
1336 | fprop | - | add | na | modern::elementwise_kernel |
1337 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1338 | bprop | 2a:ReLU3 | relu | T=(32,256,56,56) | modern::elementwise_kernel |
1339 | bprop | 2a:Residual:Projection | batch_norm | T=(32,256,56,56) | batch_norm_backward_kernel |
1340 | fprop | - | add_ | na | modern::elementwise_kernel |
1341 | fprop | - | add_ | na | modern::elementwise_kernel |
1342 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1343 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1344 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1345 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1346 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1347 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1348 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1349 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | scalePackedTensor_kernel |
1350 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1351 | bprop | 2a:Residual:Projection | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nhwcToNchwKernel |
1352 | fprop | - | add_ | na | modern::elementwise_kernel |
1353 | bprop | 2a:BN3 | batch_norm | T=(32,256,56,56) | batch_norm_backward_kernel |
1354 | fprop | - | add_ | na | modern::elementwise_kernel |
1355 | fprop | - | add_ | na | modern::elementwise_kernel |
1356 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1357 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1358 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1359 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1360 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1361 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1362 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1363 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | scalePackedTensor_kernel |
1364 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1365 | bprop | 2a:Conv3 | conv2d | N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 | nhwcToNchwKernel |
1366 | fprop | - | add_ | na | modern::elementwise_kernel |
1367 | bprop | 2a:ReLU2 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
1368 | bprop | 2a:BN2 | batch_norm | T=(32,64,56,56) | batch_norm_backward_kernel |
1369 | fprop | - | add_ | na | modern::elementwise_kernel |
1370 | fprop | - | add_ | na | modern::elementwise_kernel |
1371 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1372 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1373 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeOffsetsKernel |
1374 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeBOffsetsKernel |
1375 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1 |
1376 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1377 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nchwToNhwcKernel |
1378 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | cudnn::gemm::computeWgradOffsetsKernel |
1379 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | scalePackedTensor_kernel |
1380 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1381 | bprop | 2a:Conv2 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 | nhwcToNchwKernel |
1382 | fprop | - | add_ | na | modern::elementwise_kernel |
1383 | bprop | 2a:ReLU1 | relu | T=(32,64,56,56) | modern::elementwise_kernel |
1384 | bprop | 2a:BN1 | batch_norm | T=(32,64,56,56) | batch_norm_backward_kernel |
1385 | fprop | - | add_ | na | modern::elementwise_kernel |
1386 | fprop | - | add_ | na | modern::elementwise_kernel |
1387 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1388 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeOffsetsKernel |
1389 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeBOffsetsKernel |
1390 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1 |
1391 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1392 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nchwToNhwcKernel |
1393 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | cudnn::gemm::computeWgradOffsetsKernel |
1394 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | scalePackedTensor_kernel |
1395 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1 |
1396 | bprop | 2a:Conv1 | conv2d | N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 | nhwcToNchwKernel |
1397 | fprop | - | add | na | modern::elementwise_kernel |
1398 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1399 | bprop | block_1 | max_pool2d | T=[(32,64,112,112)] | modern::elementwise_kernel |
1400 | bprop | block_1 | max_pool2d | T=[(32,64,112,112)] | max_pool_backward_nchw |
1401 | bprop | block_1 | relu | T=(32,64,112,112) | modern::elementwise_kernel |
1402 | bprop | block_1 | batch_norm | T=(32,64,112,112) | batch_norm_backward_kernel |
1403 | fprop | - | add_ | na | modern::elementwise_kernel |
1404 | fprop | - | add_ | na | modern::elementwise_kernel |
1405 | bprop | block_1 | conv2d | N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 | nchwToNhwc3To4Kernel |
1406 | bprop | block_1 | conv2d | N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 | nchwToNhwcKernel |
1407 | bprop | block_1 | conv2d | N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 | cask_cudnn::first_layer_wgrad_kernel |
1408 | bprop | block_1 | conv2d | N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 | generic4To3Channel_kernel |
1409 | fprop | - | add_ | na | modern::elementwise_kernel |

Idx | Direction | Layer | Op | Params | Kernel |
---|---|---|---|---|---|
1410 | fprop | - | mul_ | T=[(64,3,7,7)] | modern::elementwise_kernel |
1411 | fprop | - | add_ | T=[(64,3,7,7),(64,3,7,7)] | modern::elementwise_kernel |
1412 | fprop | - | add_ | T=[(64,3,7,7),(64,3,7,7)] | modern::elementwise_kernel |
1413 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1414 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1415 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1416 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1417 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1418 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1419 | fprop | - | mul_ | T=[(64,64,1,1)] | modern::elementwise_kernel |
1420 | fprop | - | add_ | T=[(64,64,1,1),(64,64,1,1)] | modern::elementwise_kernel |
1421 | fprop | - | add_ | T=[(64,64,1,1),(64,64,1,1)] | modern::elementwise_kernel |
1422 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1423 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1424 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1425 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1426 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1427 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1428 | fprop | - | mul_ | T=[(64,64,3,3)] | modern::elementwise_kernel |
1429 | fprop | - | add_ | T=[(64,64,3,3),(64,64,3,3)] | modern::elementwise_kernel |
1430 | fprop | - | add_ | T=[(64,64,3,3),(64,64,3,3)] | modern::elementwise_kernel |
1431 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1432 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1433 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1434 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1435 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1436 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1437 | fprop | - | mul_ | T=[(256,64,1,1)] | modern::elementwise_kernel |
1438 | fprop | - | add_ | T=[(256,64,1,1),(256,64,1,1)] | modern::elementwise_kernel |
1439 | fprop | - | add_ | T=[(256,64,1,1),(256,64,1,1)] | modern::elementwise_kernel |
1440 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1441 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1442 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1443 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1444 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1445 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1446 | fprop | - | mul_ | T=[(256,64,1,1)] | modern::elementwise_kernel |
1447 | fprop | - | add_ | T=[(256,64,1,1),(256,64,1,1)] | modern::elementwise_kernel |
1448 | fprop | - | add_ | T=[(256,64,1,1),(256,64,1,1)] | modern::elementwise_kernel |
1449 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1450 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1451 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1452 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1453 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1454 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1455 | fprop | - | mul_ | T=[(64,256,1,1)] | modern::elementwise_kernel |
1456 | fprop | - | add_ | T=[(64,256,1,1),(64,256,1,1)] | modern::elementwise_kernel |
1457 | fprop | - | add_ | T=[(64,256,1,1),(64,256,1,1)] | modern::elementwise_kernel |
1458 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1459 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1460 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1461 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1462 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1463 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1464 | fprop | - | mul_ | T=[(64,64,3,3)] | modern::elementwise_kernel |
1465 | fprop | - | add_ | T=[(64,64,3,3),(64,64,3,3)] | modern::elementwise_kernel |
1466 | fprop | - | add_ | T=[(64,64,3,3),(64,64,3,3)] | modern::elementwise_kernel |
1467 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1468 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1469 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1470 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1471 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1472 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1473 | fprop | - | mul_ | T=[(256,64,1,1)] | modern::elementwise_kernel |
1474 | fprop | - | add_ | T=[(256,64,1,1),(256,64,1,1)] | modern::elementwise_kernel |
1475 | fprop | - | add_ | T=[(256,64,1,1),(256,64,1,1)] | modern::elementwise_kernel |
1476 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1477 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1478 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1479 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1480 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1481 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1482 | fprop | - | mul_ | T=[(64,256,1,1)] | modern::elementwise_kernel |
1483 | fprop | - | add_ | T=[(64,256,1,1),(64,256,1,1)] | modern::elementwise_kernel |
1484 | fprop | - | add_ | T=[(64,256,1,1),(64,256,1,1)] | modern::elementwise_kernel |
1485 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1486 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1487 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1488 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1489 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1490 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1491 | fprop | - | mul_ | T=[(64,64,3,3)] | modern::elementwise_kernel |
1492 | fprop | - | add_ | T=[(64,64,3,3),(64,64,3,3)] | modern::elementwise_kernel |
1493 | fprop | - | add_ | T=[(64,64,3,3),(64,64,3,3)] | modern::elementwise_kernel |
1494 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1495 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1496 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1497 | fprop | - | mul_ | T=[(64,)] | modern::elementwise_kernel |
1498 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1499 | fprop | - | add_ | T=[(64,),(64,)] | modern::elementwise_kernel |
1500 | fprop | - | mul_ | T=[(256,64,1,1)] | modern::elementwise_kernel |
1501 | fprop | - | add_ | T=[(256,64,1,1),(256,64,1,1)] | modern::elementwise_kernel |
1502 | fprop | - | add_ | T=[(256,64,1,1),(256,64,1,1)] | modern::elementwise_kernel |
1503 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1504 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1505 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1506 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1507 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1508 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1509 | fprop | - | mul_ | T=[(128,256,1,1)] | modern::elementwise_kernel |
1510 | fprop | - | add_ | T=[(128,256,1,1),(128,256,1,1)] | modern::elementwise_kernel |
1511 | fprop | - | add_ | T=[(128,256,1,1),(128,256,1,1)] | modern::elementwise_kernel |
1512 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1513 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1514 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1515 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1516 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1517 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1518 | fprop | - | mul_ | T=[(128,128,3,3)] | modern::elementwise_kernel |
1519 | fprop | - | add_ | T=[(128,128,3,3),(128,128,3,3)] | modern::elementwise_kernel |
1520 | fprop | - | add_ | T=[(128,128,3,3),(128,128,3,3)] | modern::elementwise_kernel |
1521 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1522 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1523 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1524 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1525 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1526 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1527 | fprop | - | mul_ | T=[(512,128,1,1)] | modern::elementwise_kernel |
1528 | fprop | - | add_ | T=[(512,128,1,1),(512,128,1,1)] | modern::elementwise_kernel |
1529 | fprop | - | add_ | T=[(512,128,1,1),(512,128,1,1)] | modern::elementwise_kernel |
1530 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1531 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1532 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1533 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1534 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1535 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1536 | fprop | - | mul_ | T=[(512,256,1,1)] | modern::elementwise_kernel |
1537 | fprop | - | add_ | T=[(512,256,1,1),(512,256,1,1)] | modern::elementwise_kernel |
1538 | fprop | - | add_ | T=[(512,256,1,1),(512,256,1,1)] | modern::elementwise_kernel |
1539 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1540 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1541 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1542 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1543 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1544 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1545 | fprop | - | mul_ | T=[(128,512,1,1)] | modern::elementwise_kernel |
1546 | fprop | - | add_ | T=[(128,512,1,1),(128,512,1,1)] | modern::elementwise_kernel |
1547 | fprop | - | add_ | T=[(128,512,1,1),(128,512,1,1)] | modern::elementwise_kernel |
1548 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1549 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1550 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1551 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1552 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1553 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1554 | fprop | - | mul_ | T=[(128,128,3,3)] | modern::elementwise_kernel |
1555 | fprop | - | add_ | T=[(128,128,3,3),(128,128,3,3)] | modern::elementwise_kernel |
1556 | fprop | - | add_ | T=[(128,128,3,3),(128,128,3,3)] | modern::elementwise_kernel |
1557 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1558 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1559 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1560 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1561 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1562 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1563 | fprop | - | mul_ | T=[(512,128,1,1)] | modern::elementwise_kernel |
1564 | fprop | - | add_ | T=[(512,128,1,1),(512,128,1,1)] | modern::elementwise_kernel |
1565 | fprop | - | add_ | T=[(512,128,1,1),(512,128,1,1)] | modern::elementwise_kernel |
1566 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1567 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1568 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1569 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1570 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1571 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1572 | fprop | - | mul_ | T=[(128,512,1,1)] | modern::elementwise_kernel |
1573 | fprop | - | add_ | T=[(128,512,1,1),(128,512,1,1)] | modern::elementwise_kernel |
1574 | fprop | - | add_ | T=[(128,512,1,1),(128,512,1,1)] | modern::elementwise_kernel |
1575 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1576 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1577 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1578 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1579 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1580 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1581 | fprop | - | mul_ | T=[(128,128,3,3)] | modern::elementwise_kernel |
1582 | fprop | - | add_ | T=[(128,128,3,3),(128,128,3,3)] | modern::elementwise_kernel |
1583 | fprop | - | add_ | T=[(128,128,3,3),(128,128,3,3)] | modern::elementwise_kernel |
1584 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1585 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1586 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1587 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1588 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1589 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1590 | fprop | - | mul_ | T=[(512,128,1,1)] | modern::elementwise_kernel |
1591 | fprop | - | add_ | T=[(512,128,1,1),(512,128,1,1)] | modern::elementwise_kernel |
1592 | fprop | - | add_ | T=[(512,128,1,1),(512,128,1,1)] | modern::elementwise_kernel |
1593 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1594 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1595 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1596 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1597 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1598 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1599 | fprop | - | mul_ | T=[(128,512,1,1)] | modern::elementwise_kernel |
1600 | fprop | - | add_ | T=[(128,512,1,1),(128,512,1,1)] | modern::elementwise_kernel |
1601 | fprop | - | add_ | T=[(128,512,1,1),(128,512,1,1)] | modern::elementwise_kernel |
1602 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1603 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1604 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1605 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1606 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1607 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1608 | fprop | - | mul_ | T=[(128,128,3,3)] | modern::elementwise_kernel |
1609 | fprop | - | add_ | T=[(128,128,3,3),(128,128,3,3)] | modern::elementwise_kernel |
1610 | fprop | - | add_ | T=[(128,128,3,3),(128,128,3,3)] | modern::elementwise_kernel |
1611 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1612 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1613 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1614 | fprop | - | mul_ | T=[(128,)] | modern::elementwise_kernel |
1615 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1616 | fprop | - | add_ | T=[(128,),(128,)] | modern::elementwise_kernel |
1617 | fprop | - | mul_ | T=[(512,128,1,1)] | modern::elementwise_kernel |
1618 | fprop | - | add_ | T=[(512,128,1,1),(512,128,1,1)] | modern::elementwise_kernel |
1619 | fprop | - | add_ | T=[(512,128,1,1),(512,128,1,1)] | modern::elementwise_kernel |
1620 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1621 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1622 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1623 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1624 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1625 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1626 | fprop | - | mul_ | T=[(256,512,1,1)] | modern::elementwise_kernel |
1627 | fprop | - | add_ | T=[(256,512,1,1),(256,512,1,1)] | modern::elementwise_kernel |
1628 | fprop | - | add_ | T=[(256,512,1,1),(256,512,1,1)] | modern::elementwise_kernel |
1629 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1630 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1631 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1632 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1633 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1634 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1635 | fprop | - | mul_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
1636 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1637 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1638 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1639 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1640 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1641 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1642 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1643 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1644 | fprop | - | mul_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
1645 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1646 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1647 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1648 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1649 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1650 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1651 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1652 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1653 | fprop | - | mul_ | T=[(1024,512,1,1)] | modern::elementwise_kernel |
1654 | fprop | - | add_ | T=[(1024,512,1,1),(1024,512,1,1)] | modern::elementwise_kernel |
1655 | fprop | - | add_ | T=[(1024,512,1,1),(1024,512,1,1)] | modern::elementwise_kernel |
1656 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1657 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1658 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1659 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1660 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1661 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1662 | fprop | - | mul_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
1663 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1664 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1665 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1666 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1667 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1668 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1669 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1670 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1671 | fprop | - | mul_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
1672 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1673 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1674 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1675 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1676 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1677 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1678 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1679 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1680 | fprop | - | mul_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
1681 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1682 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1683 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1684 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1685 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1686 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1687 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1688 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1689 | fprop | - | mul_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
1690 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1691 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1692 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1693 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1694 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1695 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1696 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1697 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1698 | fprop | - | mul_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
1699 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1700 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1701 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1702 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1703 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1704 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1705 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1706 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1707 | fprop | - | mul_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
1708 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1709 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1710 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1711 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1712 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1713 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1714 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1715 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1716 | fprop | - | mul_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
1717 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1718 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1719 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1720 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1721 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1722 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1723 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1724 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1725 | fprop | - | mul_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
1726 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1727 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1728 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1729 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1730 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1731 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1732 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1733 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1734 | fprop | - | mul_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
1735 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1736 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1737 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1738 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1739 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1740 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1741 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1742 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1743 | fprop | - | mul_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
1744 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1745 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1746 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1747 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1748 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1749 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1750 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1751 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1752 | fprop | - | mul_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
1753 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1754 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1755 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1756 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1757 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1758 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1759 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1760 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1761 | fprop | - | mul_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
1762 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1763 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1764 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1765 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1766 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1767 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1768 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1769 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1770 | fprop | - | mul_ | T=[(256,1024,1,1)] | modern::elementwise_kernel |
1771 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1772 | fprop | - | add_ | T=[(256,1024,1,1),(256,1024,1,1)] | modern::elementwise_kernel |
1773 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1774 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1775 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1776 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1777 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1778 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1779 | fprop | - | mul_ | T=[(256,256,3,3)] | modern::elementwise_kernel |
1780 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1781 | fprop | - | add_ | T=[(256,256,3,3),(256,256,3,3)] | modern::elementwise_kernel |
1782 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1783 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1784 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1785 | fprop | - | mul_ | T=[(256,)] | modern::elementwise_kernel |
1786 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1787 | fprop | - | add_ | T=[(256,),(256,)] | modern::elementwise_kernel |
1788 | fprop | - | mul_ | T=[(1024,256,1,1)] | modern::elementwise_kernel |
1789 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1790 | fprop | - | add_ | T=[(1024,256,1,1),(1024,256,1,1)] | modern::elementwise_kernel |
1791 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1792 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1793 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1794 | fprop | - | mul_ | T=[(1024,)] | modern::elementwise_kernel |
1795 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1796 | fprop | - | add_ | T=[(1024,),(1024,)] | modern::elementwise_kernel |
1797 | fprop | - | mul_ | T=[(512,1024,1,1)] | modern::elementwise_kernel |
1798 | fprop | - | add_ | T=[(512,1024,1,1),(512,1024,1,1)] | modern::elementwise_kernel |
1799 | fprop | - | add_ | T=[(512,1024,1,1),(512,1024,1,1)] | modern::elementwise_kernel |
1800 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1801 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1802 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1803 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1804 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1805 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1806 | fprop | - | mul_ | T=[(512,512,3,3)] | modern::elementwise_kernel |
1807 | fprop | - | add_ | T=[(512,512,3,3),(512,512,3,3)] | modern::elementwise_kernel |
1808 | fprop | - | add_ | T=[(512,512,3,3),(512,512,3,3)] | modern::elementwise_kernel |
1809 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1810 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1811 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1812 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1813 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1814 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1815 | fprop | - | mul_ | T=[(2048,512,1,1)] | modern::elementwise_kernel |
1816 | fprop | - | add_ | T=[(2048,512,1,1),(2048,512,1,1)] | modern::elementwise_kernel |
1817 | fprop | - | add_ | T=[(2048,512,1,1),(2048,512,1,1)] | modern::elementwise_kernel |
1818 | fprop | - | mul_ | T=[(2048,)] | modern::elementwise_kernel |
1819 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1820 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1821 | fprop | - | mul_ | T=[(2048,)] | modern::elementwise_kernel |
1822 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1823 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1824 | fprop | - | mul_ | T=[(2048,1024,1,1)] | modern::elementwise_kernel |
1825 | fprop | - | add_ | T=[(2048,1024,1,1),(2048,1024,1,1)] | modern::elementwise_kernel |
1826 | fprop | - | add_ | T=[(2048,1024,1,1),(2048,1024,1,1)] | modern::elementwise_kernel |
1827 | fprop | - | mul_ | T=[(2048,)] | modern::elementwise_kernel |
1828 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1829 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1830 | fprop | - | mul_ | T=[(2048,)] | modern::elementwise_kernel |
1831 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1832 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1833 | fprop | - | mul_ | T=[(512,2048,1,1)] | modern::elementwise_kernel |
1834 | fprop | - | add_ | T=[(512,2048,1,1),(512,2048,1,1)] | modern::elementwise_kernel |
1835 | fprop | - | add_ | T=[(512,2048,1,1),(512,2048,1,1)] | modern::elementwise_kernel |
1836 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1837 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1838 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1839 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1840 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1841 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1842 | fprop | - | mul_ | T=[(512,512,3,3)] | modern::elementwise_kernel |
1843 | fprop | - | add_ | T=[(512,512,3,3),(512,512,3,3)] | modern::elementwise_kernel |
1844 | fprop | - | add_ | T=[(512,512,3,3),(512,512,3,3)] | modern::elementwise_kernel |
1845 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1846 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1847 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1848 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1849 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1850 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1851 | fprop | - | mul_ | T=[(2048,512,1,1)] | modern::elementwise_kernel |
1852 | fprop | - | add_ | T=[(2048,512,1,1),(2048,512,1,1)] | modern::elementwise_kernel |
1853 | fprop | - | add_ | T=[(2048,512,1,1),(2048,512,1,1)] | modern::elementwise_kernel |
1854 | fprop | - | mul_ | T=[(2048,)] | modern::elementwise_kernel |
1855 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1856 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1857 | fprop | - | mul_ | T=[(2048,)] | modern::elementwise_kernel |
1858 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1859 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1860 | fprop | - | mul_ | T=[(512,2048,1,1)] | modern::elementwise_kernel |
1861 | fprop | - | add_ | T=[(512,2048,1,1),(512,2048,1,1)] | modern::elementwise_kernel |
1862 | fprop | - | add_ | T=[(512,2048,1,1),(512,2048,1,1)] | modern::elementwise_kernel |
1863 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1864 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1865 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1866 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1867 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1868 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1869 | fprop | - | mul_ | T=[(512,512,3,3)] | modern::elementwise_kernel |
1870 | fprop | - | add_ | T=[(512,512,3,3),(512,512,3,3)] | modern::elementwise_kernel |
1871 | fprop | - | add_ | T=[(512,512,3,3),(512,512,3,3)] | modern::elementwise_kernel |
1872 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1873 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1874 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1875 | fprop | - | mul_ | T=[(512,)] | modern::elementwise_kernel |
1876 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1877 | fprop | - | add_ | T=[(512,),(512,)] | modern::elementwise_kernel |
1878 | fprop | - | mul_ | T=[(2048,512,1,1)] | modern::elementwise_kernel |
1879 | fprop | - | add_ | T=[(2048,512,1,1),(2048,512,1,1)] | modern::elementwise_kernel |
1880 | fprop | - | add_ | T=[(2048,512,1,1),(2048,512,1,1)] | modern::elementwise_kernel |
1881 | fprop | - | mul_ | T=[(2048,)] | modern::elementwise_kernel |
1882 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1883 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1884 | fprop | - | mul_ | T=[(2048,)] | modern::elementwise_kernel |
1885 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1886 | fprop | - | add_ | T=[(2048,),(2048,)] | modern::elementwise_kernel |
1887 | fprop | - | mul_ | T=[(1000,2048)] | modern::elementwise_kernel |
1888 | fprop | - | add_ | T=[(1000,2048),(1000,2048)] | modern::elementwise_kernel |
1889 | fprop | - | add_ | T=[(1000,2048),(1000,2048)] | modern::elementwise_kernel |
1890 | fprop | - | mul_ | T=[(1000,)] | modern::elementwise_kernel |
1891 | fprop | - | add_ | T=[(1000,),(1000,)] | modern::elementwise_kernel |
1892 | fprop | - | add_ | T=[(1000,),(1000,)] | modern::elementwise_kernel |