ResNet50

June 4, 2020

(work in progress)

ResNet

Model Architecture

The architecture of ResNet50 is described in the table below.

\newcommand{\x}{\times} \begin{array}{c|c|c} \hline \text{Layer #} & \text{Output Size} & \text{Operation} \\ \hline 1 & 112 \x 112 & 7 \x 7, 64, \text{ Stride } 2 \\ \hline & 56 \x 56 & 3 \x 3 \text{ MaxPool, Stride } 2 \\ \hline 2 & 56 \x 56 & \begin{bmatrix} 1\x1, 64 \\ 3\x3, 64 \\ 1\x1, 256 \end{bmatrix} \x 3 \\ \hline 3 & 28 \x 28 & \begin{bmatrix} 1\x1, 128 \\ 3\x3, 128 \\ 1\x1, 512 \end{bmatrix} \x 4 \\ \hline 4 & 14 \x 14 & \begin{bmatrix} 1\x1, 256 \\ 3\x3, 256 \\ 1\x1, 1024 \end{bmatrix} \x 6 \\ \hline 5 & 7 \x 7 & \begin{bmatrix} 1\x1, 512 \\ 3\x3, 512 \\ 1\x1, 2048 \end{bmatrix} \x 3 \\ \hline & 2048 & 7 \x 7 \text{ Average Pool} \\ \hline & 1000 & \text{Linear} \\ \hline \end{array}

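Defaults (convolution parameters omitted from a Params entry below take these values; ph/pw = padding, U/V = stride, dh/dw = dilation, g = groups)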

ph=0 pw=0 U=1 V=1 dh=1 dw=1 g=1

GPU Kernels

Idx Direction Layer Op Params Kernel
1 fprop block_1 conv2d N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 cudnn::gemm::computeOffsetsKernel
2 fprop block_1 conv2d N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 volta_fp16_scudnn_fp16_128x64_relu_medium_nn_v1
3 fprop block_1 __add__ T=[(1,)] legacy::elementwise_kernel
4 fprop block_1 batch_norm T=(32,64,112,112) batch_norm_collect_statistics_kernel
5 fprop block_1 batch_norm T=(32,64,112,112) batch_norm_transform_input_kernel
6 fprop block_1 relu T=(32,64,112,112) modern::elementwise_kernel
7 fprop block_1 max_pool2d T=[(32,64,112,112)] max_pool_forward_nchw
Idx Direction Layer Op Params Kernel
8 fprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
9 fprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 volta_fp16_scudnn_fp16_128x64_relu_interior_nn_v1
10 fprop 2a:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
11 fprop 2a:BN1 batch_norm T=(32,64,56,56) batch_norm_collect_statistics_kernel
12 fprop 2a:BN1 batch_norm T=(32,64,56,56) batch_norm_transform_input_kernel
13 fprop 2a:ReLU1 relu T=(32,64,56,56) modern::elementwise_kernel
14 fprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
15 fprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
16 fprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
17 fprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_256x64_ldg8_relu_f2f_exp_small_nhwc_tn_v1
18 fprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
19 fprop 2a:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
20 fprop 2a:BN2 batch_norm T=(32,64,56,56) batch_norm_collect_statistics_kernel
21 fprop 2a:BN2 batch_norm T=(32,64,56,56) batch_norm_transform_input_kernel
22 fprop 2a:ReLU2 relu T=(32,64,56,56) modern::elementwise_kernel
23 fprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cask_cudnn::computeOffsetsKernel
24 fprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
25 fprop 2a:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
26 fprop 2a:BN3 batch_norm T=(32,256,56,56) batch_norm_collect_statistics_kernel
27 fprop 2a:BN3 batch_norm T=(32,256,56,56) batch_norm_transform_input_kernel
28 fprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cask_cudnn::computeOffsetsKernel
29 fprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
30 fprop 2a:Residual:Projection __add__ T=[(1,)] legacy::elementwise_kernel
31 fprop 2a:Residual:Projection batch_norm T=(32,256,56,56) batch_norm_collect_statistics_kernel
32 fprop 2a:Residual:Projection batch_norm T=(32,256,56,56) batch_norm_transform_input_kernel
33 fprop 2a:Residual __iadd__ T=[(32,256,56,56),(32,256,56,56)] modern::elementwise_kernel
34 fprop 2a:ReLU3 relu T=(32,256,56,56) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
35 fprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cask_cudnn::computeOffsetsKernel
36 fprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
37 fprop 2b:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
38 fprop 2b:BN1 batch_norm T=(32,64,56,56) batch_norm_collect_statistics_kernel
39 fprop 2b:BN1 batch_norm T=(32,64,56,56) batch_norm_transform_input_kernel
40 fprop 2b:ReLU1 relu T=(32,64,56,56) modern::elementwise_kernel
41 fprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
42 fprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
43 fprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
44 fprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_256x64_ldg8_relu_f2f_exp_small_nhwc_tn_v1
45 fprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
46 fprop 2b:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
47 fprop 2b:BN2 batch_norm T=(32,64,56,56) batch_norm_collect_statistics_kernel
48 fprop 2b:BN2 batch_norm T=(32,64,56,56) batch_norm_transform_input_kernel
49 fprop 2b:ReLU2 relu T=(32,64,56,56) modern::elementwise_kernel
50 fprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cask_cudnn::computeOffsetsKernel
51 fprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
52 fprop 2b:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
53 fprop 2b:BN3 batch_norm T=(32,256,56,56) batch_norm_collect_statistics_kernel
54 fprop 2b:BN3 batch_norm T=(32,256,56,56) batch_norm_transform_input_kernel
55 fprop 2b:Residual __iadd__ T=[(32,256,56,56),(32,256,56,56)] modern::elementwise_kernel
56 fprop 2b:ReLU3 relu T=(32,256,56,56) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
57 fprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cask_cudnn::computeOffsetsKernel
58 fprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
59 fprop 2c:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
60 fprop 2c:BN1 batch_norm T=(32,64,56,56) batch_norm_collect_statistics_kernel
61 fprop 2c:BN1 batch_norm T=(32,64,56,56) batch_norm_transform_input_kernel
62 fprop 2c:ReLU1 relu T=(32,64,56,56) modern::elementwise_kernel
63 fprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
64 fprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
65 fprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
66 fprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_256x64_ldg8_relu_f2f_exp_small_nhwc_tn_v1
67 fprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
68 fprop 2c:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
69 fprop 2c:BN2 batch_norm T=(32,64,56,56) batch_norm_collect_statistics_kernel
70 fprop 2c:BN2 batch_norm T=(32,64,56,56) batch_norm_transform_input_kernel
71 fprop 2c:ReLU2 relu T=(32,64,56,56) modern::elementwise_kernel
72 fprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cask_cudnn::computeOffsetsKernel
73 fprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
74 fprop 2c:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
75 fprop 2c:BN3 batch_norm T=(32,256,56,56) batch_norm_collect_statistics_kernel
76 fprop 2c:BN3 batch_norm T=(32,256,56,56) batch_norm_transform_input_kernel
77 fprop 2c:Residual __iadd__ T=[(32,256,56,56),(32,256,56,56)] modern::elementwise_kernel
78 fprop 2c:ReLU3 relu T=(32,256,56,56) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
79 fprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 cask_cudnn::computeOffsetsKernel
80 fprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
81 fprop 3a:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
82 fprop 3a:BN1 batch_norm T=(32,128,56,56) batch_norm_collect_statistics_kernel
83 fprop 3a:BN1 batch_norm T=(32,128,56,56) batch_norm_transform_input_kernel
84 fprop 3a:ReLU1 relu T=(32,128,56,56) modern::elementwise_kernel
85 fprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
86 fprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
87 fprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 cudnn::gemm::computeOffsetsKernel
88 fprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
89 fprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
90 fprop 3a:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
91 fprop 3a:BN2 batch_norm T=(32,128,28,28) batch_norm_collect_statistics_kernel
92 fprop 3a:BN2 batch_norm T=(32,128,28,28) batch_norm_transform_input_kernel
93 fprop 3a:ReLU2 relu T=(32,128,28,28) modern::elementwise_kernel
94 fprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cask_cudnn::computeOffsetsKernel
95 fprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
96 fprop 3a:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
97 fprop 3a:BN3 batch_norm T=(32,512,28,28) batch_norm_collect_statistics_kernel
98 fprop 3a:BN3 batch_norm T=(32,512,28,28) batch_norm_transform_input_kernel
99 fprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 nchwToNhwcKernel
100 fprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 cudnn::gemm::computeOffsetsKernel
101 fprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
102 fprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 nhwcToNchwKernel
103 fprop 3a:Residual:Projection __add__ T=[(1,)] legacy::elementwise_kernel
104 fprop 3a:Residual:Projection batch_norm T=(32,512,28,28) batch_norm_collect_statistics_kernel
105 fprop 3a:Residual:Projection batch_norm T=(32,512,28,28) batch_norm_transform_input_kernel
106 fprop 3a:Residual __iadd__ T=[(32,512,28,28),(32,512,28,28)] modern::elementwise_kernel
107 fprop 3a:ReLU3 relu T=(32,512,28,28) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
108 fprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cask_cudnn::computeOffsetsKernel
109 fprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
110 fprop 3b:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
111 fprop 3b:BN1 batch_norm T=(32,128,28,28) batch_norm_collect_statistics_kernel
112 fprop 3b:BN1 batch_norm T=(32,128,28,28) batch_norm_transform_input_kernel
113 fprop 3b:ReLU1 relu T=(32,128,28,28) modern::elementwise_kernel
114 fprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
115 fprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
116 fprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
117 fprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
118 fprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
119 fprop 3b:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
120 fprop 3b:BN2 batch_norm T=(32,128,28,28) batch_norm_collect_statistics_kernel
121 fprop 3b:BN2 batch_norm T=(32,128,28,28) batch_norm_transform_input_kernel
122 fprop 3b:ReLU2 relu T=(32,128,28,28) modern::elementwise_kernel
123 fprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cask_cudnn::computeOffsetsKernel
124 fprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
125 fprop 3b:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
126 fprop 3b:BN3 batch_norm T=(32,512,28,28) batch_norm_collect_statistics_kernel
127 fprop 3b:BN3 batch_norm T=(32,512,28,28) batch_norm_transform_input_kernel
128 fprop 3b:Residual __iadd__ T=[(32,512,28,28),(32,512,28,28)] modern::elementwise_kernel
129 fprop 3b:ReLU3 relu T=(32,512,28,28) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
130 fprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cask_cudnn::computeOffsetsKernel
131 fprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
132 fprop 3c:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
133 fprop 3c:BN1 batch_norm T=(32,128,28,28) batch_norm_collect_statistics_kernel
134 fprop 3c:BN1 batch_norm T=(32,128,28,28) batch_norm_transform_input_kernel
135 fprop 3c:ReLU1 relu T=(32,128,28,28) modern::elementwise_kernel
136 fprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
137 fprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
138 fprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
139 fprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
140 fprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
141 fprop 3c:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
142 fprop 3c:BN2 batch_norm T=(32,128,28,28) batch_norm_collect_statistics_kernel
143 fprop 3c:BN2 batch_norm T=(32,128,28,28) batch_norm_transform_input_kernel
144 fprop 3c:ReLU2 relu T=(32,128,28,28) modern::elementwise_kernel
145 fprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cask_cudnn::computeOffsetsKernel
146 fprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
147 fprop 3c:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
148 fprop 3c:BN3 batch_norm T=(32,512,28,28) batch_norm_collect_statistics_kernel
149 fprop 3c:BN3 batch_norm T=(32,512,28,28) batch_norm_transform_input_kernel
150 fprop 3c:Residual __iadd__ T=[(32,512,28,28),(32,512,28,28)] modern::elementwise_kernel
151 fprop 3c:ReLU3 relu T=(32,512,28,28) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
152 fprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cask_cudnn::computeOffsetsKernel
153 fprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
154 fprop 3d:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
155 fprop 3d:BN1 batch_norm T=(32,128,28,28) batch_norm_collect_statistics_kernel
156 fprop 3d:BN1 batch_norm T=(32,128,28,28) batch_norm_transform_input_kernel
157 fprop 3d:ReLU1 relu T=(32,128,28,28) modern::elementwise_kernel
158 fprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
159 fprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
160 fprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
161 fprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
162 fprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
163 fprop 3d:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
164 fprop 3d:BN2 batch_norm T=(32,128,28,28) batch_norm_collect_statistics_kernel
165 fprop 3d:BN2 batch_norm T=(32,128,28,28) batch_norm_transform_input_kernel
166 fprop 3d:ReLU2 relu T=(32,128,28,28) modern::elementwise_kernel
167 fprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cask_cudnn::computeOffsetsKernel
168 fprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
169 fprop 3d:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
170 fprop 3d:BN3 batch_norm T=(32,512,28,28) batch_norm_collect_statistics_kernel
171 fprop 3d:BN3 batch_norm T=(32,512,28,28) batch_norm_transform_input_kernel
172 fprop 3d:Residual __iadd__ T=[(32,512,28,28),(32,512,28,28)] modern::elementwise_kernel
173 fprop 3d:ReLU3 relu T=(32,512,28,28) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
174 fprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 cask_cudnn::computeOffsetsKernel
175 fprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_filter1x1_stg8_interior_nchw_nn_v1
176 fprop 4a:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
177 fprop 4a:BN1 batch_norm T=(32,256,28,28) batch_norm_collect_statistics_kernel
178 fprop 4a:BN1 batch_norm T=(32,256,28,28) batch_norm_transform_input_kernel
179 fprop 4a:ReLU1 relu T=(32,256,28,28) modern::elementwise_kernel
180 fprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
181 fprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
182 fprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 cudnn::gemm::computeOffsetsKernel
183 fprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
184 fprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
185 fprop 4a:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
186 fprop 4a:BN2 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
187 fprop 4a:BN2 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
188 fprop 4a:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
189 fprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
190 fprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
191 fprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
192 fprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
193 fprop 4a:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
194 fprop 4a:BN3 batch_norm T=(32,1024,14,14) batch_norm_collect_statistics_kernel
195 fprop 4a:BN3 batch_norm T=(32,1024,14,14) batch_norm_transform_input_kernel
196 fprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 nchwToNhwcKernel
197 fprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 cudnn::gemm::computeOffsetsKernel
198 fprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
199 fprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 nhwcToNchwKernel
200 fprop 4a:Residual:Projection __add__ T=[(1,)] legacy::elementwise_kernel
201 fprop 4a:Residual:Projection batch_norm T=(32,1024,14,14) batch_norm_collect_statistics_kernel
202 fprop 4a:Residual:Projection batch_norm T=(32,1024,14,14) batch_norm_transform_input_kernel
203 fprop 4a:Residual __iadd__ T=[(32,1024,14,14),(32,1024,14,14)] modern::elementwise_kernel
204 fprop 4a:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
205 fprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
206 fprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
207 fprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
208 fprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
209 fprop 4b:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
210 fprop 4b:BN1 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
211 fprop 4b:BN1 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
212 fprop 4b:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
213 fprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
214 fprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
215 fprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
216 fprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
217 fprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
218 fprop 4b:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
219 fprop 4b:BN2 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
220 fprop 4b:BN2 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
221 fprop 4b:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
222 fprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
223 fprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
224 fprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
225 fprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
226 fprop 4b:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
227 fprop 4b:BN3 batch_norm T=(32,1024,14,14) batch_norm_collect_statistics_kernel
228 fprop 4b:BN3 batch_norm T=(32,1024,14,14) batch_norm_transform_input_kernel
229 fprop 4b:Residual __iadd__ T=[(32,1024,14,14),(32,1024,14,14)] modern::elementwise_kernel
230 fprop 4b:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
231 fprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
232 fprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
233 fprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
234 fprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
235 fprop 4c:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
236 fprop 4c:BN1 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
237 fprop 4c:BN1 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
238 fprop 4c:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
239 fprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
240 fprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
241 fprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
242 fprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
243 fprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
244 fprop 4c:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
245 fprop 4c:BN2 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
246 fprop 4c:BN2 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
247 fprop 4c:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
248 fprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
249 fprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
250 fprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
251 fprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
252 fprop 4c:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
253 fprop 4c:BN3 batch_norm T=(32,1024,14,14) batch_norm_collect_statistics_kernel
254 fprop 4c:BN3 batch_norm T=(32,1024,14,14) batch_norm_transform_input_kernel
255 fprop 4c:Residual __iadd__ T=[(32,1024,14,14),(32,1024,14,14)] modern::elementwise_kernel
256 fprop 4c:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
257 fprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
258 fprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
259 fprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
260 fprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
261 fprop 4d:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
262 fprop 4d:BN1 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
263 fprop 4d:BN1 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
264 fprop 4d:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
265 fprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
266 fprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
267 fprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
268 fprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
269 fprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
270 fprop 4d:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
271 fprop 4d:BN2 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
272 fprop 4d:BN2 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
273 fprop 4d:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
274 fprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
275 fprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
276 fprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
277 fprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
278 fprop 4d:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
279 fprop 4d:BN3 batch_norm T=(32,1024,14,14) batch_norm_collect_statistics_kernel
280 fprop 4d:BN3 batch_norm T=(32,1024,14,14) batch_norm_transform_input_kernel
281 fprop 4d:Residual __iadd__ T=[(32,1024,14,14),(32,1024,14,14)] modern::elementwise_kernel
282 fprop 4d:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
283 fprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
284 fprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
285 fprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
286 fprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
287 fprop 4e:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
288 fprop 4e:BN1 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
289 fprop 4e:BN1 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
290 fprop 4e:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
291 fprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
292 fprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
293 fprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
294 fprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
295 fprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
296 fprop 4e:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
297 fprop 4e:BN2 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
298 fprop 4e:BN2 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
299 fprop 4e:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
300 fprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
301 fprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
302 fprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
303 fprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
304 fprop 4e:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
305 fprop 4e:BN3 batch_norm T=(32,1024,14,14) batch_norm_collect_statistics_kernel
306 fprop 4e:BN3 batch_norm T=(32,1024,14,14) batch_norm_transform_input_kernel
307 fprop 4e:Residual __iadd__ T=[(32,1024,14,14),(32,1024,14,14)] modern::elementwise_kernel
308 fprop 4e:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
309 fprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
310 fprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
311 fprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
312 fprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
313 fprop 4f:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
314 fprop 4f:BN1 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
315 fprop 4f:BN1 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
316 fprop 4f:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
317 fprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
318 fprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
319 fprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
320 fprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
321 fprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
322 fprop 4f:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
323 fprop 4f:BN2 batch_norm T=(32,256,14,14) batch_norm_collect_statistics_kernel
324 fprop 4f:BN2 batch_norm T=(32,256,14,14) batch_norm_transform_input_kernel
325 fprop 4f:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
326 fprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
327 fprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
328 fprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
329 fprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
330 fprop 4f:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
331 fprop 4f:BN3 batch_norm T=(32,1024,14,14) batch_norm_collect_statistics_kernel
332 fprop 4f:BN3 batch_norm T=(32,1024,14,14) batch_norm_transform_input_kernel
333 fprop 4f:Residual __iadd__ T=[(32,1024,14,14),(32,1024,14,14)] modern::elementwise_kernel
334 fprop 4f:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
335 fprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
336 fprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
337 fprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
338 fprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
339 fprop 5a:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
340 fprop 5a:BN1 batch_norm T=(32,512,14,14) batch_norm_collect_statistics_kernel
341 fprop 5a:BN1 batch_norm T=(32,512,14,14) batch_norm_transform_input_kernel
342 fprop 5a:ReLU1 relu T=(32,512,14,14) modern::elementwise_kernel
343 fprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
344 fprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
345 fprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 cudnn::gemm::computeOffsetsKernel
346 fprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
347 fprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
348 fprop 5a:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
349 fprop 5a:BN2 batch_norm T=(32,512,7,7) batch_norm_collect_statistics_kernel
350 fprop 5a:BN2 batch_norm T=(32,512,7,7) batch_norm_transform_input_kernel
351 fprop 5a:ReLU2 relu T=(32,512,7,7) modern::elementwise_kernel
352 fprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
353 fprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
354 fprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
355 fprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
356 fprop 5a:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
357 fprop 5a:BN3 batch_norm T=(32,2048,7,7) batch_norm_collect_statistics_kernel
358 fprop 5a:BN3 batch_norm T=(32,2048,7,7) batch_norm_transform_input_kernel
359 fprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 nchwToNhwcKernel
360 fprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 cudnn::gemm::computeOffsetsKernel
361 fprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
362 fprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 nhwcToNchwKernel
363 fprop 5a:Residual:Projection __add__ T=[(1,)] legacy::elementwise_kernel
364 fprop 5a:Residual:Projection batch_norm T=(32,2048,7,7) batch_norm_collect_statistics_kernel
365 fprop 5a:Residual:Projection batch_norm T=(32,2048,7,7) batch_norm_transform_input_kernel
366 fprop 5a:Residual __iadd__ T=[(32,2048,7,7),(32,2048,7,7)] modern::elementwise_kernel
367 fprop 5a:ReLU3 relu T=(32,2048,7,7) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
368 fprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
369 fprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
370 fprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
371 fprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
372 fprop 5b:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
373 fprop 5b:BN1 batch_norm T=(32,512,7,7) batch_norm_collect_statistics_kernel
374 fprop 5b:BN1 batch_norm T=(32,512,7,7) batch_norm_transform_input_kernel
375 fprop 5b:ReLU1 relu T=(32,512,7,7) modern::elementwise_kernel
376 fprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
377 fprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
378 fprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
379 fprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
380 fprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
381 fprop 5b:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
382 fprop 5b:BN2 batch_norm T=(32,512,7,7) batch_norm_collect_statistics_kernel
383 fprop 5b:BN2 batch_norm T=(32,512,7,7) batch_norm_transform_input_kernel
384 fprop 5b:ReLU2 relu T=(32,512,7,7) modern::elementwise_kernel
385 fprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
386 fprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
387 fprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
388 fprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
389 fprop 5b:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
390 fprop 5b:BN3 batch_norm T=(32,2048,7,7) batch_norm_collect_statistics_kernel
391 fprop 5b:BN3 batch_norm T=(32,2048,7,7) batch_norm_transform_input_kernel
392 fprop 5b:Residual __iadd__ T=[(32,2048,7,7),(32,2048,7,7)] modern::elementwise_kernel
393 fprop 5b:ReLU3 relu T=(32,2048,7,7) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
394 fprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
395 fprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
396 fprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
397 fprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
398 fprop 5c:BN1 __add__ T=[(1,)] legacy::elementwise_kernel
399 fprop 5c:BN1 batch_norm T=(32,512,7,7) batch_norm_collect_statistics_kernel
400 fprop 5c:BN1 batch_norm T=(32,512,7,7) batch_norm_transform_input_kernel
401 fprop 5c:ReLU1 relu T=(32,512,7,7) modern::elementwise_kernel
402 fprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
403 fprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
404 fprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
405 fprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 turing_fp16_s1688cudnn_fp16_128x128_ldg8_relu_f2f_exp_small_nhwc_tn_v1
406 fprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
407 fprop 5c:BN2 __add__ T=[(1,)] legacy::elementwise_kernel
408 fprop 5c:BN2 batch_norm T=(32,512,7,7) batch_norm_collect_statistics_kernel
409 fprop 5c:BN2 batch_norm T=(32,512,7,7) batch_norm_transform_input_kernel
410 fprop 5c:ReLU2 relu T=(32,512,7,7) modern::elementwise_kernel
411 fprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
412 fprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
413 fprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 turing_fp16_s1688cudnn_fp16_256x128_ldg8_relu_f2f_exp_interior_nhwc_tn_v1
414 fprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
415 fprop 5c:BN3 __add__ T=[(1,)] legacy::elementwise_kernel
416 fprop 5c:BN3 batch_norm T=(32,2048,7,7) batch_norm_collect_statistics_kernel
417 fprop 5c:BN3 batch_norm T=(32,2048,7,7) batch_norm_transform_input_kernel
418 fprop 5c:Residual __iadd__ T=[(32,2048,7,7),(32,2048,7,7)] modern::elementwise_kernel
419 fprop 5c:ReLU3 relu T=(32,2048,7,7) modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
420 fprop - adaptive_avg_pool2d T=[(32,2048,7,7)] reduce_kernel
421 fprop FC bias M=1000,N=32 legacy::elementwise_kernel
422 fprop FC linear M=1000,N=32,K=2048 turing_fp16_s1688gemm_fp16_256x64_ldg8_f2f_tn
423 fprop FC linear M=1000,N=32,K=2048 splitKreduce_kernel
Idx Direction Layer Op Params Kernel
424 fprop - cross_entropy T=[(32,1000),(32,)],[,] softmax_warp_forward
425 fprop - cross_entropy T=[(32,1000),(32,)],[,] cunn_ClassNLLCriterion_updateOutput_kernel
Idx Direction Layer Op Params Kernel
426 fprop - zero_ T=[(64,3,7,7)] modern::elementwise_kernel
427 fprop - zero_ T=[(64,)] modern::elementwise_kernel
428 fprop - zero_ T=[(64,)] modern::elementwise_kernel
429 fprop - zero_ T=[(64,64,1,1)] modern::elementwise_kernel
430 fprop - zero_ T=[(64,)] modern::elementwise_kernel
431 fprop - zero_ T=[(64,)] modern::elementwise_kernel
432 fprop - zero_ T=[(64,64,3,3)] modern::elementwise_kernel
433 fprop - zero_ T=[(64,)] modern::elementwise_kernel
434 fprop - zero_ T=[(64,)] modern::elementwise_kernel
435 fprop - zero_ T=[(256,64,1,1)] modern::elementwise_kernel
436 fprop - zero_ T=[(256,)] modern::elementwise_kernel
437 fprop - zero_ T=[(256,)] modern::elementwise_kernel
438 fprop - zero_ T=[(256,64,1,1)] modern::elementwise_kernel
439 fprop - zero_ T=[(256,)] modern::elementwise_kernel
440 fprop - zero_ T=[(256,)] modern::elementwise_kernel
441 fprop - zero_ T=[(64,256,1,1)] modern::elementwise_kernel
442 fprop - zero_ T=[(64,)] modern::elementwise_kernel
443 fprop - zero_ T=[(64,)] modern::elementwise_kernel
444 fprop - zero_ T=[(64,64,3,3)] modern::elementwise_kernel
445 fprop - zero_ T=[(64,)] modern::elementwise_kernel
446 fprop - zero_ T=[(64,)] modern::elementwise_kernel
447 fprop - zero_ T=[(256,64,1,1)] modern::elementwise_kernel
448 fprop - zero_ T=[(256,)] modern::elementwise_kernel
449 fprop - zero_ T=[(256,)] modern::elementwise_kernel
450 fprop - zero_ T=[(64,256,1,1)] modern::elementwise_kernel
451 fprop - zero_ T=[(64,)] modern::elementwise_kernel
452 fprop - zero_ T=[(64,)] modern::elementwise_kernel
453 fprop - zero_ T=[(64,64,3,3)] modern::elementwise_kernel
454 fprop - zero_ T=[(64,)] modern::elementwise_kernel
455 fprop - zero_ T=[(64,)] modern::elementwise_kernel
456 fprop - zero_ T=[(256,64,1,1)] modern::elementwise_kernel
457 fprop - zero_ T=[(256,)] modern::elementwise_kernel
458 fprop - zero_ T=[(256,)] modern::elementwise_kernel
459 fprop - zero_ T=[(128,256,1,1)] modern::elementwise_kernel
460 fprop - zero_ T=[(128,)] modern::elementwise_kernel
461 fprop - zero_ T=[(128,)] modern::elementwise_kernel
462 fprop - zero_ T=[(128,128,3,3)] modern::elementwise_kernel
463 fprop - zero_ T=[(128,)] modern::elementwise_kernel
464 fprop - zero_ T=[(128,)] modern::elementwise_kernel
465 fprop - zero_ T=[(512,128,1,1)] modern::elementwise_kernel
466 fprop - zero_ T=[(512,)] modern::elementwise_kernel
467 fprop - zero_ T=[(512,)] modern::elementwise_kernel
468 fprop - zero_ T=[(512,256,1,1)] modern::elementwise_kernel
469 fprop - zero_ T=[(512,)] modern::elementwise_kernel
470 fprop - zero_ T=[(512,)] modern::elementwise_kernel
471 fprop - zero_ T=[(128,512,1,1)] modern::elementwise_kernel
472 fprop - zero_ T=[(128,)] modern::elementwise_kernel
473 fprop - zero_ T=[(128,)] modern::elementwise_kernel
474 fprop - zero_ T=[(128,128,3,3)] modern::elementwise_kernel
475 fprop - zero_ T=[(128,)] modern::elementwise_kernel
476 fprop - zero_ T=[(128,)] modern::elementwise_kernel
477 fprop - zero_ T=[(512,128,1,1)] modern::elementwise_kernel
478 fprop - zero_ T=[(512,)] modern::elementwise_kernel
479 fprop - zero_ T=[(512,)] modern::elementwise_kernel
480 fprop - zero_ T=[(128,512,1,1)] modern::elementwise_kernel
481 fprop - zero_ T=[(128,)] modern::elementwise_kernel
482 fprop - zero_ T=[(128,)] modern::elementwise_kernel
483 fprop - zero_ T=[(128,128,3,3)] modern::elementwise_kernel
484 fprop - zero_ T=[(128,)] modern::elementwise_kernel
485 fprop - zero_ T=[(128,)] modern::elementwise_kernel
486 fprop - zero_ T=[(512,128,1,1)] modern::elementwise_kernel
487 fprop - zero_ T=[(512,)] modern::elementwise_kernel
488 fprop - zero_ T=[(512,)] modern::elementwise_kernel
489 fprop - zero_ T=[(128,512,1,1)] modern::elementwise_kernel
490 fprop - zero_ T=[(128,)] modern::elementwise_kernel
491 fprop - zero_ T=[(128,)] modern::elementwise_kernel
492 fprop - zero_ T=[(128,128,3,3)] modern::elementwise_kernel
493 fprop - zero_ T=[(128,)] modern::elementwise_kernel
494 fprop - zero_ T=[(128,)] modern::elementwise_kernel
495 fprop - zero_ T=[(512,128,1,1)] modern::elementwise_kernel
496 fprop - zero_ T=[(512,)] modern::elementwise_kernel
497 fprop - zero_ T=[(512,)] modern::elementwise_kernel
498 fprop - zero_ T=[(256,512,1,1)] modern::elementwise_kernel
499 fprop - zero_ T=[(256,)] modern::elementwise_kernel
500 fprop - zero_ T=[(256,)] modern::elementwise_kernel
501 fprop - zero_ T=[(256,256,3,3)] modern::elementwise_kernel
502 fprop - zero_ T=[(256,)] modern::elementwise_kernel
503 fprop - zero_ T=[(256,)] modern::elementwise_kernel
504 fprop - zero_ T=[(1024,256,1,1)] modern::elementwise_kernel
505 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
506 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
507 fprop - zero_ T=[(1024,512,1,1)] modern::elementwise_kernel
508 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
509 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
510 fprop - zero_ T=[(256,1024,1,1)] modern::elementwise_kernel
511 fprop - zero_ T=[(256,)] modern::elementwise_kernel
512 fprop - zero_ T=[(256,)] modern::elementwise_kernel
513 fprop - zero_ T=[(256,256,3,3)] modern::elementwise_kernel
514 fprop - zero_ T=[(256,)] modern::elementwise_kernel
515 fprop - zero_ T=[(256,)] modern::elementwise_kernel
516 fprop - zero_ T=[(1024,256,1,1)] modern::elementwise_kernel
517 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
518 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
519 fprop - zero_ T=[(256,1024,1,1)] modern::elementwise_kernel
520 fprop - zero_ T=[(256,)] modern::elementwise_kernel
521 fprop - zero_ T=[(256,)] modern::elementwise_kernel
522 fprop - zero_ T=[(256,256,3,3)] modern::elementwise_kernel
523 fprop - zero_ T=[(256,)] modern::elementwise_kernel
524 fprop - zero_ T=[(256,)] modern::elementwise_kernel
525 fprop - zero_ T=[(1024,256,1,1)] modern::elementwise_kernel
526 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
527 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
528 fprop - zero_ T=[(256,1024,1,1)] modern::elementwise_kernel
529 fprop - zero_ T=[(256,)] modern::elementwise_kernel
530 fprop - zero_ T=[(256,)] modern::elementwise_kernel
531 fprop - zero_ T=[(256,256,3,3)] modern::elementwise_kernel
532 fprop - zero_ T=[(256,)] modern::elementwise_kernel
533 fprop - zero_ T=[(256,)] modern::elementwise_kernel
534 fprop - zero_ T=[(1024,256,1,1)] modern::elementwise_kernel
535 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
536 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
537 fprop - zero_ T=[(256,1024,1,1)] modern::elementwise_kernel
538 fprop - zero_ T=[(256,)] modern::elementwise_kernel
539 fprop - zero_ T=[(256,)] modern::elementwise_kernel
540 fprop - zero_ T=[(256,256,3,3)] modern::elementwise_kernel
541 fprop - zero_ T=[(256,)] modern::elementwise_kernel
542 fprop - zero_ T=[(256,)] modern::elementwise_kernel
543 fprop - zero_ T=[(1024,256,1,1)] modern::elementwise_kernel
544 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
545 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
546 fprop - zero_ T=[(256,1024,1,1)] modern::elementwise_kernel
547 fprop - zero_ T=[(256,)] modern::elementwise_kernel
548 fprop - zero_ T=[(256,)] modern::elementwise_kernel
549 fprop - zero_ T=[(256,256,3,3)] modern::elementwise_kernel
550 fprop - zero_ T=[(256,)] modern::elementwise_kernel
551 fprop - zero_ T=[(256,)] modern::elementwise_kernel
552 fprop - zero_ T=[(1024,256,1,1)] modern::elementwise_kernel
553 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
554 fprop - zero_ T=[(1024,)] modern::elementwise_kernel
555 fprop - zero_ T=[(512,1024,1,1)] modern::elementwise_kernel
556 fprop - zero_ T=[(512,)] modern::elementwise_kernel
557 fprop - zero_ T=[(512,)] modern::elementwise_kernel
558 fprop - zero_ T=[(512,512,3,3)] modern::elementwise_kernel
559 fprop - zero_ T=[(512,)] modern::elementwise_kernel
560 fprop - zero_ T=[(512,)] modern::elementwise_kernel
561 fprop - zero_ T=[(2048,512,1,1)] modern::elementwise_kernel
562 fprop - zero_ T=[(2048,)] modern::elementwise_kernel
563 fprop - zero_ T=[(2048,)] modern::elementwise_kernel
564 fprop - zero_ T=[(2048,1024,1,1)] modern::elementwise_kernel
565 fprop - zero_ T=[(2048,)] modern::elementwise_kernel
566 fprop - zero_ T=[(2048,)] modern::elementwise_kernel
567 fprop - zero_ T=[(512,2048,1,1)] modern::elementwise_kernel
568 fprop - zero_ T=[(512,)] modern::elementwise_kernel
569 fprop - zero_ T=[(512,)] modern::elementwise_kernel
570 fprop - zero_ T=[(512,512,3,3)] modern::elementwise_kernel
571 fprop - zero_ T=[(512,)] modern::elementwise_kernel
572 fprop - zero_ T=[(512,)] modern::elementwise_kernel
573 fprop - zero_ T=[(2048,512,1,1)] modern::elementwise_kernel
574 fprop - zero_ T=[(2048,)] modern::elementwise_kernel
575 fprop - zero_ T=[(2048,)] modern::elementwise_kernel
576 fprop - zero_ T=[(512,2048,1,1)] modern::elementwise_kernel
577 fprop - zero_ T=[(512,)] modern::elementwise_kernel
578 fprop - zero_ T=[(512,)] modern::elementwise_kernel
579 fprop - zero_ T=[(512,512,3,3)] modern::elementwise_kernel
580 fprop - zero_ T=[(512,)] modern::elementwise_kernel
581 fprop - zero_ T=[(512,)] modern::elementwise_kernel
582 fprop - zero_ T=[(2048,512,1,1)] modern::elementwise_kernel
583 fprop - zero_ T=[(2048,)] modern::elementwise_kernel
584 fprop - zero_ T=[(2048,)] modern::elementwise_kernel
585 fprop - zero_ T=[(1000,2048)] modern::elementwise_kernel
586 fprop - zero_ T=[(1000,)] modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
587 fprop - backward T=, legacy::elementwise_kernel
588 bprop - cross_entropy T=[(32,1000),(32,)],[,] cunn_ClassNLLCriterion_updateGradInput_kernel
589 bprop - cross_entropy T=[(32,1000),(32,)],[,] softmax_warp_backward
590 bprop FC linear M=2048,N=32,K=1000 turing_fp16_s1688gemm_fp16_256x64_ldg8_f2f_nn
591 bprop FC linear M=2048,N=1000,K=32 splitKreduce_kernel
592 bprop FC linear X=(32,2048),W=(1000,2048) turing_fp16_s1688gemm_fp16_128x128_ldg8_f2f_nt
593 fprop - sum na reduce_kernel
594 fprop - add_ na modern::elementwise_kernel
595 fprop - add_ na modern::elementwise_kernel
596 fprop - div na legacy::elementwise_kernel
Idx Direction Layer Op Params Kernel
597 bprop 5c:ReLU3 relu T=(32,2048,7,7) modern::elementwise_kernel
598 bprop 5c:BN3 batch_norm T=(32,2048,7,7) batch_norm_backward_kernel
599 fprop - add_ na modern::elementwise_kernel
600 fprop - add_ na modern::elementwise_kernel
601 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
602 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
603 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
604 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
605 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
606 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
607 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
608 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 scalePackedTensor_kernel
609 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
610 bprop 5c:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
611 fprop - add_ na modern::elementwise_kernel
612 bprop 5c:ReLU2 relu T=(32,512,7,7) modern::elementwise_kernel
613 bprop 5c:BN2 batch_norm T=(32,512,7,7) batch_norm_backward_kernel
614 fprop - add_ na modern::elementwise_kernel
615 fprop - add_ na modern::elementwise_kernel
616 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
617 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
618 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
619 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
620 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
621 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
622 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
623 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
624 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
625 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
626 bprop 5c:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
627 fprop - add_ na modern::elementwise_kernel
628 bprop 5c:ReLU1 relu T=(32,512,7,7) modern::elementwise_kernel
629 bprop 5c:BN1 batch_norm T=(32,512,7,7) batch_norm_backward_kernel
630 fprop - add_ na modern::elementwise_kernel
631 fprop - add_ na modern::elementwise_kernel
632 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
633 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
634 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
635 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
636 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
637 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
638 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
639 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 scalePackedTensor_kernel
640 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
641 bprop 5c:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
642 fprop - add na modern::elementwise_kernel
643 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
644 bprop 5b:ReLU3 relu T=(32,2048,7,7) modern::elementwise_kernel
645 bprop 5b:BN3 batch_norm T=(32,2048,7,7) batch_norm_backward_kernel
646 fprop - add_ na modern::elementwise_kernel
647 fprop - add_ na modern::elementwise_kernel
648 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
649 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
650 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
651 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
652 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
653 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
654 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
655 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 scalePackedTensor_kernel
656 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
657 bprop 5b:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
658 fprop - add_ na modern::elementwise_kernel
659 bprop 5b:ReLU2 relu T=(32,512,7,7) modern::elementwise_kernel
660 bprop 5b:BN2 batch_norm T=(32,512,7,7) batch_norm_backward_kernel
661 fprop - add_ na modern::elementwise_kernel
662 fprop - add_ na modern::elementwise_kernel
663 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
664 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
665 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
666 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
667 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
668 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
669 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
670 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
671 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
672 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
673 bprop 5b:Conv2 conv2d N=32,C=512,H=7,W=7,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
674 fprop - add_ na modern::elementwise_kernel
675 bprop 5b:ReLU1 relu T=(32,512,7,7) modern::elementwise_kernel
676 bprop 5b:BN1 batch_norm T=(32,512,7,7) batch_norm_backward_kernel
677 fprop - add_ na modern::elementwise_kernel
678 fprop - add_ na modern::elementwise_kernel
679 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
680 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
681 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
682 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
683 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
684 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
685 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
686 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 scalePackedTensor_kernel
687 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
688 bprop 5b:Conv1 conv2d N=32,C=2048,H=7,W=7,K=512,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
689 fprop - add na modern::elementwise_kernel
690 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
691 bprop 5a:ReLU3 relu T=(32,2048,7,7) modern::elementwise_kernel
692 bprop 5a:Residual:Projection batch_norm T=(32,2048,7,7) batch_norm_backward_kernel
693 fprop - add_ na modern::elementwise_kernel
694 fprop - add_ na modern::elementwise_kernel
695 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 nchwToNhwcKernel
696 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 dgrad_1x1_stride_2x2
697 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 nhwcToNchwKernel
698 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 nchwToNhwcKernel
699 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 nchwToNhwcKernel
700 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 cudnn::gemm::computeWgradOffsetsKernel
701 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 scalePackedTensor_kernel
702 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
703 bprop 5a:Residual:Projection conv2d N=32,C=1024,H=14,W=14,K=2048,P=7,Q=7,R=1,S=1,U=2,V=2 nhwcToNchwKernel
704 fprop - add_ na modern::elementwise_kernel
705 bprop 5a:BN3 batch_norm T=(32,2048,7,7) batch_norm_backward_kernel
706 fprop - add_ na modern::elementwise_kernel
707 fprop - add_ na modern::elementwise_kernel
708 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
709 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeOffsetsKernel
710 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
711 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
712 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
713 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nchwToNhwcKernel
714 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
715 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 scalePackedTensor_kernel
716 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
717 bprop 5a:Conv3 conv2d N=32,C=512,H=7,W=7,K=2048,P=7,Q=7,R=1,S=1 nhwcToNchwKernel
718 fprop - add_ na modern::elementwise_kernel
719 bprop 5a:ReLU2 relu T=(32,512,7,7) modern::elementwise_kernel
720 bprop 5a:BN2 batch_norm T=(32,512,7,7) batch_norm_backward_kernel
721 fprop - add_ na modern::elementwise_kernel
722 fprop - add_ na modern::elementwise_kernel
723 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
724 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
725 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 dgrad_1d
726 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
727 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
728 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
729 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 cudnn::gemm::computeWgradOffsetsKernel
730 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 scalePackedTensor_kernel
731 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
732 bprop 5a:Conv2 conv2d N=32,C=512,H=14,W=14,K=512,P=7,Q=7,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
733 fprop - add_ na modern::elementwise_kernel
734 bprop 5a:ReLU1 relu T=(32,512,14,14) modern::elementwise_kernel
735 bprop 5a:BN1 batch_norm T=(32,512,14,14) batch_norm_backward_kernel
736 fprop - add_ na modern::elementwise_kernel
737 fprop - add_ na modern::elementwise_kernel
738 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
739 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
740 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
741 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
742 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
743 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
744 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
745 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
746 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
747 bprop 5a:Conv1 conv2d N=32,C=1024,H=14,W=14,K=512,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
748 fprop - add na modern::elementwise_kernel
749 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
750 bprop 4f:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
751 bprop 4f:BN3 batch_norm T=(32,1024,14,14) batch_norm_backward_kernel
752 fprop - add_ na modern::elementwise_kernel
753 fprop - add_ na modern::elementwise_kernel
754 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
755 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
756 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
757 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
758 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
759 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
760 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
761 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
762 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
763 bprop 4f:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
764 fprop - add_ na modern::elementwise_kernel
765 bprop 4f:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
766 bprop 4f:BN2 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
767 fprop - add_ na modern::elementwise_kernel
768 fprop - add_ na modern::elementwise_kernel
769 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
770 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
771 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
772 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
773 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
774 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
775 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
776 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
777 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
778 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
779 bprop 4f:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
780 fprop - add_ na modern::elementwise_kernel
781 bprop 4f:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
782 bprop 4f:BN1 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
783 fprop - add_ na modern::elementwise_kernel
784 fprop - add_ na modern::elementwise_kernel
785 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
786 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
787 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
788 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
789 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
790 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
791 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
792 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
793 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
794 bprop 4f:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
795 fprop - add na modern::elementwise_kernel
796 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
797 bprop 4e:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
798 bprop 4e:BN3 batch_norm T=(32,1024,14,14) batch_norm_backward_kernel
799 fprop - add_ na modern::elementwise_kernel
800 fprop - add_ na modern::elementwise_kernel
801 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
802 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
803 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
804 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
805 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
806 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
807 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
808 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
809 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
810 bprop 4e:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
811 fprop - add_ na modern::elementwise_kernel
812 bprop 4e:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
813 bprop 4e:BN2 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
814 fprop - add_ na modern::elementwise_kernel
815 fprop - add_ na modern::elementwise_kernel
816 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
817 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
818 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
819 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
820 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
821 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
822 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
823 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
824 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
825 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
826 bprop 4e:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
827 fprop - add_ na modern::elementwise_kernel
828 bprop 4e:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
829 bprop 4e:BN1 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
830 fprop - add_ na modern::elementwise_kernel
831 fprop - add_ na modern::elementwise_kernel
832 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
833 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
834 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
835 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
836 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
837 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
838 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
839 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
840 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
841 bprop 4e:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
842 fprop - add na modern::elementwise_kernel
843 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
844 bprop 4d:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
845 bprop 4d:BN3 batch_norm T=(32,1024,14,14) batch_norm_backward_kernel
846 fprop - add_ na modern::elementwise_kernel
847 fprop - add_ na modern::elementwise_kernel
848 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
849 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
850 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
851 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
852 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
853 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
854 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
855 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
856 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
857 bprop 4d:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
858 fprop - add_ na modern::elementwise_kernel
859 bprop 4d:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
860 bprop 4d:BN2 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
861 fprop - add_ na modern::elementwise_kernel
862 fprop - add_ na modern::elementwise_kernel
863 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
864 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
865 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
866 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
867 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
868 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
869 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
870 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
871 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
872 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
873 bprop 4d:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
874 fprop - add_ na modern::elementwise_kernel
875 bprop 4d:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
876 bprop 4d:BN1 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
877 fprop - add_ na modern::elementwise_kernel
878 fprop - add_ na modern::elementwise_kernel
879 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
880 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
881 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
882 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
883 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
884 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
885 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
886 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
887 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
888 bprop 4d:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
889 fprop - add na modern::elementwise_kernel
890 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
891 bprop 4c:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
892 bprop 4c:BN3 batch_norm T=(32,1024,14,14) batch_norm_backward_kernel
893 fprop - add_ na modern::elementwise_kernel
894 fprop - add_ na modern::elementwise_kernel
895 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
896 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
897 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
898 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
899 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
900 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
901 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
902 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
903 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
904 bprop 4c:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
905 fprop - add_ na modern::elementwise_kernel
906 bprop 4c:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
907 bprop 4c:BN2 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
908 fprop - add_ na modern::elementwise_kernel
909 fprop - add_ na modern::elementwise_kernel
910 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
911 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
912 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
913 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
914 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
915 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
916 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
917 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
918 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
919 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
920 bprop 4c:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
921 fprop - add_ na modern::elementwise_kernel
922 bprop 4c:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
923 bprop 4c:BN1 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
924 fprop - add_ na modern::elementwise_kernel
925 fprop - add_ na modern::elementwise_kernel
926 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
927 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
928 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
929 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
930 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
931 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
932 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
933 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
934 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
935 bprop 4c:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
936 fprop - add na modern::elementwise_kernel
937 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
938 bprop 4b:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
939 bprop 4b:BN3 batch_norm T=(32,1024,14,14) batch_norm_backward_kernel
940 fprop - add_ na modern::elementwise_kernel
941 fprop - add_ na modern::elementwise_kernel
942 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
943 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
944 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
945 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
946 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
947 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
948 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
949 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
950 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
951 bprop 4b:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
952 fprop - add_ na modern::elementwise_kernel
953 bprop 4b:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
954 bprop 4b:BN2 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
955 fprop - add_ na modern::elementwise_kernel
956 fprop - add_ na modern::elementwise_kernel
957 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
958 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
959 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
960 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
961 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
962 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
963 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
964 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
965 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
966 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
967 bprop 4b:Conv2 conv2d N=32,C=256,H=14,W=14,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
968 fprop - add_ na modern::elementwise_kernel
969 bprop 4b:ReLU1 relu T=(32,256,14,14) modern::elementwise_kernel
970 bprop 4b:BN1 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
971 fprop - add_ na modern::elementwise_kernel
972 fprop - add_ na modern::elementwise_kernel
973 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
974 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
975 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
976 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
977 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
978 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
979 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
980 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
981 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
982 bprop 4b:Conv1 conv2d N=32,C=1024,H=14,W=14,K=256,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
983 fprop - add na modern::elementwise_kernel
984 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
985 bprop 4a:ReLU3 relu T=(32,1024,14,14) modern::elementwise_kernel
986 bprop 4a:Residual:Projection batch_norm T=(32,1024,14,14) batch_norm_backward_kernel
987 fprop - add_ na modern::elementwise_kernel
988 fprop - add_ na modern::elementwise_kernel
989 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 nchwToNhwcKernel
990 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 dgrad_1x1_stride_2x2
991 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 nhwcToNchwKernel
992 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 nchwToNhwcKernel
993 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 nchwToNhwcKernel
994 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 cudnn::gemm::computeWgradOffsetsKernel
995 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 scalePackedTensor_kernel
996 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
997 bprop 4a:Residual:Projection conv2d N=32,C=512,H=28,W=28,K=1024,P=14,Q=14,R=1,S=1,U=2,V=2 nhwcToNchwKernel
998 fprop - add_ na modern::elementwise_kernel
999 bprop 4a:BN3 batch_norm T=(32,1024,14,14) batch_norm_backward_kernel
1000 fprop - add_ na modern::elementwise_kernel
1001 fprop - add_ na modern::elementwise_kernel
1002 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
1003 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1004 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1005 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1006 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
1007 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nchwToNhwcKernel
1008 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1009 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 scalePackedTensor_kernel
1010 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1011 bprop 4a:Conv3 conv2d N=32,C=256,H=14,W=14,K=1024,P=14,Q=14,R=1,S=1 nhwcToNchwKernel
1012 fprop - add_ na modern::elementwise_kernel
1013 bprop 4a:ReLU2 relu T=(32,256,14,14) modern::elementwise_kernel
1014 bprop 4a:BN2 batch_norm T=(32,256,14,14) batch_norm_backward_kernel
1015 fprop - add_ na modern::elementwise_kernel
1016 fprop - add_ na modern::elementwise_kernel
1017 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
1018 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
1019 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 dgrad_1d
1020 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
1021 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
1022 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
1023 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 cudnn::gemm::computeWgradOffsetsKernel
1024 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 scalePackedTensor_kernel
1025 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1026 bprop 4a:Conv2 conv2d N=32,C=256,H=28,W=28,K=256,P=14,Q=14,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
1027 fprop - add_ na modern::elementwise_kernel
1028 bprop 4a:ReLU1 relu T=(32,256,28,28) modern::elementwise_kernel
1029 bprop 4a:BN1 batch_norm T=(32,256,28,28) batch_norm_backward_kernel
1030 fprop - add_ na modern::elementwise_kernel
1031 fprop - add_ na modern::elementwise_kernel
1032 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1033 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1034 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1035 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1036 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1037 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1038 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1039 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 scalePackedTensor_kernel
1040 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1041 bprop 4a:Conv1 conv2d N=32,C=512,H=28,W=28,K=256,P=28,Q=28,R=1,S=1 nhwcToNchwKernel
1042 fprop - add na modern::elementwise_kernel
1043 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1044 bprop 3d:ReLU3 relu T=(32,512,28,28) modern::elementwise_kernel
1045 bprop 3d:BN3 batch_norm T=(32,512,28,28) batch_norm_backward_kernel
1046 fprop - add_ na modern::elementwise_kernel
1047 fprop - add_ na modern::elementwise_kernel
1048 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1049 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1050 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1051 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1052 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1053 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1054 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1055 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 scalePackedTensor_kernel
1056 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1057 bprop 3d:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nhwcToNchwKernel
1058 fprop - add_ na modern::elementwise_kernel
1059 bprop 3d:ReLU2 relu T=(32,128,28,28) modern::elementwise_kernel
1060 bprop 3d:BN2 batch_norm T=(32,128,28,28) batch_norm_backward_kernel
1061 fprop - add_ na modern::elementwise_kernel
1062 fprop - add_ na modern::elementwise_kernel
1063 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1064 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1065 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
1066 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
1067 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
1068 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1069 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1070 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
1071 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
1072 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1073 bprop 3d:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
1074 fprop - add_ na modern::elementwise_kernel
1075 bprop 3d:ReLU1 relu T=(32,128,28,28) modern::elementwise_kernel
1076 bprop 3d:BN1 batch_norm T=(32,128,28,28) batch_norm_backward_kernel
1077 fprop - add_ na modern::elementwise_kernel
1078 fprop - add_ na modern::elementwise_kernel
1079 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1080 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1081 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1082 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1083 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1084 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1085 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1086 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 scalePackedTensor_kernel
1087 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1088 bprop 3d:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nhwcToNchwKernel
1089 fprop - add na modern::elementwise_kernel
1090 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1091 bprop 3c:ReLU3 relu T=(32,512,28,28) modern::elementwise_kernel
1092 bprop 3c:BN3 batch_norm T=(32,512,28,28) batch_norm_backward_kernel
1093 fprop - add_ na modern::elementwise_kernel
1094 fprop - add_ na modern::elementwise_kernel
1095 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1096 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1097 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1098 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1099 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1100 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1101 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1102 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 scalePackedTensor_kernel
1103 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1104 bprop 3c:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nhwcToNchwKernel
1105 fprop - add_ na modern::elementwise_kernel
1106 bprop 3c:ReLU2 relu T=(32,128,28,28) modern::elementwise_kernel
1107 bprop 3c:BN2 batch_norm T=(32,128,28,28) batch_norm_backward_kernel
1108 fprop - add_ na modern::elementwise_kernel
1109 fprop - add_ na modern::elementwise_kernel
1110 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1111 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1112 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
1113 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
1114 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
1115 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1116 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1117 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
1118 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
1119 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1120 bprop 3c:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
1121 fprop - add_ na modern::elementwise_kernel
1122 bprop 3c:ReLU1 relu T=(32,128,28,28) modern::elementwise_kernel
1123 bprop 3c:BN1 batch_norm T=(32,128,28,28) batch_norm_backward_kernel
1124 fprop - add_ na modern::elementwise_kernel
1125 fprop - add_ na modern::elementwise_kernel
1126 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1127 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1128 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1129 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1130 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1131 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1132 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1133 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 scalePackedTensor_kernel
1134 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1135 bprop 3c:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nhwcToNchwKernel
1136 fprop - add na modern::elementwise_kernel
1137 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1138 bprop 3b:ReLU3 relu T=(32,512,28,28) modern::elementwise_kernel
1139 bprop 3b:BN3 batch_norm T=(32,512,28,28) batch_norm_backward_kernel
1140 fprop - add_ na modern::elementwise_kernel
1141 fprop - add_ na modern::elementwise_kernel
1142 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1143 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1144 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1145 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1146 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1147 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1148 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1149 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 scalePackedTensor_kernel
1150 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1151 bprop 3b:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nhwcToNchwKernel
1152 fprop - add_ na modern::elementwise_kernel
1153 bprop 3b:ReLU2 relu T=(32,128,28,28) modern::elementwise_kernel
1154 bprop 3b:BN2 batch_norm T=(32,128,28,28) batch_norm_backward_kernel
1155 fprop - add_ na modern::elementwise_kernel
1156 fprop - add_ na modern::elementwise_kernel
1157 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1158 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1159 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
1160 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
1161 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
1162 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1163 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1164 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
1165 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
1166 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1167 bprop 3b:Conv2 conv2d N=32,C=128,H=28,W=28,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
1168 fprop - add_ na modern::elementwise_kernel
1169 bprop 3b:ReLU1 relu T=(32,128,28,28) modern::elementwise_kernel
1170 bprop 3b:BN1 batch_norm T=(32,128,28,28) batch_norm_backward_kernel
1171 fprop - add_ na modern::elementwise_kernel
1172 fprop - add_ na modern::elementwise_kernel
1173 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1174 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1175 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1176 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1177 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1178 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1179 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1180 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 scalePackedTensor_kernel
1181 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1182 bprop 3b:Conv1 conv2d N=32,C=512,H=28,W=28,K=128,P=28,Q=28,R=1,S=1 nhwcToNchwKernel
1183 fprop - add na modern::elementwise_kernel
1184 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1185 bprop 3a:ReLU3 relu T=(32,512,28,28) modern::elementwise_kernel
1186 bprop 3a:Residual:Projection batch_norm T=(32,512,28,28) batch_norm_backward_kernel
1187 fprop - add_ na modern::elementwise_kernel
1188 fprop - add_ na modern::elementwise_kernel
1189 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 nchwToNhwcKernel
1190 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 dgrad_1x1_stride_2x2
1191 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 nhwcToNchwKernel
1192 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 nchwToNhwcKernel
1193 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 nchwToNhwcKernel
1194 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 cudnn::gemm::computeWgradOffsetsKernel
1195 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 scalePackedTensor_kernel
1196 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1197 bprop 3a:Residual:Projection conv2d N=32,C=256,H=56,W=56,K=512,P=28,Q=28,R=1,S=1,U=2,V=2 nhwcToNchwKernel
1198 fprop - add_ na modern::elementwise_kernel
1199 bprop 3a:BN3 batch_norm T=(32,512,28,28) batch_norm_backward_kernel
1200 fprop - add_ na modern::elementwise_kernel
1201 fprop - add_ na modern::elementwise_kernel
1202 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1203 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1204 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1205 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1206 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1207 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nchwToNhwcKernel
1208 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1209 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 scalePackedTensor_kernel
1210 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1211 bprop 3a:Conv3 conv2d N=32,C=128,H=28,W=28,K=512,P=28,Q=28,R=1,S=1 nhwcToNchwKernel
1212 fprop - add_ na modern::elementwise_kernel
1213 bprop 3a:ReLU2 relu T=(32,128,28,28) modern::elementwise_kernel
1214 bprop 3a:BN2 batch_norm T=(32,128,28,28) batch_norm_backward_kernel
1215 fprop - add_ na modern::elementwise_kernel
1216 fprop - add_ na modern::elementwise_kernel
1217 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
1218 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
1219 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 dgrad_2d
1220 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
1221 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
1222 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nchwToNhwcKernel
1223 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 cudnn::gemm::computeWgradOffsetsKernel
1224 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 scalePackedTensor_kernel
1225 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1226 bprop 3a:Conv2 conv2d N=32,C=128,H=56,W=56,K=128,P=28,Q=28,R=3,S=3,ph=1,pw=1,U=2,V=2 nhwcToNchwKernel
1227 fprop - add_ na modern::elementwise_kernel
1228 bprop 3a:ReLU1 relu T=(32,128,56,56) modern::elementwise_kernel
1229 bprop 3a:BN1 batch_norm T=(32,128,56,56) batch_norm_backward_kernel
1230 fprop - add_ na modern::elementwise_kernel
1231 fprop - add_ na modern::elementwise_kernel
1232 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1233 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1234 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1235 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1236 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1237 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1238 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1239 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 scalePackedTensor_kernel
1240 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1241 bprop 3a:Conv1 conv2d N=32,C=256,H=56,W=56,K=128,P=56,Q=56,R=1,S=1 nhwcToNchwKernel
1242 fprop - add na modern::elementwise_kernel
1243 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1244 bprop 2c:ReLU3 relu T=(32,256,56,56) modern::elementwise_kernel
1245 bprop 2c:BN3 batch_norm T=(32,256,56,56) batch_norm_backward_kernel
1246 fprop - add_ na modern::elementwise_kernel
1247 fprop - add_ na modern::elementwise_kernel
1248 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1249 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1250 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1251 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1252 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1253 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1254 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1255 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 scalePackedTensor_kernel
1256 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1257 bprop 2c:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nhwcToNchwKernel
1258 fprop - add_ na modern::elementwise_kernel
1259 bprop 2c:ReLU2 relu T=(32,64,56,56) modern::elementwise_kernel
1260 bprop 2c:BN2 batch_norm T=(32,64,56,56) batch_norm_backward_kernel
1261 fprop - add_ na modern::elementwise_kernel
1262 fprop - add_ na modern::elementwise_kernel
1263 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1264 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1265 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
1266 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
1267 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
1268 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1269 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1270 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
1271 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
1272 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1273 bprop 2c:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
1274 fprop - add_ na modern::elementwise_kernel
1275 bprop 2c:ReLU1 relu T=(32,64,56,56) modern::elementwise_kernel
1276 bprop 2c:BN1 batch_norm T=(32,64,56,56) batch_norm_backward_kernel
1277 fprop - add_ na modern::elementwise_kernel
1278 fprop - add_ na modern::elementwise_kernel
1279 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1280 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1281 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1282 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1283 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1284 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1285 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1286 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 scalePackedTensor_kernel
1287 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1288 bprop 2c:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nhwcToNchwKernel
1289 fprop - add na modern::elementwise_kernel
1290 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1291 bprop 2b:ReLU3 relu T=(32,256,56,56) modern::elementwise_kernel
1292 bprop 2b:BN3 batch_norm T=(32,256,56,56) batch_norm_backward_kernel
1293 fprop - add_ na modern::elementwise_kernel
1294 fprop - add_ na modern::elementwise_kernel
1295 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1296 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1297 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1298 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1299 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1300 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1301 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1302 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 scalePackedTensor_kernel
1303 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1304 bprop 2b:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nhwcToNchwKernel
1305 fprop - add_ na modern::elementwise_kernel
1306 bprop 2b:ReLU2 relu T=(32,64,56,56) modern::elementwise_kernel
1307 bprop 2b:BN2 batch_norm T=(32,64,56,56) batch_norm_backward_kernel
1308 fprop - add_ na modern::elementwise_kernel
1309 fprop - add_ na modern::elementwise_kernel
1310 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1311 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1312 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
1313 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
1314 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
1315 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1316 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1317 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
1318 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
1319 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1320 bprop 2b:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
1321 fprop - add_ na modern::elementwise_kernel
1322 bprop 2b:ReLU1 relu T=(32,64,56,56) modern::elementwise_kernel
1323 bprop 2b:BN1 batch_norm T=(32,64,56,56) batch_norm_backward_kernel
1324 fprop - add_ na modern::elementwise_kernel
1325 fprop - add_ na modern::elementwise_kernel
1326 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1327 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1328 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1329 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 volta_fp16_s884cudnn_fp16_256x128_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1330 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1331 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1332 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1333 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 scalePackedTensor_kernel
1334 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1335 bprop 2b:Conv1 conv2d N=32,C=256,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nhwcToNchwKernel
1336 fprop - add na modern::elementwise_kernel
1337 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1338 bprop 2a:ReLU3 relu T=(32,256,56,56) modern::elementwise_kernel
1339 bprop 2a:Residual:Projection batch_norm T=(32,256,56,56) batch_norm_backward_kernel
1340 fprop - add_ na modern::elementwise_kernel
1341 fprop - add_ na modern::elementwise_kernel
1342 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1343 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1344 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1345 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1346 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1347 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1348 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1349 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 scalePackedTensor_kernel
1350 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1351 bprop 2a:Residual:Projection conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nhwcToNchwKernel
1352 fprop - add_ na modern::elementwise_kernel
1353 bprop 2a:BN3 batch_norm T=(32,256,56,56) batch_norm_backward_kernel
1354 fprop - add_ na modern::elementwise_kernel
1355 fprop - add_ na modern::elementwise_kernel
1356 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1357 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1358 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1359 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1360 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1361 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1362 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1363 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 scalePackedTensor_kernel
1364 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1365 bprop 2a:Conv3 conv2d N=32,C=64,H=56,W=56,K=256,P=56,Q=56,R=1,S=1 nhwcToNchwKernel
1366 fprop - add_ na modern::elementwise_kernel
1367 bprop 2a:ReLU2 relu T=(32,64,56,56) modern::elementwise_kernel
1368 bprop 2a:BN2 batch_norm T=(32,64,56,56) batch_norm_backward_kernel
1369 fprop - add_ na modern::elementwise_kernel
1370 fprop - add_ na modern::elementwise_kernel
1371 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1372 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1373 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeOffsetsKernel
1374 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeBOffsetsKernel
1375 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_small_nhwc2nchw_tt_v1
1376 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1377 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nchwToNhwcKernel
1378 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 cudnn::gemm::computeWgradOffsetsKernel
1379 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 scalePackedTensor_kernel
1380 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1381 bprop 2a:Conv2 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=3,S=3,ph=1,pw=1 nhwcToNchwKernel
1382 fprop - add_ na modern::elementwise_kernel
1383 bprop 2a:ReLU1 relu T=(32,64,56,56) modern::elementwise_kernel
1384 bprop 2a:BN1 batch_norm T=(32,64,56,56) batch_norm_backward_kernel
1385 fprop - add_ na modern::elementwise_kernel
1386 fprop - add_ na modern::elementwise_kernel
1387 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1388 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeOffsetsKernel
1389 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeBOffsetsKernel
1390 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 volta_fp16_s884cudnn_fp16_256x64_ldg8_dgrad_f2f_exp_interior_nhwc2nchw_tt_v1
1391 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1392 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nchwToNhwcKernel
1393 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 cudnn::gemm::computeWgradOffsetsKernel
1394 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 scalePackedTensor_kernel
1395 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 turing_s1688cudnn_fp16_128x128_ldg8_wgrad_idx_exp_interior_nhwc_nt_v1
1396 bprop 2a:Conv1 conv2d N=32,C=64,H=56,W=56,K=64,P=56,Q=56,R=1,S=1 nhwcToNchwKernel
1397 fprop - add na modern::elementwise_kernel
1398 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1399 bprop block_1 max_pool2d T=[(32,64,112,112)] modern::elementwise_kernel
1400 bprop block_1 max_pool2d T=[(32,64,112,112)] max_pool_backward_nchw
1401 bprop block_1 relu T=(32,64,112,112) modern::elementwise_kernel
1402 bprop block_1 batch_norm T=(32,64,112,112) batch_norm_backward_kernel
1403 fprop - add_ na modern::elementwise_kernel
1404 fprop - add_ na modern::elementwise_kernel
1405 bprop block_1 conv2d N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 nchwToNhwc3To4Kernel
1406 bprop block_1 conv2d N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 nchwToNhwcKernel
1407 bprop block_1 conv2d N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 cask_cudnn::first_layer_wgrad_kernel
1408 bprop block_1 conv2d N=32,C=3,H=224,W=224,K=64,P=112,Q=112,R=7,S=7,ph=3,pw=3,U=2,V=2 generic4To3Channel_kernel
1409 fprop - add_ na modern::elementwise_kernel
Idx Direction Layer Op Params Kernel
1410 fprop - mul_ T=[(64,3,7,7)] modern::elementwise_kernel
1411 fprop - add_ T=[(64,3,7,7),(64,3,7,7)] modern::elementwise_kernel
1412 fprop - add_ T=[(64,3,7,7),(64,3,7,7)] modern::elementwise_kernel
1413 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1414 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1415 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1416 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1417 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1418 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1419 fprop - mul_ T=[(64,64,1,1)] modern::elementwise_kernel
1420 fprop - add_ T=[(64,64,1,1),(64,64,1,1)] modern::elementwise_kernel
1421 fprop - add_ T=[(64,64,1,1),(64,64,1,1)] modern::elementwise_kernel
1422 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1423 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1424 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1425 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1426 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1427 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1428 fprop - mul_ T=[(64,64,3,3)] modern::elementwise_kernel
1429 fprop - add_ T=[(64,64,3,3),(64,64,3,3)] modern::elementwise_kernel
1430 fprop - add_ T=[(64,64,3,3),(64,64,3,3)] modern::elementwise_kernel
1431 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1432 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1433 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1434 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1435 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1436 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1437 fprop - mul_ T=[(256,64,1,1)] modern::elementwise_kernel
1438 fprop - add_ T=[(256,64,1,1),(256,64,1,1)] modern::elementwise_kernel
1439 fprop - add_ T=[(256,64,1,1),(256,64,1,1)] modern::elementwise_kernel
1440 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1441 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1442 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1443 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1444 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1445 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1446 fprop - mul_ T=[(256,64,1,1)] modern::elementwise_kernel
1447 fprop - add_ T=[(256,64,1,1),(256,64,1,1)] modern::elementwise_kernel
1448 fprop - add_ T=[(256,64,1,1),(256,64,1,1)] modern::elementwise_kernel
1449 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1450 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1451 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1452 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1453 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1454 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1455 fprop - mul_ T=[(64,256,1,1)] modern::elementwise_kernel
1456 fprop - add_ T=[(64,256,1,1),(64,256,1,1)] modern::elementwise_kernel
1457 fprop - add_ T=[(64,256,1,1),(64,256,1,1)] modern::elementwise_kernel
1458 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1459 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1460 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1461 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1462 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1463 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1464 fprop - mul_ T=[(64,64,3,3)] modern::elementwise_kernel
1465 fprop - add_ T=[(64,64,3,3),(64,64,3,3)] modern::elementwise_kernel
1466 fprop - add_ T=[(64,64,3,3),(64,64,3,3)] modern::elementwise_kernel
1467 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1468 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1469 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1470 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1471 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1472 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1473 fprop - mul_ T=[(256,64,1,1)] modern::elementwise_kernel
1474 fprop - add_ T=[(256,64,1,1),(256,64,1,1)] modern::elementwise_kernel
1475 fprop - add_ T=[(256,64,1,1),(256,64,1,1)] modern::elementwise_kernel
1476 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1477 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1478 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1479 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1480 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1481 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1482 fprop - mul_ T=[(64,256,1,1)] modern::elementwise_kernel
1483 fprop - add_ T=[(64,256,1,1),(64,256,1,1)] modern::elementwise_kernel
1484 fprop - add_ T=[(64,256,1,1),(64,256,1,1)] modern::elementwise_kernel
1485 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1486 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1487 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1488 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1489 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1490 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1491 fprop - mul_ T=[(64,64,3,3)] modern::elementwise_kernel
1492 fprop - add_ T=[(64,64,3,3),(64,64,3,3)] modern::elementwise_kernel
1493 fprop - add_ T=[(64,64,3,3),(64,64,3,3)] modern::elementwise_kernel
1494 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1495 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1496 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1497 fprop - mul_ T=[(64,)] modern::elementwise_kernel
1498 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1499 fprop - add_ T=[(64,),(64,)] modern::elementwise_kernel
1500 fprop - mul_ T=[(256,64,1,1)] modern::elementwise_kernel
1501 fprop - add_ T=[(256,64,1,1),(256,64,1,1)] modern::elementwise_kernel
1502 fprop - add_ T=[(256,64,1,1),(256,64,1,1)] modern::elementwise_kernel
1503 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1504 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1505 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1506 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1507 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1508 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1509 fprop - mul_ T=[(128,256,1,1)] modern::elementwise_kernel
1510 fprop - add_ T=[(128,256,1,1),(128,256,1,1)] modern::elementwise_kernel
1511 fprop - add_ T=[(128,256,1,1),(128,256,1,1)] modern::elementwise_kernel
1512 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1513 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1514 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1515 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1516 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1517 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1518 fprop - mul_ T=[(128,128,3,3)] modern::elementwise_kernel
1519 fprop - add_ T=[(128,128,3,3),(128,128,3,3)] modern::elementwise_kernel
1520 fprop - add_ T=[(128,128,3,3),(128,128,3,3)] modern::elementwise_kernel
1521 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1522 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1523 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1524 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1525 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1526 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1527 fprop - mul_ T=[(512,128,1,1)] modern::elementwise_kernel
1528 fprop - add_ T=[(512,128,1,1),(512,128,1,1)] modern::elementwise_kernel
1529 fprop - add_ T=[(512,128,1,1),(512,128,1,1)] modern::elementwise_kernel
1530 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1531 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1532 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1533 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1534 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1535 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1536 fprop - mul_ T=[(512,256,1,1)] modern::elementwise_kernel
1537 fprop - add_ T=[(512,256,1,1),(512,256,1,1)] modern::elementwise_kernel
1538 fprop - add_ T=[(512,256,1,1),(512,256,1,1)] modern::elementwise_kernel
1539 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1540 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1541 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1542 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1543 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1544 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1545 fprop - mul_ T=[(128,512,1,1)] modern::elementwise_kernel
1546 fprop - add_ T=[(128,512,1,1),(128,512,1,1)] modern::elementwise_kernel
1547 fprop - add_ T=[(128,512,1,1),(128,512,1,1)] modern::elementwise_kernel
1548 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1549 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1550 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1551 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1552 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1553 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1554 fprop - mul_ T=[(128,128,3,3)] modern::elementwise_kernel
1555 fprop - add_ T=[(128,128,3,3),(128,128,3,3)] modern::elementwise_kernel
1556 fprop - add_ T=[(128,128,3,3),(128,128,3,3)] modern::elementwise_kernel
1557 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1558 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1559 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1560 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1561 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1562 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1563 fprop - mul_ T=[(512,128,1,1)] modern::elementwise_kernel
1564 fprop - add_ T=[(512,128,1,1),(512,128,1,1)] modern::elementwise_kernel
1565 fprop - add_ T=[(512,128,1,1),(512,128,1,1)] modern::elementwise_kernel
1566 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1567 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1568 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1569 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1570 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1571 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1572 fprop - mul_ T=[(128,512,1,1)] modern::elementwise_kernel
1573 fprop - add_ T=[(128,512,1,1),(128,512,1,1)] modern::elementwise_kernel
1574 fprop - add_ T=[(128,512,1,1),(128,512,1,1)] modern::elementwise_kernel
1575 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1576 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1577 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1578 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1579 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1580 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1581 fprop - mul_ T=[(128,128,3,3)] modern::elementwise_kernel
1582 fprop - add_ T=[(128,128,3,3),(128,128,3,3)] modern::elementwise_kernel
1583 fprop - add_ T=[(128,128,3,3),(128,128,3,3)] modern::elementwise_kernel
1584 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1585 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1586 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1587 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1588 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1589 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1590 fprop - mul_ T=[(512,128,1,1)] modern::elementwise_kernel
1591 fprop - add_ T=[(512,128,1,1),(512,128,1,1)] modern::elementwise_kernel
1592 fprop - add_ T=[(512,128,1,1),(512,128,1,1)] modern::elementwise_kernel
1593 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1594 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1595 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1596 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1597 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1598 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1599 fprop - mul_ T=[(128,512,1,1)] modern::elementwise_kernel
1600 fprop - add_ T=[(128,512,1,1),(128,512,1,1)] modern::elementwise_kernel
1601 fprop - add_ T=[(128,512,1,1),(128,512,1,1)] modern::elementwise_kernel
1602 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1603 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1604 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1605 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1606 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1607 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1608 fprop - mul_ T=[(128,128,3,3)] modern::elementwise_kernel
1609 fprop - add_ T=[(128,128,3,3),(128,128,3,3)] modern::elementwise_kernel
1610 fprop - add_ T=[(128,128,3,3),(128,128,3,3)] modern::elementwise_kernel
1611 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1612 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1613 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1614 fprop - mul_ T=[(128,)] modern::elementwise_kernel
1615 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1616 fprop - add_ T=[(128,),(128,)] modern::elementwise_kernel
1617 fprop - mul_ T=[(512,128,1,1)] modern::elementwise_kernel
1618 fprop - add_ T=[(512,128,1,1),(512,128,1,1)] modern::elementwise_kernel
1619 fprop - add_ T=[(512,128,1,1),(512,128,1,1)] modern::elementwise_kernel
1620 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1621 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1622 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1623 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1624 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1625 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1626 fprop - mul_ T=[(256,512,1,1)] modern::elementwise_kernel
1627 fprop - add_ T=[(256,512,1,1),(256,512,1,1)] modern::elementwise_kernel
1628 fprop - add_ T=[(256,512,1,1),(256,512,1,1)] modern::elementwise_kernel
1629 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1630 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1631 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1632 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1633 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1634 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1635 fprop - mul_ T=[(256,256,3,3)] modern::elementwise_kernel
1636 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1637 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1638 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1639 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1640 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1641 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1642 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1643 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1644 fprop - mul_ T=[(1024,256,1,1)] modern::elementwise_kernel
1645 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1646 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1647 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1648 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1649 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1650 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1651 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1652 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1653 fprop - mul_ T=[(1024,512,1,1)] modern::elementwise_kernel
1654 fprop - add_ T=[(1024,512,1,1),(1024,512,1,1)] modern::elementwise_kernel
1655 fprop - add_ T=[(1024,512,1,1),(1024,512,1,1)] modern::elementwise_kernel
1656 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1657 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1658 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1659 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1660 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1661 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1662 fprop - mul_ T=[(256,1024,1,1)] modern::elementwise_kernel
1663 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1664 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1665 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1666 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1667 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1668 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1669 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1670 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1671 fprop - mul_ T=[(256,256,3,3)] modern::elementwise_kernel
1672 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1673 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1674 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1675 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1676 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1677 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1678 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1679 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1680 fprop - mul_ T=[(1024,256,1,1)] modern::elementwise_kernel
1681 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1682 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1683 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1684 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1685 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1686 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1687 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1688 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1689 fprop - mul_ T=[(256,1024,1,1)] modern::elementwise_kernel
1690 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1691 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1692 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1693 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1694 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1695 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1696 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1697 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1698 fprop - mul_ T=[(256,256,3,3)] modern::elementwise_kernel
1699 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1700 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1701 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1702 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1703 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1704 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1705 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1706 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1707 fprop - mul_ T=[(1024,256,1,1)] modern::elementwise_kernel
1708 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1709 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1710 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1711 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1712 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1713 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1714 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1715 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1716 fprop - mul_ T=[(256,1024,1,1)] modern::elementwise_kernel
1717 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1718 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1719 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1720 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1721 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1722 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1723 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1724 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1725 fprop - mul_ T=[(256,256,3,3)] modern::elementwise_kernel
1726 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1727 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1728 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1729 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1730 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1731 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1732 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1733 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1734 fprop - mul_ T=[(1024,256,1,1)] modern::elementwise_kernel
1735 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1736 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1737 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1738 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1739 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1740 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1741 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1742 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1743 fprop - mul_ T=[(256,1024,1,1)] modern::elementwise_kernel
1744 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1745 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1746 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1747 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1748 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1749 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1750 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1751 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1752 fprop - mul_ T=[(256,256,3,3)] modern::elementwise_kernel
1753 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1754 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1755 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1756 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1757 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1758 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1759 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1760 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1761 fprop - mul_ T=[(1024,256,1,1)] modern::elementwise_kernel
1762 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1763 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1764 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1765 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1766 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1767 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1768 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1769 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1770 fprop - mul_ T=[(256,1024,1,1)] modern::elementwise_kernel
1771 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1772 fprop - add_ T=[(256,1024,1,1),(256,1024,1,1)] modern::elementwise_kernel
1773 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1774 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1775 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1776 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1777 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1778 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1779 fprop - mul_ T=[(256,256,3,3)] modern::elementwise_kernel
1780 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1781 fprop - add_ T=[(256,256,3,3),(256,256,3,3)] modern::elementwise_kernel
1782 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1783 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1784 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1785 fprop - mul_ T=[(256,)] modern::elementwise_kernel
1786 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1787 fprop - add_ T=[(256,),(256,)] modern::elementwise_kernel
1788 fprop - mul_ T=[(1024,256,1,1)] modern::elementwise_kernel
1789 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1790 fprop - add_ T=[(1024,256,1,1),(1024,256,1,1)] modern::elementwise_kernel
1791 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1792 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1793 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1794 fprop - mul_ T=[(1024,)] modern::elementwise_kernel
1795 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1796 fprop - add_ T=[(1024,),(1024,)] modern::elementwise_kernel
1797 fprop - mul_ T=[(512,1024,1,1)] modern::elementwise_kernel
1798 fprop - add_ T=[(512,1024,1,1),(512,1024,1,1)] modern::elementwise_kernel
1799 fprop - add_ T=[(512,1024,1,1),(512,1024,1,1)] modern::elementwise_kernel
1800 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1801 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1802 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1803 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1804 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1805 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1806 fprop - mul_ T=[(512,512,3,3)] modern::elementwise_kernel
1807 fprop - add_ T=[(512,512,3,3),(512,512,3,3)] modern::elementwise_kernel
1808 fprop - add_ T=[(512,512,3,3),(512,512,3,3)] modern::elementwise_kernel
1809 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1810 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1811 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1812 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1813 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1814 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1815 fprop - mul_ T=[(2048,512,1,1)] modern::elementwise_kernel
1816 fprop - add_ T=[(2048,512,1,1),(2048,512,1,1)] modern::elementwise_kernel
1817 fprop - add_ T=[(2048,512,1,1),(2048,512,1,1)] modern::elementwise_kernel
1818 fprop - mul_ T=[(2048,)] modern::elementwise_kernel
1819 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1820 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1821 fprop - mul_ T=[(2048,)] modern::elementwise_kernel
1822 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1823 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1824 fprop - mul_ T=[(2048,1024,1,1)] modern::elementwise_kernel
1825 fprop - add_ T=[(2048,1024,1,1),(2048,1024,1,1)] modern::elementwise_kernel
1826 fprop - add_ T=[(2048,1024,1,1),(2048,1024,1,1)] modern::elementwise_kernel
1827 fprop - mul_ T=[(2048,)] modern::elementwise_kernel
1828 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1829 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1830 fprop - mul_ T=[(2048,)] modern::elementwise_kernel
1831 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1832 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1833 fprop - mul_ T=[(512,2048,1,1)] modern::elementwise_kernel
1834 fprop - add_ T=[(512,2048,1,1),(512,2048,1,1)] modern::elementwise_kernel
1835 fprop - add_ T=[(512,2048,1,1),(512,2048,1,1)] modern::elementwise_kernel
1836 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1837 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1838 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1839 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1840 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1841 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1842 fprop - mul_ T=[(512,512,3,3)] modern::elementwise_kernel
1843 fprop - add_ T=[(512,512,3,3),(512,512,3,3)] modern::elementwise_kernel
1844 fprop - add_ T=[(512,512,3,3),(512,512,3,3)] modern::elementwise_kernel
1845 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1846 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1847 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1848 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1849 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1850 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1851 fprop - mul_ T=[(2048,512,1,1)] modern::elementwise_kernel
1852 fprop - add_ T=[(2048,512,1,1),(2048,512,1,1)] modern::elementwise_kernel
1853 fprop - add_ T=[(2048,512,1,1),(2048,512,1,1)] modern::elementwise_kernel
1854 fprop - mul_ T=[(2048,)] modern::elementwise_kernel
1855 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1856 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1857 fprop - mul_ T=[(2048,)] modern::elementwise_kernel
1858 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1859 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1860 fprop - mul_ T=[(512,2048,1,1)] modern::elementwise_kernel
1861 fprop - add_ T=[(512,2048,1,1),(512,2048,1,1)] modern::elementwise_kernel
1862 fprop - add_ T=[(512,2048,1,1),(512,2048,1,1)] modern::elementwise_kernel
1863 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1864 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1865 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1866 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1867 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1868 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1869 fprop - mul_ T=[(512,512,3,3)] modern::elementwise_kernel
1870 fprop - add_ T=[(512,512,3,3),(512,512,3,3)] modern::elementwise_kernel
1871 fprop - add_ T=[(512,512,3,3),(512,512,3,3)] modern::elementwise_kernel
1872 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1873 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1874 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1875 fprop - mul_ T=[(512,)] modern::elementwise_kernel
1876 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1877 fprop - add_ T=[(512,),(512,)] modern::elementwise_kernel
1878 fprop - mul_ T=[(2048,512,1,1)] modern::elementwise_kernel
1879 fprop - add_ T=[(2048,512,1,1),(2048,512,1,1)] modern::elementwise_kernel
1880 fprop - add_ T=[(2048,512,1,1),(2048,512,1,1)] modern::elementwise_kernel
1881 fprop - mul_ T=[(2048,)] modern::elementwise_kernel
1882 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1883 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1884 fprop - mul_ T=[(2048,)] modern::elementwise_kernel
1885 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1886 fprop - add_ T=[(2048,),(2048,)] modern::elementwise_kernel
1887 fprop - mul_ T=[(1000,2048)] modern::elementwise_kernel
1888 fprop - add_ T=[(1000,2048),(1000,2048)] modern::elementwise_kernel
1889 fprop - add_ T=[(1000,2048),(1000,2048)] modern::elementwise_kernel
1890 fprop - mul_ T=[(1000,)] modern::elementwise_kernel
1891 fprop - add_ T=[(1000,),(1000,)] modern::elementwise_kernel
1892 fprop - add_ T=[(1000,),(1000,)] modern::elementwise_kernel