AOMedia AV1 Codec
nonrd_opt.h
1/*
2 * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12#ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
13#define AOM_AV1_ENCODER_NONRD_OPT_H_
14
15#include "av1/encoder/context_tree.h"
16#include "av1/encoder/rdopt_utils.h"
17#include "av1/encoder/rdopt.h"
18
// Number of single-reference inter modes and intra modes evaluated by the
// real-time (non-rd) mode search; RTC_MODES is the larger of the two.
#define RTC_INTER_MODES (4)
#define RTC_INTRA_MODES (4)
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))

// Scales an RD cost by 7/8: multiply by 7, then shift right by 3.
#define CALC_BIASED_RDCOST(rdcost) ((7 * (rdcost)) >> 3)

// Sizes of the compound and single-reference mode sets defined in this file.
#define NUM_COMP_INTER_MODES_RT (6)
#define NUM_INTER_MODES 12

// True when the transform size should be capped for blocks larger than 32x32,
// i.e. unless the transform mode search type is ONLY_4X4. The cap itself is
// TX_SIZE_FOR_BSIZE_GT32.
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize)          \
  (((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32) ? true \
                                                                : false)
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)

#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif
32
33extern int g_pick_inter_mode_cnt;
// One slot of a small pool of prediction buffers. get_pred_buffer() hands out
// the first slot whose in_use flag is clear and sets the flag;
// free_pred_buffer() clears it again.
typedef struct {
  uint8_t *data;  // buffer storage (presumably predicted pixels; confirm)
  int stride;     // presumably the row stride of data; confirm
  int in_use;     // non-zero while the slot is handed out
} PRED_BUFFER;
40
41typedef struct {
42 PRED_BUFFER *best_pred;
43 PREDICTION_MODE best_mode;
44 TX_SIZE best_tx_size;
45 TX_TYPE tx_type;
46 MV_REFERENCE_FRAME best_ref_frame;
47 MV_REFERENCE_FRAME best_second_ref_frame;
48 uint8_t best_mode_skip_txfm;
49 uint8_t best_mode_initial_skip_flag;
50 int_interpfilters best_pred_filter;
51 MOTION_MODE best_motion_mode;
52 WarpedMotionParams wm_params;
53 int num_proj_ref;
54 PALETTE_MODE_INFO pmi;
55 int64_t best_sse;
56} BEST_PICKMODE;
57
58typedef struct {
59 MV_REFERENCE_FRAME ref_frame;
60 PREDICTION_MODE pred_mode;
61} REF_MODE;
62
63typedef struct {
64 MV_REFERENCE_FRAME ref_frame[2];
65 PREDICTION_MODE pred_mode;
66} COMP_REF_MODE;
67
68struct estimate_block_intra_args {
69 AV1_COMP *cpi;
70 MACROBLOCK *x;
71 PREDICTION_MODE mode;
72 int skippable;
73 RD_STATS *rdc;
74 unsigned int best_sad;
75 bool prune_mode_based_on_sad;
76 bool prune_palette_sad;
77};
79
83typedef struct {
85 BEST_PICKMODE best_pickmode;
87 RD_STATS this_rdc;
89 RD_STATS best_rdc;
91 int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
93 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
95 unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
97 unsigned int ref_costs_single[REF_FRAMES];
99 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
101 int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
103 int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
105 int use_ref_frame_mask[REF_FRAMES];
107 uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
109 bool use_scaled_ref_frame[REF_FRAMES];
111
112static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
113 2, 2, 3, 3, 3, 4,
114 4, 4, 5, 5 };
115static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
116 2, 3, 2, 3, 4, 3,
117 4, 5, 4, 5 };
118
119static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
120 SMOOTH_PRED };
121
122static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
123 NEWMV };
124
125static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
126 { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
127 { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
128 { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
129 { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
130 { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
131 { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
132 { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
133 { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
134};
135
136// GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
137// mode
138static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
139 { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
140 { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
141 { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
142 { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
143 { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
144 { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
145};
146
147static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
148 { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
149 { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
150 { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
151 { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
152 { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
153 { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
154};
155
156static const int_interpfilters filters_ref_set[9] = {
157 [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
158 [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
159 [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
160 [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
161 [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
162 [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
163 [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
164 [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
165 [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
166};
167
168enum {
169 // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
170 INTER_NEAREST = (1 << NEARESTMV),
171 INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
172 INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
173 INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
174};
175
176// The original scan order (default_scan_8x8) is modified according to the extra
177// transpose in hadamard c implementation, i.e., aom_hadamard_lp_8x8_c and
178// aom_hadamard_8x8_c.
179DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
180 0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
181 33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
182 28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
183 23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
184};
185
// The original scan order (av1_default_iscan_8x8) is modified to match the
// Hadamard AVX2 implementations, i.e., aom_hadamard_lp_8x8_avx2 and
// aom_hadamard_8x8_avx2. The AVX2 implementation changes the order of the
// coefficients, so the normal scan order is no longer guaranteed to scan low
// coefficients first; the scan order is therefore modified accordingly.
// Note that this table has to be used together with
// default_scan_8x8_transpose.
193DECLARE_ALIGNED(16, static const int16_t,
194 av1_default_iscan_8x8_transpose[64]) = {
195 0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
196 5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
197 14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
198 27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
199};
200
201// The original scan order (default_scan_16x16) is modified according to the
202// extra transpose in hadamard c implementation in lp case, i.e.,
203// aom_hadamard_lp_16x16_c.
204DECLARE_ALIGNED(16, static const int16_t,
205 default_scan_lp_16x16_transpose[256]) = {
206 0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
207 40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
208 44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
209 9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
210 106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
211 146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
212 33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
213 196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
214 57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
215 198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
216 59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
217 222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
218 109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
219 149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
220 246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
221 211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
222 215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
223 255
224};
225
226#if CONFIG_AV1_HIGHBITDEPTH
227// The original scan order (default_scan_16x16) is modified according to the
228// extra shift in hadamard c implementation in fp case, i.e.,
229// aom_hadamard_16x16_c. Note that 16x16 lp and fp hadamard generate different
230// outputs, so we handle them separately.
231DECLARE_ALIGNED(16, static const int16_t,
232 default_scan_fp_16x16_transpose[256]) = {
233 0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
234 36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
235 44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
236 5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
237 102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
238 146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
239 33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
240 200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
241 53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
242 202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
243 55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
244 222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
245 109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
246 153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
247 250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
248 211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
249 219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
250 255
251};
252#endif
253
// The original scan order (av1_default_iscan_16x16) is modified to match the
// Hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
// The AVX2 implementation changes the order of the coefficients, so the normal
// scan order is no longer guaranteed to scan low coefficients first; the scan
// order is therefore modified accordingly. Note that this table has to be used
// together with default_scan_lp_16x16_transpose.
260DECLARE_ALIGNED(16, static const int16_t,
261 av1_default_iscan_lp_16x16_transpose[256]) = {
262 0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
263 87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
264 24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
265 122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
266 47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
267 114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
268 39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
269 153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
270 70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
271 185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
272 102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
273 216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
274 141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
275 208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
276 133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
277 231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
278 168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
279 255
280};
281
282#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (av1_default_iscan_16x16) is modified to match the
// Hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
// The AVX2 implementation changes the order of the coefficients, so the normal
// scan order is no longer guaranteed to scan low coefficients first; the scan
// order is therefore modified accordingly. Note that this table has to be used
// together with default_scan_fp_16x16_transpose.
289DECLARE_ALIGNED(16, static const int16_t,
290 av1_default_iscan_fp_16x16_transpose[256]) = {
291 0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
292 87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
293 24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
294 122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
295 47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
296 127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
297 52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
298 174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
299 84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
300 171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
301 81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
302 203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
303 128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
304 208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
305 133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
306 231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
307 168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
308 255
309};
310#endif
311
// For entropy coding, IDTX shares the scan orders of the other 2D transforms,
// but the fastest way to calculate the IDTX transform (i.e. with no
// transposes) results in coefficients that are a transposition of the entropy
// coding versions. These tables are used as a substitute for the scan order
// with the faster version of IDTX.
317
318// Must be used together with av1_fast_idtx_iscan_4x4
319DECLARE_ALIGNED(16, static const int16_t,
320 av1_fast_idtx_scan_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6,
321 9, 12, 13, 10, 7, 11, 14, 15 };
322
323// Must be used together with av1_fast_idtx_scan_4x4
324DECLARE_ALIGNED(16, static const int16_t,
325 av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5, 6, 2, 4, 7, 12,
326 3, 8, 11, 13, 9, 10, 14, 15 };
327
328static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
329 av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
330};
331
332// Must be used together with av1_fast_idtx_iscan_8x8
333DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
334 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
335 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
336 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
337 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
338};
339
340// Must be used together with av1_fast_idtx_scan_8x8
341DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
342 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
343 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
344 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
345 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
346};
347
348static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
349 av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
350};
351
352// Must be used together with av1_fast_idtx_iscan_16x16
353DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
354 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
355 5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
356 37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
357 9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
358 85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
359 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
360 12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
361 224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
362 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
363 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
364 31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
365 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
366 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
367 170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
368 246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
369 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
370 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
371 255
372};
373
374// Must be used together with av1_fast_idtx_scan_16x16
375DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
376 0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
377 120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
378 121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
379 122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
380 123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
381 124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
382 125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
383 126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
384 127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
385 128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
386 129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
387 130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
388 131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
389 132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
390 133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
391 134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
392 135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
393 255
394};
395
396// Indicates the blocks for which RD model should be based on special logic
397static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
398 BLOCK_SIZE bsize) {
399 const AV1_COMMON *const cm = &cpi->common;
400 const int large_block = bsize >= BLOCK_32X32;
401 // Only enable for low bitdepth to mitigate issue: b/303023614.
402 return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
403 !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
404 cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
405}
431static inline void find_predictors(
432 AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
433 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
434 struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
435 int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
436 AV1_COMMON *const cm = &cpi->common;
437 MACROBLOCKD *const xd = &x->e_mbd;
438 MB_MODE_INFO *const mbmi = xd->mi[0];
439 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
440 const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
441 const bool ref_is_scaled =
442 ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
443 const YV12_BUFFER_CONFIG *scaled_ref =
444 av1_get_scaled_ref_frame(cpi, ref_frame);
445 const YV12_BUFFER_CONFIG *yv12 =
446 ref_is_scaled && scaled_ref ? scaled_ref : ref;
447 const int num_planes = av1_num_planes(cm);
448 x->pred_mv_sad[ref_frame] = INT_MAX;
449 x->pred_mv0_sad[ref_frame] = INT_MAX;
450 x->pred_mv1_sad[ref_frame] = INT_MAX;
451 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
452 // TODO(kyslov) this needs various further optimizations. to be continued..
453 assert(yv12 != NULL);
454 if (yv12 != NULL) {
455 struct scale_factors *const sf =
456 scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
457 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
458 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
459 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
460 mbmi_ext->mode_context);
461 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
462 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
463 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
464 av1_find_best_ref_mvs_from_stack(
465 cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
466 &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
467 frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
468 // Early exit for non-LAST frame if force_skip_low_temp_var is set.
469 if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv &&
470 !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
471 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
472 bsize);
473 }
474 }
476 av1_count_overlappable_neighbors(cm, xd);
477 }
478 mbmi->num_proj_ref = 1;
479 *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
480}
481
482static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
483 PREDICTION_MODE pred_mode,
484 MV_REFERENCE_FRAME ref_frame0,
485 MV_REFERENCE_FRAME ref_frame1,
486 const AV1_COMMON *cm) {
487 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
488 mbmi->ref_mv_idx = 0;
489 mbmi->mode = pred_mode;
490 mbmi->uv_mode = UV_DC_PRED;
491 mbmi->ref_frame[0] = ref_frame0;
492 mbmi->ref_frame[1] = ref_frame1;
493 pmi->palette_size[PLANE_TYPE_Y] = 0;
494 pmi->palette_size[PLANE_TYPE_UV] = 0;
495 mbmi->filter_intra_mode_info.use_filter_intra = 0;
496 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
497 mbmi->motion_mode = SIMPLE_TRANSLATION;
498 mbmi->num_proj_ref = 1;
499 mbmi->interintra_mode = 0;
500 set_default_interp_filters(mbmi, cm->features.interp_filter);
501}
502
503static inline void init_estimate_block_intra_args(
504 struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
505 args->cpi = cpi;
506 args->x = x;
507 args->mode = DC_PRED;
508 args->skippable = 1;
509 args->rdc = 0;
510 args->best_sad = UINT_MAX;
511 args->prune_mode_based_on_sad = false;
512 args->prune_palette_sad = false;
513}
514
515static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
516 for (int buf_idx = 0; buf_idx < len; buf_idx++) {
517 if (!p[buf_idx].in_use) {
518 p[buf_idx].in_use = 1;
519 return buf_idx;
520 }
521 }
522 return -1;
523}
524
525static inline bool prune_palette_testing_inter(AV1_COMP *cpi,
526 unsigned int source_variance) {
527 return (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
528 cpi->oxcf.speed >= 11 && cpi->rc.high_source_sad &&
529 cpi->sf.rt_sf.rc_compute_spatial_var_sc &&
530 cpi->rc.frame_spatial_variance < 1200 &&
531 cpi->rc.perc_spatial_flat_blocks < 5 &&
532 cpi->rc.percent_blocks_with_motion > 98 && source_variance < 4000);
533}
534
535static inline void free_pred_buffer(PRED_BUFFER *p) {
536 if (p != NULL) p->in_use = 0;
537}
538
539#if CONFIG_INTERNAL_STATS
540static inline void store_coding_context_nonrd(MACROBLOCK *x,
541 PICK_MODE_CONTEXT *ctx,
542 int mode_index) {
543#else
544static inline void store_coding_context_nonrd(MACROBLOCK *x,
545 PICK_MODE_CONTEXT *ctx) {
546#endif // CONFIG_INTERNAL_STATS
547 MACROBLOCKD *const xd = &x->e_mbd;
548 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
549
550 // Take a snapshot of the coding context so it can be
551 // restored if we decide to encode this way
552 ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
553
554 ctx->skippable = txfm_info->skip_txfm;
555#if CONFIG_INTERNAL_STATS
556 ctx->best_mode_index = mode_index;
557#endif // CONFIG_INTERNAL_STATS
558 ctx->mic = *xd->mi[0];
559 ctx->skippable = txfm_info->skip_txfm;
560 av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
561 av1_ref_frame_type(xd->mi[0]->ref_frame));
562}
563
564void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
565 BLOCK_SIZE bsize, TX_SIZE tx_size);
566
567void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
568 int pred_stride, RD_STATS *this_rdc, int *skippable,
569 BLOCK_SIZE bsize, TX_SIZE tx_size);
570
571int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
572 MACROBLOCK *x, MACROBLOCKD *xd,
573 RD_STATS *this_rdc, int start_plane,
574 int stop_plane);
575
576void av1_estimate_block_intra(int plane, int block, int row, int col,
577 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
578 void *arg);
579
580void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
581 int best_early_term, unsigned int ref_cost_intra,
582 int reuse_prediction, struct buf_2d *orig_dst,
583 PRED_BUFFER *tmp_buffers,
584 PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
585 BEST_PICKMODE *best_pickmode,
586 PICK_MODE_CONTEXT *ctx,
587 unsigned int *best_sad_norm);
588
589#endif // AOM_AV1_ENCODER_NONRD_OPT_H_
struct macroblock MACROBLOCK
Encoder's parameters related to the current coding block.
struct AV1_COMP AV1_COMP
Top level encoder structure.
@ AOM_CBR
Definition aom_encoder.h:187
static void find_predictors(AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES], struct buf_2d yv12_mb[8][3], BLOCK_SIZE bsize, int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame)
Finds predicted motion vectors for a block.
Definition nonrd_opt.h:431
int width
Definition av1_common_int.h:782
FeatureFlags features
Definition av1_common_int.h:912
CommonQuantParams quant_params
Definition av1_common_int.h:929
int height
Definition av1_common_int.h:783
RateControlCfg rc_cfg
Definition encoder.h:955
Top level encoder structure.
Definition encoder.h:2878
RATE_CONTROL rc
Definition encoder.h:3085
SPEED_FEATURES sf
Definition encoder.h:3105
AV1EncoderConfig oxcf
Definition encoder.h:2926
AV1_COMMON common
Definition encoder.h:2921
int base_qindex
Definition av1_common_int.h:620
InterpFilter interp_filter
Definition av1_common_int.h:414
bool switchable_motion_mode
Definition av1_common_int.h:412
bool allow_high_precision_mv
Definition av1_common_int.h:374
Structure to store parameters and statistics used in non-rd inter mode evaluation.
Definition nonrd_opt.h:83
int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES]
Array to hold motion vector for all modes and reference frames.
Definition nonrd_opt.h:99
unsigned int ref_costs_single[REF_FRAMES]
Array to hold ref cost of single reference mode for all ref frames.
Definition nonrd_opt.h:97
bool use_scaled_ref_frame[REF_FRAMES]
Array to hold flag indicating if scaled reference frame is used.
Definition nonrd_opt.h:109
int64_t uv_dist[(4)][REF_FRAMES]
Distortion of chroma planes for all modes and reference frames.
Definition nonrd_opt.h:91
RD_STATS this_rdc
Structure to RD cost of current mode.
Definition nonrd_opt.h:87
unsigned int vars[(4)][REF_FRAMES]
Array to hold variance of all modes and reference frames.
Definition nonrd_opt.h:95
uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES]
Array to hold flags of evaluated modes for each reference frame.
Definition nonrd_opt.h:107
int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES]
Array to hold best mv for all modes and reference frames.
Definition nonrd_opt.h:101
int single_inter_mode_costs[(4)][REF_FRAMES]
Array to hold inter mode cost of single ref mode for all ref frames.
Definition nonrd_opt.h:103
RD_STATS best_rdc
Pointer to the RD Cost for the best mode found so far.
Definition nonrd_opt.h:89
int use_ref_frame_mask[REF_FRAMES]
Array to hold use reference frame mask for each reference frame.
Definition nonrd_opt.h:105
BEST_PICKMODE best_pickmode
Structure to hold best inter mode data.
Definition nonrd_opt.h:85
struct buf_2d yv12_mb[REF_FRAMES][3]
Buffer to hold predicted block for all reference frames and planes.
Definition nonrd_opt.h:93
Extended mode info derived from mbmi.
Definition block.h:222
int_mv global_mvs[REF_FRAMES]
Global mvs.
Definition block.h:231
int16_t mode_context[MODE_CTX_REF_FRAMES]
Context used to encode the current mode.
Definition block.h:233
uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]
Number of ref mvs in the drl.
Definition block.h:229
Stores the prediction/txfm mode of the current coding block.
Definition blockd.h:222
int_mv mv[2]
The motion vectors used by the current inter mode.
Definition blockd.h:244
PREDICTION_MODE mode
The prediction mode used.
Definition blockd.h:232
UV_PREDICTION_MODE uv_mode
The UV mode when intra is used.
Definition blockd.h:234
PALETTE_MODE_INFO palette_mode_info
Stores the size and colors of palette mode.
Definition blockd.h:280
uint8_t segment_id
The segment id.
Definition blockd.h:310
uint8_t ref_mv_idx
Which ref_mv to use.
Definition blockd.h:314
MV_REFERENCE_FRAME ref_frame[2]
The reference frames for the MV.
Definition blockd.h:246
FILTER_INTRA_MODE_INFO filter_intra_mode_info
The type of filter intra mode used (if applicable).
Definition blockd.h:274
MOTION_MODE motion_mode
The motion mode used by the inter prediction.
Definition blockd.h:250
uint8_t num_proj_ref
Number of samples used by warp causal.
Definition blockd.h:252
INTERINTRA_MODE interintra_mode
The type of intra mode used by inter-intra.
Definition blockd.h:259
enum aom_rc_mode mode
Definition encoder.h:609
REAL_TIME_SPEED_FEATURES rt_sf
Definition speed_features.h:2008
Stores various encoding/search decisions related to txfm search.
Definition block.h:526
uint8_t skip_txfm
Whether to skip transform and quantization on a partition block level.
Definition block.h:528
MACROBLOCKD e_mbd
Decoder's view of current coding block.
Definition block.h:896
int pred_mv1_sad[REF_FRAMES]
The sad of the 2nd mv ref (near).
Definition block.h:1115
int pred_mv0_sad[REF_FRAMES]
The sad of the 1st mv ref (nearest).
Definition block.h:1113
TxfmSearchInfo txfm_search_info
Results of the txfm searches that have been done.
Definition block.h:1311
int pred_mv_sad[REF_FRAMES]
Sum absolute distortion of the predicted mv for each ref frame.
Definition block.h:1105
MB_MODE_INFO_EXT mbmi_ext
Derived coding information.
Definition block.h:903
uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition blockd.h:781
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition blockd.h:776
MB_MODE_INFO ** mi
Definition blockd.h:617